diff --git a/.gitignore b/.gitignore index 998ad136e81..0a0a9f2c3fe 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ GRTAGS GPATH GSYMS +# python compiled sources +*.pyc + # Make dependencies .depend.mk @@ -57,8 +60,11 @@ GSYMS *.manifest # /src/ +/src/.short_version +/src/base/version.h /src/doc/table/ /src/doc/tools.dox +/src/doc/*.html /src/htdocs/ /src/html.tar.gz /src/kaldi.mk @@ -82,6 +88,14 @@ GSYMS /tools/openfst-1.3.4/ /tools/openfst-1.4.1.tar.gz /tools/openfst-1.4.1/ +/tools/openfst-1.5.4.tar.gz +/tools/openfst-1.5.4/ +/tools/openfst-1.6.0.tar.gz +/tools/openfst-1.6.0/ +/tools/openfst-1.6.1.tar.gz +/tools/openfst-1.6.1/ +/tools/openfst-1.6.2.tar.gz +/tools/openfst-1.6.2/ /tools/pa_stable_v19_20111121.tgz /tools/portaudio/ /tools/sctk-2.4.0-20091110-0958.tar.bz2 @@ -108,6 +122,14 @@ GSYMS /tools/pthreads /tools/pthreads*.zip /tools/sequitur +/tools/sequitur-g2p /tools/srilm.tgz +/tools/liblbfgs-1.10.tar.gz +/tools/liblbfgs-1.10/ +/tools/openfst-1.5.0.tar.gz +/tools/openfst-1.5.0/ +/tools/srilm-1.7.2-beta.tar.gz +/tools/liblbfgs/ +/tools/sequitur-g2p/ /kaldiwin_vs* diff --git a/.travis.yml b/.travis.yml index 85bbc7a52e4..9f94726c07b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,16 +10,19 @@ addons: apt: sources: - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.8 packages: - gdb - gcc-4.9 - g++-4.9 - gfortran-4.9 - liblapack-dev + - clang-3.8 branches: only: - master + - kaldi_52 before_install: - cat /proc/sys/kernel/core_pattern @@ -27,7 +30,7 @@ before_install: - tools/extras/travis_install_bindeps.sh $XROOT script: - - CXX=g++-4.9 + - CXX=clang++-3.8 CFLAGS="-march=native" LDFLAGS="-llapack" INCDIRS="$XROOT/usr/include" diff --git a/README.md b/README.md index 32d4945a909..73abe9f1e3f 100644 --- a/README.md +++ b/README.md @@ -40,25 +40,30 @@ Development pattern for contributors ------------------------------------ 1. [Create a personal fork](https://help.github.com/articles/fork-a-repo/) - of the [main Kaldi repository] (https://github.com/kaldi-asr/kaldi) in GitHub. + of the [main Kaldi repository](https://github.com/kaldi-asr/kaldi) in GitHub. 2. Make your changes in a named branch different from `master`, e.g. you create a branch `my-awesome-feature`. 3. [Generate a pull request](https://help.github.com/articles/creating-a-pull-request/) through the Web interface of GitHub. -4. As a general rule, please follow [Google C++ Style Guide] - (https://google.github.io/styleguide/cppguide.html). +4. As a general rule, please follow [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). There are a [few exceptions in Kaldi](http://kaldi-asr.org/doc/style.html). - You can use the [Google's cpplint.py] - (https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py) + You can use the [Google's cpplint.py](https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py) to verify that your code is free of basic mistakes. Platform specific notes ----------------------- -PowerPC 64bits little-endian (ppc64le): +### PowerPC 64bits little-endian (ppc64le) + - Kaldi is expected to work out of the box in RHEL >= 7 and Ubuntu >= 16.04 with OpenBLAS, ATLAS, or CUDA. -- CUDA drivers for ppc64le can be found at [https://developer.nvidia.com/cuda-downloads] - (https://developer.nvidia.com/cuda-downloads). -- An [IBM Redbook] (https://www.redbooks.ibm.com/abstracts/redp5169.html) is +- CUDA drivers for ppc64le can be found at [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). 
+- An [IBM Redbook](https://www.redbooks.ibm.com/abstracts/redp5169.html) is available as a guide to install and configure CUDA. + +### Android + +- Kaldi supports cross compiling for Android using Android NDK, clang++ and + OpenBLAS. +- See [this blog post](http://jcsilva.github.io/2017/03/18/compile-kaldi-android/) + for details. diff --git a/egs/ami/s5/local/ami_ihm_data_prep.sh b/egs/ami/s5/local/ami_ihm_data_prep.sh index 3a1d43d1ea1..b3ec1723713 100755 --- a/egs/ami/s5/local/ami_ihm_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_data_prep.sh @@ -69,7 +69,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh index c3b9914d7a0..b69732a61eb 100755 --- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh @@ -68,7 +68,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_mdm_data_prep.sh b/egs/ami/s5/local/ami_mdm_data_prep.sh index bc7e4180b4a..2cc973cb2d5 100755 --- a/egs/ami/s5/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_data_prep.sh @@ -75,7 +75,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh index ab0fd185f70..8d9e24a9838 100755 --- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh @@ -67,7 +67,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_data_prep.sh b/egs/ami/s5/local/ami_sdm_data_prep.sh index 8eda00f1d15..e662759a610 100755 --- a/egs/ami/s5/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_data_prep.sh @@ -74,7 +74,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh index 01173d2e3a6..3fa7c938479 100755 --- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh index 24176d69a34..a6c2d02b7af 100755 --- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh +++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh @@ -8,7 +8,7 @@ # This example script demonstrates how speed perturbation of the data helps the nnet training in the SWB setup. . ./cmd.sh -set -e +set -e stage=1 train_stage=-10 use_gpu=true @@ -27,13 +27,13 @@ fix_nnet=false if $use_gpu; then if ! cuda-compiled; then - cat < $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh index 3ae42afb3d8..746c42c4c1a 100755 --- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh @@ -74,7 +74,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_mdm_data_prep.sh b/egs/ami/s5b/local/ami_mdm_data_prep.sh index 0ab11c5893b..d100347a356 100755 --- a/egs/ami/s5b/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_data_prep.sh @@ -79,7 +79,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh index 4fbfe12ccad..65f514f223c 100755 --- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_normalize_transcripts.pl b/egs/ami/s5b/local/ami_normalize_transcripts.pl new file mode 100644 index 00000000000..772e8b50fec --- /dev/null +++ b/egs/ami/s5b/local/ami_normalize_transcripts.pl @@ -0,0 +1,129 @@ +#!/usr/bin/env perl + +# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski) +# 2016 Vimal Manohar + +# The script - based on punctuation times - splits segments longer than #words (input parameter) +# and produces bit more more normalised form of transcripts, as follows +# MeetID Channel Spkr stime etime transcripts + +#use List::MoreUtils 'indexes'; +use strict; +use warnings; + +sub normalise_transcripts; + +sub merge_hashes { + my ($h1, $h2) = @_; + my %hash1 = %$h1; my %hash2 = %$h2; + foreach my $key2 ( keys %hash2 ) { + if( exists $hash1{$key2} ) { + warn "Key [$key2] is in both hashes!"; + next; + } else { + $hash1{$key2} = $hash2{$key2}; + } + } + return %hash1; +} + +sub print_hash { + my ($h) = @_; + my %hash = %$h; + foreach my $k (sort keys %hash) { + print "$k : $hash{$k}\n"; + } +} + +sub get_name { + #no warnings; + my $sname = sprintf("%07d_%07d", $_[0]*100, $_[1]*100) || die 'Input undefined!'; + #use warnings; + return $sname; +} + +sub split_on_comma { + + my ($text, $comma_times, $btime, $etime, $max_words_per_seg)= @_; + my %comma_hash = %$comma_times; + + print "Btime, Etime : $btime, $etime\n"; + + my $stime = ($etime+$btime)/2; #split time + my $skey = ""; + my $otime = $btime; + foreach my $k (sort {$comma_hash{$a} cmp $comma_hash{$b} } keys %comma_hash) { + print "Key : $k : $comma_hash{$k}\n"; + my $ktime = $comma_hash{$k}; + if ($ktime==$btime) { next; } + if 
($ktime==$etime) { last; } + if (abs($stime-$ktime)/20) { + $st=$comma_hash{$skey}; + $et = $etime; + } + my (@utts) = split (' ', $utts1[$i]); + if ($#utts < $max_words_per_seg) { + my $nm = get_name($st, $et); + print "SplittedOnComma[$i]: $nm : $utts1[$i]\n"; + $transcripts{$nm} = $utts1[$i]; + } else { + print 'Continue splitting!'; + my %transcripts2 = split_on_comma($utts1[$i], \%comma_hash, $st, $et, $max_words_per_seg); + %transcripts = merge_hashes(\%transcripts, \%transcripts2); + } + } + return %transcripts; +} + +sub normalise_transcripts { + my $text = $_; + + #DO SOME ROUGH AND OBVIOUS PRELIMINARY NORMALISATION, AS FOLLOWS + #remove the remaining punctation labels e.g. some text ,0 some text ,1 + $text =~ s/[\.\,\?\!\:][0-9]+//g; + #there are some extra spurious puncations without spaces, e.g. UM,I, replace with space + $text =~ s/[A-Z']+,[A-Z']+/ /g; + #split words combination, ie. ANTI-TRUST to ANTI TRUST (None of them appears in cmudict anyway) + #$text =~ s/(.*)([A-Z])\s+(\-)(.*)/$1$2$3$4/g; + $text =~ s/\-/ /g; + #substitute X_M_L with X. M. L. etc. + $text =~ s/\_/. /g; + #normalise and trim spaces + $text =~ s/^\s*//g; + $text =~ s/\s*$//g; + $text =~ s/\s+/ /g; + #some transcripts are empty with -, nullify (and ignore) them + $text =~ s/^\-$//g; + $text =~ s/\s+\-$//; + # apply few exception for dashed phrases, Mm-Hmm, Uh-Huh, etc. those are frequent in AMI + # and will be added to dictionary + $text =~ s/MM HMM/MM\-HMM/g; + $text =~ s/UH HUH/UH\-HUH/g; + + return $text; +} + +while(<>) { + chomp; + print normalise_transcripts($_) . "\n"; +} + diff --git a/egs/ami/s5b/local/ami_sdm_data_prep.sh b/egs/ami/s5b/local/ami_sdm_data_prep.sh index 267aef75535..327595070a6 100755 --- a/egs/ami/s5b/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_data_prep.sh @@ -86,7 +86,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh index d0609e552cd..1378f8b8965 100755 --- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh @@ -82,7 +82,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh b/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh index 617336236ed..0f6abaf94c1 100755 --- a/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh +++ b/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh @@ -1,9 +1,9 @@ #!/bin/bash # This is a chain-training script with TDNN neural networks. 
-# This script is based on local/chain/run_tdnn.sh, but adding +# This script is based on local/chain/tuning/run_tdnn_1a.sh, but adding # the reverberated IHM data into the train set. -# This script obtains better results on both IHM and SDM tasks. +# This script obtains better results on IHM, SDM and MDM tasks. # Please see RESULTS_* for examples of command lines invoking this script. @@ -20,7 +20,7 @@ stage=1 mic=ihm nj=30 min_seg_len=1.55 -use_ihm_ali=true +use_ihm_ali=false train_set=train_cleaned gmm=tri3_cleaned # the gmm for the target data ihm_gmm=tri3_cleaned # the gmm for the IHM system (if --use-ihm-ali true). diff --git a/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh b/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..2869049843f --- /dev/null +++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,334 @@ +#!/bin/bash + +# This is a chain-training script with TDNN+LSTM neural networks. +# This script is based on local/chain/tuning/run_tdnn_lstm_1i.sh, but adding +# the reverberated IHM data into the train set. +# This script obtains better results on IHM, SDM and MDM tasks. + +# Please see RESULTS_* for examples of command lines invoking this script. + +# local/chain/multi_condition/run_tdnn_lstm.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned & +# local/chain/multi_condition/run_tdnn_lstm.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned & +# local/chain/multi_condition/run_tdnn_lstm.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3_cleaned # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +num_data_reps=1 + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! $use_ihm_ali; then + [ "$mic" != "ihm" ] && \ + echo "$0: you cannot specify --use-ihm-ali false if the microphone is not ihm." && \ + exit 1; +else + [ "$mic" == "ihm" ] && \ + echo "$0: you must specify --use-ihm-ali false if the microphone is ihm." && \ + exit 1; +fi + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
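+  # (Lattice supervision, rather than a single 1-best alignment, lets the
+  # LF-MMI numerator sum over alternative alignments of the transcript.)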
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $original_lat_dir + rm $original_lat_dir/fsts.*.gz # save space + + lat_dir_ihmdata=exp/ihm/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats + + mkdir -p $lat_dir/temp/ + mkdir -p $lat_dir/temp2/ + lattice-copy "ark:gunzip -c $original_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp + lattice-copy "ark:gunzip -c $lat_dir_ihmdata/lat.*.gz |" ark,scp:$lat_dir/temp2/lats.ark,$lat_dir/temp2/lats.scp + + # copy the lattices for the reverberated data + rm -f $lat_dir/temp/combined_lats.scp + touch $lat_dir/temp/combined_lats.scp + cat $lat_dir/temp/lats.scp >> $lat_dir/temp/combined_lats.scp + for i in `seq 1 $num_data_reps`; do + cat $lat_dir/temp2/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp + done + sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; + echo "1" > $lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $original_lat_dir/$f $lat_dir/$f + done +fi + + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 
non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
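+  # (utils/mkgraph.sh reads the tree and final.mdl from $dir, so only the
+  # lexicon and grammar are actually taken from data/lang_${LM}.)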
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/run_decode.sh b/egs/ami/s5b/local/chain/run_decode.sh new file mode 100755 index 00000000000..545bdc7b157 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_decode.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +set -e +set -o pipefail +set -u + +stage=-1 +decode_stage=1 + +mic=ihm +use_ihm_ali=false +exp_name=tdnn + +nj=20 + +cleanup_affix= +graph_dir= + +decode_set=dev +decode_suffix= + +extractor= +use_ivectors=true +use_offline_ivectors=false +frames_per_chunk=50 + +scoring_opts= + +. path.sh +. cmd.sh + +. parse_options.sh + +new_mic=$mic +if [ $use_ihm_ali == "true" ]; then + new_mic=${mic}_cleanali +fi + +dir=exp/$new_mic/chain${cleanup_affix:+_$cleanup_affix}/${exp_name} + +if [ $stage -le -1 ]; then + mfccdir=mfcc_${mic} + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc.conf \ + --cmd "$train_cmd" data/$mic/${decode_set} exp/make_${mic}/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set} exp/make_${mic}/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set} +fi + +if [ $stage -le 0 ]; then + mfccdir=mfcc_${mic}_hires + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + utils/copy_data_dir.sh data/$mic/$decode_set data/$mic/${decode_set}_hires + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/$mic/${decode_set}_hires exp/make_${mic}_hires/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set}_hires exp/make_${mic}_hires/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set}_hires +fi + +if $use_ivectors && [ $stage -le 1 ]; then + if [ -z "$extractor" ]; then + echo "--extractor must be supplied when using ivectors" + exit 1 + fi + + if $use_offline_ivectors; then + steps/online/nnet2/extract_ivectors.sh \ + --cmd "$train_cmd" --nj 8 \ + data/$mic/${decode_set}_hires data/lang $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_offline_${decode_set} || exit 1 + else + steps/online/nnet2/extract_ivectors_online.sh \ + --cmd "$train_cmd" --nj 8 \ + data/$mic/${decode_set}_hires $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set} || exit 1 + fi +fi + +final_lm=`cat data/local/lm/final_lm` +LM=$final_lm.pr1-7 + +if [ -z "$graph_dir" ]; then + graph_dir=$dir/graph_${LM} + if [ $stage -le 2 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir + fi +fi + +nj=`cat data/$mic/${decode_set}/utt2spk|cut -d' ' -f2|sort -u|wc -l` + +if [ $nj -gt 50 ]; then + nj=50 +fi + +if [ "$frames_per_chunk" -ne 50 ]; then + decode_suffix=${decode_suffix}_cs${frames_per_chunk} +fi + +if [ $stage -le 3 ]; then + ivector_opts= + if $use_ivectors; then + if $use_offline_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_offline_${decode_set}" + decode_suffix=${decode_suffix}_offline + else + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}" + fi + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 --decode-mbr false $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode${decode_suffix}_${decode_set} || exit 1; +fi diff --git a/egs/ami/s5b/local/chain/run_decode_two_stage.sh b/egs/ami/s5b/local/chain/run_decode_two_stage.sh new file mode 100755 index 00000000000..0d354bfa574 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_decode_two_stage.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +set -e -u +set -o pipefail + +stage=-1 +decode_stage=1 + +mic=ihm +use_ihm_ali=false +exp_name=tdnn + +cleanup_affix= + +decode_set=dev +extractor= +use_ivectors=true +scoring_opts= +lmwt=8 +pad_frames=10 + +. path.sh +. cmd.sh + +. parse_options.sh + +new_mic=$mic +if [ $use_ihm_ali == "true" ]; then + new_mic=${mic}_cleanali +fi + +dir=exp/$new_mic/chain${cleanup_affix:+_$cleanup_affix}/${exp_name} + +nj=20 + +if [ $stage -le -1 ]; then + mfccdir=mfcc_${mic} + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc.conf \ + --cmd "$train_cmd" data/$mic/${decode_set} exp/make_${mic}/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set} exp/make_${mic}/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set} +fi + +utils/data/get_utt2dur.sh data/$mic/${decode_set} + +if [ $stage -le 0 ]; then + mfccdir=mfcc_${mic}_hires + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + utils/copy_data_dir.sh data/$mic/$decode_set data/$mic/${decode_set}_hires + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/$mic/${decode_set}_hires exp/make_${mic}_hires/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set}_hires exp/make_${mic}_hires/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set}_hires +fi + +if $use_ivectors && [ $stage -le 1 ]; then + if [ -z "$extractor" ]; then + "--extractor must be supplied when using ivectors" + fi + + steps/online/nnet2/extract_ivectors_online.sh \ + --cmd "$train_cmd" --nj 8 \ + data/$mic/${decode_set}_hires $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set} || exit 1 +fi + +final_lm=`cat data/local/lm/final_lm` +LM=$final_lm.pr1-7 +graph_dir=$dir/graph_${LM} +if [ $stage -le 2 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +nj=`cat data/$mic/${decode_set}/utt2spk|cut -d' ' -f2|sort -u|wc -l` + +if [ $nj -gt 50 ]; then + nj=50 +fi + +if [ $stage -le 3 ]; then + ivector_opts= + if $use_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}" + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; +fi + +ivector_weights=$dir/decode_${decode_set}/ascore_$lmwt/ivector_weights.gz + +if [ $stage -le 4 ]; then + cat $dir/decode_${decode_set}/ascore_$lmwt/${decode_set}_hires.utt.ctm | \ + grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \ + local/get_ivector_weights_from_ctm_conf.pl \ + --pad-frames $pad_frames data/$mic/${decode_set}/utt2dur | \ + gzip -c > $ivector_weights +fi + +if [ $stage -le 5 ]; then + steps/online/nnet2/extract_ivectors_online.sh \ + --cmd "$train_cmd" --nj $nj --weights $ivector_weights \ + data/$mic/${decode_set}_hires $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}_stage2 || exit 1 +fi + +if [ $stage -le 6 ]; then + ivector_opts= + if $use_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}_stage2" + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set}_stage2 || exit 1; +fi + diff --git a/egs/ami/s5b/local/chain/run_tdnn.sh b/egs/ami/s5b/local/chain/run_tdnn.sh index 61f8f499182..e1adaa9346d 120000 --- a/egs/ami/s5b/local/chain/run_tdnn.sh +++ b/egs/ami/s5b/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_1b.sh \ No newline at end of file +tuning/run_tdnn_1d.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/run_tdnn_lstm.sh b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..23906f31954 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1j.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/run_tdnn_noivec.sh b/egs/ami/s5b/local/chain/run_tdnn_noivec.sh new file mode 100755 index 00000000000..d1329dc2bd1 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_tdnn_noivec.sh @@ -0,0 +1,245 @@ +#!/bin/bash + +# This is a chain-training script with TDNN neural networks. +# Please see RESULTS_* for examples of command lines invoking this script. + + +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali true --mic sdm1 # rerunning with biphone +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali false --mic sdm1 + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --stage 12 & +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned& + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
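+# (As the name suggests, this "noivec" variant trains and decodes on plain
+# hi-res MFCCs: note the absence of --feat.online-ivector-dir and
+# --online-ivector-dir options in the train.py and decode.sh calls below.)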
+stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 15 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/$mic/${train_set}_sp_hires_comb \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
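+  # (The empty .nodelete file is a marker; presumably the grid's periodic
+  # cleanup skips directories containing it, so the egs survive a dead run.)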
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.cmvn-opts "--norm-means=true --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt 30 ]; then + nj_dev=30 + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj_dev --cmd "$decode_cmd" \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 + diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh index b3a645c0c11..88c09c2cb15 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -184,9 +184,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ @@ -226,8 +224,12 @@ if [ $stage -le 18 ]; then rm $dir/.error 2>/dev/null || true for decode_set in dev eval; do ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$decode_cmd" \ + --nj $nj_dev --cmd "$decode_cmd" \ --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ --scoring-opts "--min-lmwt 5 " \ $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh index 0644d624606..98dc95e59a2 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh @@ -212,9 +212,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh index 0a49575ebb0..f87e1a12d36 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh @@ -199,9 +199,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..eb84a1cd876 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,267 @@ +#!/bin/bash + +# same as 1b but uses PCA instead of +# LDA features for the ivector extractor. + +# Results on 03/27/2017: +# local/chain/compare_wer_general.sh ihm tdnn1b_sp_bi tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev 22.0 21.9 +# WER on eval 22.2 22.3 +# Final train prob -0.0813472 -0.0807054 +# Final valid prob -0.132032 -0.133564 +# Final train prob (xent) -1.41543 -1.41951 +# Final valid prob (xent) -1.62316 -1.63021 + +set -e -o pipefail +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +ivector_transform_type=pca +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh index 3e3976ac7a8..92636b4c17e 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -26,6 +26,7 @@ gmm=tri3_cleaned # the gmm for the target data ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). num_threads_ubm=32 nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. 
_cleaned +num_epochs=4 chunk_width=150 chunk_left_context=40 @@ -242,7 +243,7 @@ if [ $stage -le 16 ]; then --egs.chunk-right-context $chunk_right_context \ --trainer.num-chunk-per-minibatch 64 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ + --trainer.num-epochs $num_epochs \ --trainer.optimization.shrink-value 0.99 \ --trainer.optimization.num-jobs-initial 2 \ --trainer.optimization.num-jobs-final 12 \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..a96230075b6 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,307 @@ +#!/bin/bash + +# 1j is same as 1i but with changes related to fast-lstmp layer +# changed num-chunk-per-minibatch to be variable +# added extra_left_context_initial=0 +# and extra_right_context_final=0 +# These changes are similar to those between swbd's run_tdnn_lstm_1{c,d}.sh +# recipes + +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1j_sp_bi_ihmali_ld5 +#WER on dev 37.6 37.3 +#WER on eval 40.9 40.4 +#Final train prob -0.114135 -0.118532 +#Final valid prob -0.245208 -0.245593 +#Final train prob (xent) -1.47648 -1.48337 +#Final valid prob (xent) -2.16365 -2.11097 + +# steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.142->-0.131 xent:train/valid[57,86,final]=(-1.78,-1.48,-1.48/-2.22,-2.17,-2.16) logprob:train/valid[57,86,final]=(-0.157,-0.117,-0.114/-0.243,-0.249,-0.245) +# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.139->-0.130 xent:train/valid[57,86,final]=(-1.82,-1.50,-1.48/-2.18,-2.12,-2.11) logprob:train/valid[57,86,final]=(-0.165,-0.121,-0.119/-0.240,-0.247,-0.246) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +num_epochs=4 + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1j #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
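+  # (Both outputs use output-delay=$label_delay, shifting the targets by a few
+  # frames so the unidirectional LSTMs see some future context per prediction.)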
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
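  # (For context on the flag below: 'chain' systems build the decoding
  # graph with --self-loop-scale 1.0 to match the chain topology, where
  # conventional systems use the default of 0.1; this pairs with the
  # --acwt 1.0 --post-decode-acwt 10.0 options passed to
  # steps/nnet3/decode.sh in the decoding stage further down.)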
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100755 index 00000000000..b8d947d8e92 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,302 @@ +#!/bin/bash + +# 1k is same as 1j but with smaller delay on the first lstm layer +# there is a 37% increase in training time 11hrs vs 8hrs and the gains are modest + +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +#System tdnn_lstm1j_sp_bi_ihmali_ld5 tdnn_lstm1k_sp_bi_ihmali_ld5 +#WER on dev 37.3 36.9 +#WER on eval 40.4 40.0 +#Final train prob -0.118532 -0.119421 +#Final valid prob -0.245593 -0.24915 +#Final train prob (xent) -1.48337 -1.48024 +#Final valid prob (xent) -2.11097 -2.1196 + +#steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1k_sp_bi_ihmali_ld5 +# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.139->-0.130 xent:train/valid[57,86,final]=(-1.82,-1.50,-1.48/-2.18,-2.12,-2.11) logprob:train/valid[57,86,final]=(-0.165,-0.121,-0.119/-0.240,-0.247,-0.246) +# exp/sdm1/chain_cleaned/tdnn_lstm1k_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.140->-0.130 xent:train/valid[57,86,final]=(-1.81,-1.49,-1.48/-2.19,-2.13,-2.12) logprob:train/valid[57,86,final]=(-0.163,-0.121,-0.119/-0.242,-0.249,-0.249) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1k #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-1 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100644 index 00000000000..74c0f5a6ead --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,344 @@ +#!/bin/bash + +# This (1l.sh) is the same as 1i but with per-frame dropout on LSTM layer +# It is a regular (non-fast) LSTM with per-frame dropout on [i, f, o] gates of the LSTM, +# the dropout-adding place is "place4" in paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf. +# We have tried both 4-epoch and 5-epoch training. 
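# A note on the schedule this script passes via --trainer.dropout-schedule,
# '0,0@0.20,0.3@0.50,0': the nnet3 training scripts treat it as a
# piecewise-linear function of the fraction of training completed, so the
# per-frame dropout proportion stays at 0 until 20% of training, ramps up
# linearly to 0.3 at 50%, then decays back to 0 by the end.  Assuming that
# interpretation, the proportion at any point can be computed with a
# hypothetical helper like this (illustration only, not used by the recipe):
#
#   dropout_at() {  # usage: dropout_at <fraction-of-training-completed>
#     echo "$1" | awk '{ f = $1;
#       if (f <= 0.20)      p = 0.0;                      # flat start
#       else if (f <= 0.50) p = 0.3 * (f - 0.20) / 0.30;  # linear ramp up
#       else                p = 0.3 * (1.0 - f) / 0.50;   # linear decay
#       printf("%.3f\n", p); }'
#   }
#   dropout_at 0.35   # prints 0.150, halfway up the ramp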
+
+### IHM
+# Results with flags : --mic ihm --train-set train_cleaned --gmm tri3_cleaned\
+#System tdnn_lstm1i_sp_bi_ld5 tdnn_lstm1l_sp_bi_ld5
+#WER on dev 20.6 19.8
+#WER on eval 20.1 19.2
+#Final train prob -0.044763 -0.0666221
+#Final valid prob -0.0981107 -0.097616
+#Final train prob (xent) -0.722765 -0.915559
+#Final valid prob (xent) -1.03985 -1.09907
+
+# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/
+# exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.064->-0.059 xent:train/valid[58,88,final]=(-0.940,-0.739,-0.723/-1.14,-1.04,-1.04) logprob:train/valid[58,88,final]=(-0.067,-0.046,-0.045/-0.103,-0.099,-0.098)
+# exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.094->-0.082 xent:train/valid[58,88,final]=(-3.10,-1.11,-0.916/-3.17,-1.29,-1.10) logprob:train/valid[58,88,final]=(-0.164,-0.073,-0.067/-0.182,-0.104,-0.098)
+
+# Results with flags for (1l.sh) : --num-epochs 5 --tlstm-affix 1l_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned\
+# Results with flags for (1i.sh) : --num-epochs 5 --tlstm-affix 1i_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned\
+#System tdnn_lstm1i_5epoch_sp_bi_ld5 tdnn_lstm1l_5epoch_sp_bi_ld5
+#WER on dev 20.8 19.7
+#WER on eval 20.6 19.3
+#Final train prob -0.0347795 -0.0600903
+#Final valid prob -0.102486 -0.0964607
+#Final train prob (xent) -0.621007 -0.84667
+#Final valid prob (xent) -1.02634 -1.04725
+
+# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ld5/
+# exp/ihm/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.053->-0.049 xent:train/valid[73,110,final]=(-0.832,-0.631,-0.621/-1.09,-1.03,-1.03) logprob:train/valid[73,110,final]=(-0.057,-0.037,-0.035/-0.102,-0.103,-0.102)
+# exp/ihm/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.085->-0.074 xent:train/valid[73,110,final]=(-3.14,-1.02,-0.847/-3.20,-1.21,-1.05) logprob:train/valid[73,110,final]=(-0.162,-0.065,-0.060/-0.177,-0.101,-0.096)
+
+### SDM
+# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \
+#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1l_sp_bi_ihmali_ld5
+#WER on dev 37.0 35.9
+#WER on eval 40.0 39.4
+#Final train prob -0.106971 -0.15439
+#Final valid prob -0.252201 -0.244499
+#Final train prob (xent) -1.41142 -1.73795
+#Final valid prob (xent) -2.13741 -2.14519
+
+# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/
+# exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.138->-0.128 xent:train/valid[57,86,final]=(-1.78,-1.42,-1.41/-2.23,-2.14,-2.14) logprob:train/valid[57,86,final]=(-0.155,-0.108,-0.107/-0.251,-0.254,-0.252)
+# exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.192->-0.174 xent:train/valid[57,86,final]=(-3.74,-1.95,-1.74/-3.86,-2.31,-2.15) logprob:train/valid[57,86,final]=(-0.287,-0.165,-0.154/-0.335,-0.250,-0.244)
+
+# Results with flags for (1i.sh) : --num-epochs 5 --tlstm-affix 1i_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\
+# Results
with flags for (1l.sh) : --num-epochs 5 --tlstm-affix 1l_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\
+#System tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5
+#WER on dev 36.9 35.8
+#WER on eval 40.2 39.5
+#Final train prob -0.0854552 -0.134189
+#Final valid prob -0.262789 -0.244183
+#Final train prob (xent) -1.2195 -1.58789
+#Final valid prob (xent) -2.13389 -2.08964
+
+# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5 exp/sdm1/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5/
+# exp/sdm1/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.111->-0.104 xent:train/valid[71,108,final]=(-1.61,-1.25,-1.22/-2.16,-2.15,-2.13) logprob:train/valid[71,108,final]=(-0.133,-0.089,-0.085/-0.246,-0.264,-0.263)
+# exp/sdm1/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.170->-0.153 xent:train/valid[71,108,final]=(-3.67,-1.76,-1.59/-3.81,-2.22,-2.09) logprob:train/valid[71,108,final]=(-0.274,-0.144,-0.134/-0.327,-0.248,-0.244)
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+mic=ihm
+nj=30
+min_seg_len=1.55
+use_ihm_ali=false
+train_set=train_cleaned
+gmm=tri3_cleaned # the gmm for the target data
+ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+
+chunk_width=150
+chunk_left_context=40
+chunk_right_context=0
+label_delay=5
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tlstm_affix=1l #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+
+
+# decode options
+extra_left_context=50
+frames_per_chunk=
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 13 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 14 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh new file mode 100644 index 00000000000..b0e7af0618d --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -0,0 +1,352 @@ +#!/bin/bash + +# This (1m.sh) is the same as 1j but with per-frame dropout on LSTM layer +# It is a fast LSTM with per-frame dropout on [i, f, o] gates of the LSTM, +# the dropout-adding place is "place4" in paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf. +# We have tried both 4-epoch and 5-epoch training. 
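# A note on "per-frame" dropout as used here (assuming the semantics of the
# dropout-per-frame option in Kaldi's LSTM layers): one random mask value is
# drawn per frame and shared across the whole cell dimension, rather than
# masking each coordinate independently.  With dropout proportion p, the
# i, f and o gate activations of frame t are all multiplied by the same
# mask value m_t, which is zero with probability p, so an entire frame's
# gates are kept or dropped as a unit ("place4" in the paper cited above).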
+
+### IHM
+# Results with flags : --mic ihm --train-set train_cleaned --gmm tri3_cleaned \
+#System tdnn_lstm1j_sp_bi_ld5 tdnn_lstm1m_sp_bi_ld5
+#WER on dev 20.8 19.9
+#WER on eval 20.3 19.3
+#Final train prob -0.0439145 -0.0653269
+#Final valid prob -0.10673 -0.0998743
+#Final train prob (xent) -0.683776 -0.884698
+#Final valid prob (xent) -1.05254 -1.09002
+
+# steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1j_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1m_sp_bi_ld5/
+# exp/ihm/chain_cleaned/tdnn_lstm1j_sp_bi_ld5: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.063->-0.058 xent:train/valid[58,88,final]=(-0.888,-0.695,-0.684/-1.12,-1.06,-1.05) logprob:train/valid[58,88,final]=(-0.065,-0.045,-0.044/-0.105,-0.107,-0.107)
+# exp/ihm/chain_cleaned/tdnn_lstm1m_sp_bi_ld5: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.092->-0.080 xent:train/valid[58,88,final]=(-3.12,-1.09,-0.885/-3.20,-1.27,-1.09) logprob:train/valid[58,88,final]=(-0.164,-0.072,-0.065/-0.181,-0.103,-0.100)
+
+# Results with flags for (1m.sh) : --num-epochs 5 --tlstm-affix 1m_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned \
+# Results with flags for (1j.sh) : --num-epochs 5 --tlstm-affix 1j_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned \
+#System tdnn_lstm1j_5epoch_sp_bi_ld5 tdnn_lstm1m_5epoch_sp_bi_ld5
+#WER on dev 21.1 19.9
+#WER on eval 20.9 19.8
+#Final train prob -0.0365079 -0.057024
+#Final valid prob -0.112709 -0.0992725
+#Final train prob (xent) -0.601602 -0.800653
+#Final valid prob (xent) -1.03241 -1.04748
+
+# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ld5/
+# exp/ihm/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.053->-0.049 xent:train/valid[73,110,final]=(-0.813,-0.615,-0.602/-1.08,-1.04,-1.03) logprob:train/valid[73,110,final]=(-0.057,-0.038,-0.037/-0.106,-0.113,-0.113)
+# exp/ihm/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.080->-0.072 xent:train/valid[73,110,final]=(-3.15,-0.985,-0.801/-3.26,-1.21,-1.05) logprob:train/valid[73,110,final]=(-0.161,-0.062,-0.057/-0.183,-0.102,-0.099)
+
+### SDM
+# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \
+#System tdnn_lstm1j_sp_bi_ihmali_ld5 tdnn_lstm1m_sp_bi_ihmali_ld5
+#WER on dev 36.9 36.4
+#WER on eval 40.5 39.9
+#Final train prob -0.108141 -0.148861
+#Final valid prob -0.257468 -0.240962
+#Final train prob (xent) -1.38179 -1.70258
+#Final valid prob (xent) -2.13095 -2.12803
+
+# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1m_sp_bi_ihmali_ld5/
+# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.138->-0.128 xent:train/valid[57,86,final]=(-1.71,-1.39,-1.38/-2.18,-2.14,-2.13) logprob:train/valid[57,86,final]=(-0.150,-0.110,-0.108/-0.251,-0.260,-0.257)
+# exp/sdm1/chain_cleaned/tdnn_lstm1m_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.187->-0.170 xent:train/valid[57,86,final]=(-3.74,-1.90,-1.70/-3.88,-2.28,-2.13) logprob:train/valid[57,86,final]=(-0.286,-0.158,-0.149/-0.336,-0.245,-0.241)
+
+# Results with flags for (1m.sh) : --num-epochs 5 --tlstm-affix 1m_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\
+# Results
with flags for (1j.sh) : --num-epochs 5 --tlstm-affix 1j_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\ +#System tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5 +#WER on dev 37.4 36.0 +#WER on eval 40.7 39.6 +#Final train prob -0.0879063 -0.133092 +#Final valid prob -0.270953 -0.243246 +#Final train prob (xent) -1.20822 -1.56293 +#Final valid prob (xent) -2.1425 -2.07265 + +# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.115->-0.107 xent:train/valid[71,108,final]=(-1.56,-1.22,-1.21/-2.16,-2.16,-2.14) logprob:train/valid[71,108,final]=(-0.131,-0.090,-0.088/-0.256,-0.273,-0.271) +# exp/sdm1/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.167->-0.153 xent:train/valid[71,108,final]=(-3.69,-1.71,-1.56/-3.84,-2.20,-2.07) logprob:train/valid[71,108,final]=(-0.279,-0.140,-0.133/-0.329,-0.247,-0.243) + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +dropout_schedule='0,0@0.20,0.3@0.50,0' # dropout schedule controls the dropout + # proportion for each training iteration. +num_epochs=4 + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1m #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl b/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl new file mode 100755 index 00000000000..96db9af3638 --- /dev/null +++ b/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl @@ -0,0 +1,77 @@ +#! 
/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $pad_frames = 0;
+my $silence_weight = 0.00001;
+my $scale_weights_by_ctm_conf = "false";
+my $frame_shift = 0.01;
+
+GetOptions('pad-frames:i' => \$pad_frames,
+ 'silence-weight:f' => \$silence_weight,
+ 'scale-weights-by-ctm-conf:s' => \$scale_weights_by_ctm_conf,
+ 'frame-shift:f' => \$frame_shift);
+
+if (scalar @ARGV != 1) {
+ die "Usage: get_ivector_weights_from_ctm_conf.pl [options] <utt2dur> < <ctm> > <weights>";
+}
+
+my $utt2dur = shift @ARGV;
+
+$pad_frames >= 0 || die "Bad pad-frames value $pad_frames; must be >= 0";
+($scale_weights_by_ctm_conf eq 'false') || ($scale_weights_by_ctm_conf eq 'true') || die "Bad scale-weights-by-ctm-conf $scale_weights_by_ctm_conf; must be true/false";
+
+open(L, "<$utt2dur") || die "unable to open utt2dur file $utt2dur";
+
+my @all_utts = ();
+my %utt2weights;
+
+while (<L>) {
+ chomp;
+ my @A = split;
+ @A == 2 || die "Incorrect format of utt2dur file $_";
+ my ($utt, $len) = @A;
+
+ push @all_utts, $utt;
+ $len = int($len / $frame_shift);
+
+ # Initialize weights for each utterance; every frame starts at the
+ # silence weight until the CTM marks it as covered by a word.
+ my $weights = [];
+ for (my $n = 0; $n < $len; $n++) {
+ push @$weights, $silence_weight;
+ }
+ $utt2weights{$utt} = $weights;
+}
+close(L);
+
+# Read the CTM from stdin; frames covered by a word (plus padding) get
+# weight 1, or the word confidence if --scale-weights-by-ctm-conf is true.
+while (<STDIN>) {
+ chomp;
+ my @A = split;
+ @A == 6 || die "bad ctm line $_";
+
+ my $utt = $A[0];
+ my $beg = $A[2];
+ my $len = $A[3];
+ my $beg_int = int($beg / $frame_shift) - $pad_frames;
+ my $len_int = int($len / $frame_shift) + 2*$pad_frames;
+ my $conf = $A[5];
+
+ my $array_ref = $utt2weights{$utt};
+ defined $array_ref || die "No length info for utterance $utt";
+
+ for (my $t = $beg_int; $t < $beg_int + $len_int; $t++) {
+ if ($t >= 0 && $t < @$array_ref) {
+ if ($scale_weights_by_ctm_conf eq "false") {
+ ${$array_ref}[$t] = 1;
+ } else {
+ ${$array_ref}[$t] = $conf;
+ }
+ }
+ }
+}
+
+foreach my $utt (@all_utts) {
+ my $array_ref = $utt2weights{$utt};
+ print $utt, " [ ", join(" ", @$array_ref), " ]\n";
+}
diff --git a/egs/ami/s5b/local/make_rt_2004_dev.pl b/egs/ami/s5b/local/make_rt_2004_dev.pl
new file mode 120000
index 00000000000..a0d27619369
--- /dev/null
+++ b/egs/ami/s5b/local/make_rt_2004_dev.pl
@@ -0,0 +1 @@
+../../../rt/s5/local/make_rt_2004_dev.pl
\ No newline at end of file
diff --git a/egs/ami/s5b/local/make_rt_2004_eval.pl b/egs/ami/s5b/local/make_rt_2004_eval.pl
new file mode 120000
index 00000000000..8b951f9c940
--- /dev/null
+++ b/egs/ami/s5b/local/make_rt_2004_eval.pl
@@ -0,0 +1 @@
+../../../rt/s5/local/make_rt_2004_eval.pl
\ No newline at end of file
diff --git a/egs/ami/s5b/local/make_rt_2005_eval.pl b/egs/ami/s5b/local/make_rt_2005_eval.pl
new file mode 120000
index 00000000000..6185b83a5a3
--- /dev/null
+++ b/egs/ami/s5b/local/make_rt_2005_eval.pl
@@ -0,0 +1 @@
+../../../rt/s5/local/make_rt_2005_eval.pl
\ No newline at end of file
diff --git a/egs/ami/s5b/local/modify_stm.py b/egs/ami/s5b/local/modify_stm.py
new file mode 100755
index 00000000000..52ab6fed1ef
--- /dev/null
+++ b/egs/ami/s5b/local/modify_stm.py
@@ -0,0 +1,97 @@
+#!
/usr/bin/env python + +import sys +import collections +import itertools +import argparse + +from collections import defaultdict + +def IgnoreWordList(stm_lines, wordlist): + for i in range(0, len(stm_lines)): + line = stm_lines[i] + splits = line.strip().split() + + line_changed = False + for j in range(5, len(splits)): + if str.lower(splits[j]) in wordlist: + splits[j] = "{{ {0} / @ }}".format(splits[j]) + line_changed = True + + + if line_changed: + stm_lines[i] = " ".join(splits) + +def IgnoreIsolatedWords(stm_lines): + for i in range(0, len(stm_lines)): + line = stm_lines[i] + splits = line.strip().split() + + assert( splits[5][0] != '<' ) + + if len(splits) == 6 and splits[5] != "IGNORE_TIME_SEGMENT_IN_SCORING": + splits.insert(5, "") + else: + splits.insert(5, "") + stm_lines[i] = " ".join(splits) + +def IgnoreBeginnings(stm_lines): + beg_times = defaultdict(itertools.repeat(float("inf")).next) + + lines_to_add = [] + for line in stm_lines: + splits = line.strip().split() + + beg_times[(splits[0],splits[1])] = min(beg_times[(splits[0],splits[1])], float(splits[3])) + + for t,v in beg_times.iteritems(): + lines_to_add.append("{0} {1} {0} 0.0 {2} IGNORE_TIME_SEGMENT_IN_SCORING".format(t[0], t[1], v)) + + stm_lines.extend(lines_to_add) + +def WriteStmLines(stm_lines): + for line in stm_lines: + print(line) + +def GetArgs(): + parser = argparse.ArgumentParser("This script modifies STM to remove certain words and segments from scoring. Use sort +0 -1 +1 -2 +3nb -4 while writing out.", + formatter_class = argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument("--ignore-beginnings", + type = str, choices = ["true", "false"], + help = "Ignore beginnings of the recordings since " + "they are not transcribed") + parser.add_argument("--ignore-isolated-words", + type = str, choices = ["true", "false"], + help = "Remove isolated words from scoring " + "because they may be hard to recognize without " + "speaker diarization") + parser.add_argument("--ignore-word-list", + type = str, + help = "List of words to be ignored") + + args = parser.parse_args() + + return args + +def Main(): + args = GetArgs() + + stm_lines = [ x.strip() for x in sys.stdin.readlines() ] + + print (';; LABEL "NO_ISO", "No isolated words", "Ignoring isolated words"') + print (';; LABEL "ISO", "Isolated words", "isolated words"') + + #if args.ignore_word_list is not None: + # wordlist = {} + # for x in open(args.ignore_word_list).readlines(): + # wordlist[str.lower(x.strip())] = 1 + # IgnoreWordList(stm_lines, wordlist) + + IgnoreIsolatedWords(stm_lines) + IgnoreBeginnings(stm_lines) + + WriteStmLines(stm_lines) + +if __name__ == "__main__": + Main() diff --git a/egs/ami/s5b/local/nnet3/run_blstm.sh b/egs/ami/s5b/local/nnet3/run_blstm.sh index 776151fb5aa..e0e7bcfcdcf 100755 --- a/egs/ami/s5b/local/nnet3/run_blstm.sh +++ b/egs/ami/s5b/local/nnet3/run_blstm.sh @@ -7,6 +7,7 @@ remove_egs=true use_ihm_ali=false train_set=train_cleaned ihm_gmm=tri3 +gmm=tri3a_cleaned nnet3_affix=_cleaned # BLSTM params @@ -32,6 +33,7 @@ local/nnet3/run_lstm.sh --affix $affix \ --srand $srand \ --train-stage $train_stage \ --train-set $train_set \ + --gmm $gmm \ --ihm-gmm $ihm_gmm \ --nnet3-affix $nnet3_affix \ --lstm-delay " [-1,1] [-2,2] [-3,3] " \ @@ -49,4 +51,3 @@ local/nnet3/run_lstm.sh --affix $affix \ --num-epochs $num_epochs \ --use-ihm-ali $use_ihm_ali \ --remove-egs $remove_egs - diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh index bccbb42494c..860009c5ef5 100755 --- 
a/egs/ami/s5b/local/nnet3/run_ivector_common.sh +++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh @@ -17,8 +17,8 @@ train_set=train # you might set this to e.g. train_cleaned. gmm=tri3 # This specifies a GMM-dir from the features of the type you're training the system on; # it should contain alignments for 'train_set'. - num_threads_ubm=32 +ivector_transform_type=lda nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it # becomes exp/$mic/nnet3_cleaned or whatever. @@ -30,7 +30,7 @@ nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stu gmmdir=exp/${mic}/${gmm} -for f in data/${mic}/${train_set}/feats.scp ${gmmdir}/final.mdl; do +for f in data/${mic}/${train_set}/feats.scp ; do if [ ! -f $f ]; then echo "$0: expected file $f to exist" exit 1 @@ -110,20 +110,36 @@ if [ $stage -le 4 ]; then echo "$0: warning: number of feats $n1 != $n2, if these are very different it could be bad." fi - echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." - if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then - # we don't want to overwrite old stuff, ask the user to delete it. - echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " - echo " ... please delete and then rerun, or use a later --stage option." - exit 1; - fi - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 10000 $temp_data_root/${train_set}_hires data/lang \ - $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + case $ivector_transform_type in + lda) + if [ ! -f ${gmmdir}/final.mdl ]; then + echo "$0: expected file ${gmmdir}/final.mdl to exist" + exit 1; + fi + echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." + if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then + # we don't want to overwrite old stuff, ask the user to delete it. + echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " + echo " ... please delete and then rerun, or use a later --stage option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 10000 $temp_data_root/${train_set}_hires data/lang \ + $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + pca) + echo "$0: computing a PCA transform from the hires data." + steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" \ + --max-utts 10000 --subsample 2 \ + $temp_data_root/${train_set}_hires \ + exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + *) echo "$0: invalid iVector transform type $ivector_transform_type" && exit 1; + esac fi - if [ $stage -le 5 ]; then echo "$0: computing a subset of data to train the diagonal UBM." diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh index c5583e2d0ef..25254629933 100755 --- a/egs/ami/s5b/local/nnet3/run_lstm.sh +++ b/egs/ami/s5b/local/nnet3/run_lstm.sh @@ -225,9 +225,12 @@ if [ $stage -le 14 ]; then [ ! 
-z $decode_iter ] && model_opts=" --iter $decode_iter "; for decode_set in dev eval; do ( - num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi decode_dir=${dir}/decode_${decode_set} - steps/nnet3/decode.sh --nj 250 --cmd "$decode_cmd" \ + steps/nnet3/decode.sh --nj $nj_dev --cmd "$decode_cmd" \ $model_opts \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ diff --git a/egs/ami/s5b/local/nnet3/run_tdnn.sh b/egs/ami/s5b/local/nnet3/run_tdnn.sh index bbc6ed5c042..7b463f4ce57 100755 --- a/egs/ami/s5b/local/nnet3/run_tdnn.sh +++ b/egs/ami/s5b/local/nnet3/run_tdnn.sh @@ -45,10 +45,12 @@ tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the co # Options which are not passed through to run_ivector_common.sh train_stage=-10 splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" -remove_egs=true +remove_egs=false relu_dim=850 num_epochs=3 +common_egs_dir= + . cmd.sh . ./path.sh . ./utils/parse_options.sh @@ -122,30 +124,55 @@ fi [ ! -f $ali_dir/ali.1.gz ] && echo "$0: expected $ali_dir/ali.1.gz to exist" && exit 1 if [ $stage -le 12 ]; then + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir $train_data_dir \ + --ivector-dir $train_ivector_dir \ + --ali-dir $ali_dir \ + --relu-dim $relu_dim \ + --splice-indexes "$splice_indexes" \ + --use-presoftmax-prior-scale true \ + --include-log-softmax true \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 13 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - steps/nnet3/tdnn/train.sh --stage $train_stage \ - --num-epochs $num_epochs --num-jobs-initial 2 --num-jobs-final 12 \ - --splice-indexes "$splice_indexes" \ - --feat-type raw \ - --online-ivector-dir ${train_ivector_dir} \ - --cmvn-opts "--norm-means=false --norm-vars=false" \ - --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \ + steps/nnet3/train_dnn.py --stage $train_stage \ --cmd "$decode_cmd" \ - --relu-dim "$relu_dim" \ - --remove-egs "$remove_egs" \ - $train_data_dir data/lang $ali_dir $dir + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --egs.dir "$common_egs_dir" \ + --trainer.samples-per-iter 400000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.0015 \ + --trainer.optimization.final-effective-lrate 0.00015 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs "$remove_egs" \ + --cleanup true \ + --feat-dir $train_data_dir \ + --lang data/lang \ + --ali-dir $ali_dir \ + --dir $dir fi -if [ $stage -le 12 ]; then +if [ $stage -le 14 ]; then rm $dir/.error || true 2>/dev/null for decode_set in dev eval; do ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi decode_dir=${dir}/decode_${decode_set} - steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" \ + steps/nnet3/decode.sh --nj $nj_dev --cmd "$decode_cmd" \ --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/$mic/${decode_set}_hires $decode_dir ) & diff --git 
a/egs/ami/s5b/local/prepare_parallel_train_data.sh b/egs/ami/s5b/local/prepare_parallel_train_data.sh index b049c906c3b..b551bacfb92 100755 --- a/egs/ami/s5b/local/prepare_parallel_train_data.sh +++ b/egs/ami/s5b/local/prepare_parallel_train_data.sh @@ -5,6 +5,10 @@ # but the wav data is copied from data/ihm. This is a little tricky because the # utterance ids are different between the different mics +train_set=train + +. utils/parse_options.sh + if [ $# != 1 ]; then echo "Usage: $0 [sdm1|mdm8]" @@ -18,12 +22,10 @@ if [ $mic == "ihm" ]; then exit 1; fi -train_set=train - . cmd.sh . ./path.sh -for f in data/ihm/train/utt2spk data/$mic/train/utt2spk; do +for f in data/ihm/${train_set}/utt2spk data/$mic/${train_set}/utt2spk; do if [ ! -f $f ]; then echo "$0: expected file $f to exist" exit 1 @@ -32,12 +34,12 @@ done set -e -o pipefail -mkdir -p data/$mic/train_ihmdata +mkdir -p data/$mic/${train_set}_ihmdata # the utterance-ids and speaker ids will be from the SDM or MDM data -cp data/$mic/train/{spk2utt,text,utt2spk} data/$mic/train_ihmdata/ +cp data/$mic/${train_set}/{spk2utt,text,utt2spk} data/$mic/${train_set}_ihmdata/ # the recording-ids will be from the IHM data. -cp data/ihm/train/{wav.scp,reco2file_and_channel} data/$mic/train_ihmdata/ +cp data/ihm/${train_set}/{wav.scp,reco2file_and_channel} data/$mic/${train_set}_ihmdata/ # map sdm/mdm segments to the ihm segments @@ -47,19 +49,17 @@ mic_base_upcase=$(echo $mic | sed 's/[0-9]//g' | tr 'a-z' 'A-Z') # It has lines like: # AMI_EN2001a_H02_FEO065_0021133_0021442 AMI_EN2001a_SDM_FEO065_0021133_0021442 -tmpdir=data/$mic/train_ihmdata/ +tmpdir=data/$mic/${train_set}_ihmdata/ -awk '{print $1, $1}' $tmpdir/ihmutt2utt # Map the 1st field of the segments file from the ihm data (the 1st field being # the utterance-id) to the corresponding SDM or MDM utterance-id. The other # fields remain the same (e.g. we want the recording-ids from the IHM data). 
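# [Hypothetical illustration of the mapping performed below] Given an
# ihmutt2utt entry such as
#   AMI_EN2001a_H02_FEO065_0021133_0021442 AMI_EN2001a_SDM_FEO065_0021133_0021442
# a segments line keyed by the IHM utterance-id, e.g.
#   AMI_EN2001a_H02_FEO065_0021133_0021442 AMI_EN2001a_H02 211.33 214.42
# comes out keyed by the SDM/MDM utterance-id, with the IHM recording-id
# and times untouched:
#   AMI_EN2001a_SDM_FEO065_0021133_0021442 AMI_EN2001a_H02 211.33 214.42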
-utils/apply_map.pl -f 1 $tmpdir/ihmutt2utt data/$mic/train_ihmdata/segments - -utils/fix_data_dir.sh data/$mic/train_ihmdata +utils/apply_map.pl -f 1 $tmpdir/ihmutt2utt data/$mic/${train_set}_ihmdata/segments -rm $tmpdir/ihmutt2utt +utils/fix_data_dir.sh data/$mic/${train_set}_ihmdata exit 0; diff --git a/egs/ami/s5b/local/run_cleanup_segmentation.sh b/egs/ami/s5b/local/run_cleanup_segmentation.sh index e2f0b0516ce..9a947ce1fce 100755 --- a/egs/ami/s5b/local/run_cleanup_segmentation.sh +++ b/egs/ami/s5b/local/run_cleanup_segmentation.sh @@ -129,7 +129,6 @@ fi final_lm=`cat data/local/lm/final_lm` LM=$final_lm.pr1-7 - if [ $stage -le 5 ]; then graph_dir=exp/$mic/${gmm}_${cleanup_affix}/graph_$LM nj_dev=$(cat data/$mic/dev/spk2utt | wc -l) @@ -137,9 +136,9 @@ if [ $stage -le 5 ]; then $decode_cmd $graph_dir/mkgraph.log \ utils/mkgraph.sh data/lang_$LM exp/$mic/${gmm}_${cleanup_affix} $graph_dir - steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \ + steps/decode_fmllr.sh --nj $nj_dev --cmd "$decode_cmd" --config conf/decode.conf \ $graph_dir data/$mic/dev exp/$mic/${gmm}_${cleanup_affix}/decode_dev_$LM - steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \ + steps/decode_fmllr.sh --nj $nj_eval --cmd "$decode_cmd" --config conf/decode.conf \ $graph_dir data/$mic/eval exp/$mic/${gmm}_${cleanup_affix}/decode_eval_$LM fi diff --git a/egs/ami/s5b/local/run_prepare_rt.sh b/egs/ami/s5b/local/run_prepare_rt.sh new file mode 120000 index 00000000000..e10f1d53a19 --- /dev/null +++ b/egs/ami/s5b/local/run_prepare_rt.sh @@ -0,0 +1 @@ +../../../rt/s5/local/run_prepare_rt.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/run_train_raw_lstm.sh b/egs/ami/s5b/local/run_train_raw_lstm.sh new file mode 100755 index 00000000000..5c0431fe796 --- /dev/null +++ b/egs/ami/s5b/local/run_train_raw_lstm.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +# LSTM options +splice_indexes="-2,-1,0,1,2 0" +label_delay=0 +num_lstm_layers=2 +cell_dim=64 +hidden_dim=64 +recurrent_projection_dim=32 +non_recurrent_projection_dim=32 +chunk_width=40 +chunk_left_context=40 +lstm_delay="-1 -2" + +# training options +num_epochs=3 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +momentum=0.5 +num_chunk_per_minibatch=256 +samples_per_iter=20000 +remove_egs=false +max_param_change=1 + +num_utts_subset_valid=6 +num_utts_subset_train=6 + +use_dense_targets=false +extra_egs_copy_cmd="nnet3-copy-egs-overlap-detection ark:- ark:- |" + +# target options +train_data_dir=data/sdm1/train_whole_sp_hires_bp +targets_scp=exp/sdm1/overlap_speech_train_cleaned_sp/overlap_feats.scp +deriv_weights_scp=exp/sdm1/overlap_speech_train_cleaned_sp/deriv_weights.scp +egs_dir= +nj=40 +feat_type=raw +config_dir= +compute_objf_opts= + +mic=sdm1 +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1 +if [ -z "$dir" ]; then + dir=exp/$mic/nnet3_raw/nnet_lstm +fi + +dir=$dir${affix:+_$affix}_n${num_hidden_layers} +if [ $label_delay -gt 0 ]; then dir=${dir}_ld$label_delay; fi + + +if ! 
cuda-compiled; then + cat <' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + if $decode_mbr; then + $cmd JOB=1:$nj $dir/ascoring/log/get_ctm.${LMWT}.JOB.log \ + mkdir -p $dir/ascore_${LMWT}/ '&&' \ + lattice-scale --inv-acoustic-scale=${LMWT} "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ + lattice-limit-depth ark:- ark:- \| \ + lattice-push --push-strings=false ark:- ark:- \| \ + lattice-align-words-lexicon --max-expand=10.0 \ + $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \ + '>' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + else + $cmd JOB=1:$nj $dir/ascoring/log/get_ctm.${LMWT}.JOB.log \ + mkdir -p $dir/ascore_${LMWT}/ '&&' \ + lattice-scale --inv-acoustic-scale=${LMWT} "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ + lattice-limit-depth ark:- ark:- \| \ + lattice-1best ark:- ark:- \| \ + lattice-push --push-strings=false ark:- ark:- \| \ + lattice-align-words-lexicon --max-expand=10.0 \ + $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + nbest-to-ctm $frame_shift_opt ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \ + '>' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + fi # Merge and clean, - for ((n=1; n<=nj; n++)); do cat $dir/ascore_${LMWT}/${name}.${n}.ctm; done > $dir/ascore_${LMWT}/${name}.ctm - rm -f $dir/ascore_${LMWT}/${name}.*.ctm + for ((n=1; n<=nj; n++)); do + cat $dir/ascore_${LMWT}/${name}.${n}.ctm; + rm -f $dir/ascore_${LMWT}/${name}.${n}.ctm + done > $dir/ascore_${LMWT}/${name}.utt.ctm )& done wait; [ -f $dir/.error ] && echo "$0: error during ctm generation. check $dir/ascoring/log/get_ctm.*.log" && exit 1; fi +if [ $stage -le 1 ]; then + for LMWT in $(seq $min_lmwt $max_lmwt); do + cat $dir/ascore_${LMWT}/${name}.utt.ctm | \ + $copy_ctm_script | utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + > $dir/ascore_${LMWT}/${name}.ctm || exit 1 + done +fi + if [ $stage -le 1 ]; then # Remove some stuff we don't want to score, from the ctm. # - we remove hesitations here, otherwise the CTM would have a bug! # (confidences in place of the removed hesitations), - for x in $dir/ascore_*/${name}.ctm; do - cp $x $x.tmpf; + for LMWT in $(seq $min_lmwt $max_lmwt); do + x=$dir/ascore_${LMWT}/${name}.ctm + mv $x $x.tmpf; cat $x.tmpf | grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \ grep -i -v -E ' (ACH|AH|EEE|EH|ER|EW|HA|HEE|HM|HMM|HUH|MM|OOF|UH|UM) ' | \ grep -i -v -E '<UNK>' > $x; @@ -94,8 +126,9 @@ fi if [ $stage -le 2 ]; then if [ "$asclite" == "true" ]; then - oname=$name + oname=${name} [ ! -z $overlap_spk ] && oname=${name}_o$overlap_spk + oname=${oname}${stm_suffix} echo "asclite is starting" # Run scoring, meaning of hubscr.pl options: # -G .. produce alignment graphs, @@ -109,10 +142,10 @@ if [ $stage -le 2 ]; then # -V .. skip validation of input transcripts, # -h rt-stt ..
removes non-lexical items from CTM, $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \ - cp $data/stm $dir/ascore_LMWT/ '&&' \ + cp $data/stm${stm_suffix} $dir/ascore_LMWT/ '&&' \ cp $dir/ascore_LMWT/${name}.ctm $dir/ascore_LMWT/${oname}.ctm '&&' \ $hubscr -G -v -m 1:2 -o$overlap_spk -a -C -B 8192 -p $hubdir -V -l english \ - -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${oname}.ctm || exit 1 + -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm${stm_suffix} $dir/ascore_LMWT/${oname}.ctm || exit 1 # Compress some scoring outputs : alignment info and graphs, echo -n "compressing asclite outputs " for LMWT in $(seq $min_lmwt $max_lmwt); do @@ -126,8 +159,8 @@ if [ $stage -le 2 ]; then echo done else $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \ - cp $data/stm $dir/ascore_LMWT/ '&&' \ - $hubscr -p $hubdir -v -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${name}.ctm || exit 1 + cp $data/stm${stm_suffix} $dir/ascore_LMWT/ '&&' \ + $hubscr -p $hubdir -v -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm${stm_suffix} $dir/ascore_LMWT/${name}${stm_suffix}.ctm || exit 1 fi fi diff --git a/egs/ami/s5b/path.sh b/egs/ami/s5b/path.sh index ad2c93b309b..b4711d23926 100644 --- a/egs/ami/s5b/path.sh +++ b/egs/ami/s5b/path.sh @@ -9,5 +9,4 @@ LMBIN=$KALDI_ROOT/tools/irstlm/bin SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64 BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt -export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM - +export PATH=$LMBIN:$BEAMFORMIT:$SRILM:$PATH diff --git a/egs/ami/s5b/run.sh b/egs/ami/s5b/run.sh index 56cdd29e311..0a630a87a5b 100755 --- a/egs/ami/s5b/run.sh +++ b/egs/ami/s5b/run.sh @@ -56,7 +56,7 @@ if [ "$base_mic" == "mdm" ]; then PROCESSED_AMI_DIR=$AMI_DIR/beamformed if [ $stage -le 1 ]; then # for MDM data, do beamforming - ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; make beamformit;'" && exit 1 + ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; extras/install_beamformit.sh; cd -;'" && exit 1 local/ami_beamform.sh --cmd "$train_cmd" --nj 20 $nmics $AMI_DIR $PROCESSED_AMI_DIR fi else diff --git a/egs/aspire/s5/conf/mfcc_hires_bp.conf b/egs/aspire/s5/conf/mfcc_hires_bp.conf new file mode 100644 index 00000000000..64292e8b489 --- /dev/null +++ b/egs/aspire/s5/conf/mfcc_hires_bp.conf @@ -0,0 +1,13 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=28 +--num-ceps=28 +--cepstral-lifter=0 +--low-freq=330 # low cutoff frequency for mel bins +--high-freq=-1000 # high cutoff frequency, relative to Nyquist of 4000 (=3000) + + diff --git a/egs/aspire/s5/conf/segmentation_music.conf b/egs/aspire/s5/conf/segmentation_music.conf new file mode 100644 index 00000000000..28b5feaf5d5 --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_music.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=-1 # Pad speech segments by this many frames on either side +max_blend_length=-1 # Maximum duration of speech that will be removed as part + # of the smoothing process. This is done only if there are no other + # speech segments nearby.
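+# (Editor's note, a hedged worked example for the merging option just below:
+# with max_intersegment_length=30, two speech segments separated by 20 frames
+# of silence would be merged into one; with the value 0 used in this config,
+# no merging is done.)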
+max_intersegment_length=0 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=-1 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping pieces. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=100000 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_ovlp.conf b/egs/aspire/s5/conf/segmentation_ovlp.conf new file mode 100644 index 00000000000..28b5feaf5d5 --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_ovlp.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=-1 # Pad speech segments by this many frames on either side +max_blend_length=-1 # Maximum duration of speech that will be removed as part + # of the smoothing process. This is done only if there are no other + # speech segments nearby. +max_intersegment_length=0 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=-1 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping pieces. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=100000 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_speech.conf b/egs/aspire/s5/conf/segmentation_speech.conf new file mode 100644 index 00000000000..c4c75b212fc --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_speech.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=20 # Pad speech segments by this many frames on either side +max_relabel_length=10 # Maximum duration of speech that will be removed as part + # of the smoothing process. This is done only if there are no other + # speech segments nearby. +max_intersegment_length=30 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=10 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping pieces. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=20 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_speech_simple.conf b/egs/aspire/s5/conf/segmentation_speech_simple.conf new file mode 100644 index 00000000000..56c178c8115 --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_speech_simple.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=20 # Pad speech segments by this many frames on either side +max_relabel_length=-1 # Maximum duration of speech that will be removed as part + # of the smoothing process. This is done only if there are no other + # speech segments nearby. +max_intersegment_length=30 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=-1 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping pieces.
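+# (Editor's sketch of the splitting arithmetic above, assuming a 10 ms frame
+# shift: with max_segment_length=1000 and an overlap_length of 250 as set just
+# below, a 25 s segment (2500 frames) would be cut into pieces of at most
+# 1000 frames, consecutive pieces sharing 250 frames.)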
+overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=20 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh index d91d016a1f4..4bddb3e5955 100755 --- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh +++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh @@ -100,6 +100,7 @@ if [ $stage -le 9 ]; then data/train data/train_temp_for_lats utils/data/combine_short_segments.sh \ data/train_temp_for_lats $min_seg_len data/train_min${min_seg_len} + steps/compute_cmvn_stats.sh data/train_min${min_seg_len} || exit 1; fi if [ $stage -le 10 ]; then @@ -112,8 +113,8 @@ if [ $stage -le 10 ]; then rm -f $lat_dir/fsts.*.gz # save space rvb_lat_dir=exp/tri5a_rvb_min${min_seg_len}_lats - #mkdir -p $rvb_lat_dir/temp/ - #lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp + mkdir -p $rvb_lat_dir/temp/ + lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp # copy the lattices for the reverberated data rm -f $rvb_lat_dir/temp/combined_lats.scp diff --git a/egs/aspire/s5/local/multi_condition/decode.sh b/egs/aspire/s5/local/multi_condition/decode.sh index 566524095a6..b09c4780e71 100755 --- a/egs/aspire/s5/local/multi_condition/decode.sh +++ b/egs/aspire/s5/local/multi_condition/decode.sh @@ -47,7 +47,7 @@ if [ $# -ne 3 ]; then echo " --iter # Iteration of model to decode; default is final." echo " --scoring-opts # options to local/score.sh" echo " --num-threads # number of threads to use, default 1." - echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --parallel-opts # e.g. '--num-threads 4' if you supply --num-threads 4" exit 1; fi diff --git a/egs/aspire/s5/local/multi_condition/get_ctm.sh b/egs/aspire/s5/local/multi_condition/get_ctm.sh index f67a1191544..67c2c0bd87b 100755 --- a/egs/aspire/s5/local/multi_condition/get_ctm.sh +++ b/egs/aspire/s5/local/multi_condition/get_ctm.sh @@ -7,7 +7,7 @@ decode_mbr=true filter_ctm_command=cp glm= stm= -window=10 +resolve_overlaps=true overlap=5 [ -f ./path.sh ] && . ./path.sh . 
parse_options.sh || exit 1; @@ -62,7 +62,11 @@ lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true --ma lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping || exit 1; # combine the segment-wise ctm files, while resolving overlaps -python local/multi_condition/resolve_ctm_overlaps.py --overlap $overlap --window-length $window $data_dir/utt2spk $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +if $resolve_overlaps; then + steps/resolve_ctm_overlaps.py $data_dir/segments $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +else + cp $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +fi merged_ctm=$decode_dir/score_$LMWT/penalty_$wip/ctm.merged cat $merged_ctm | utils/int2sym.pl -f 5 $lang/words.txt | \ diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh index 3b778b23162..4e34c78255a 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh @@ -28,7 +28,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. Otherwise, call this script with --use-gpu false EOF fi - parallel_opts="-l gpu=1" + parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 @@ -47,7 +47,7 @@ else # almost the same, but this may be a little bit slow. num_threads=16 minibatch_size=128 - parallel_opts="-pe smp $num_threads" + parallel_opts="--num-threads $num_threads" fi # do the common parts of the script. diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh index ad5fba0929f..dc285f28f8e 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh @@ -8,7 +8,7 @@ # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# # Note: rather than using any features we have dumped on disk, this script # regenerates them from the wav data three times-- when we do lattice # generation, numerator alignment and discriminative training. This made the @@ -42,20 +42,20 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat < " + echo " Options:" + echo " --stage (0|1|2) # start scoring script from part-way through." 
+ echo "e.g.:" + echo "$0 dev_aspire data/lang exp/tri5a/graph_pp exp/nnet3/tdnn" + exit 1; +fi + +data_set=$1 +sad_nnet_dir=$2 +lang=$3 # data/lang +graph=$4 #exp/tri5a/graph_pp +dir=$5 # exp/nnet3/tdnn + +model_affix=`basename $dir` +ivector_dir=exp/nnet3 +ivector_affix=${affix:+_$affix}_chain_${model_affix}_iter$iter +affix=_${affix}_iter${iter} +act_data_set=${data_set} # we will modify the data dir, when segmenting it + # so we will keep track of original data dirfor the glm and stm files + +if [[ "$data_set" =~ "test_aspire" ]]; then + out_file=single_dev_test${affix}_$model_affix.ctm +elif [[ "$data_set" =~ "eval_aspire" ]]; then + out_file=single_eval${affix}_$model_affix.ctm +elif [[ "$data_set" =~ "dev_aspire" ]]; then + # we will just decode the directory without oracle segments file + # as we would like to operate in the actual evaluation condition + out_file=single_dev${affix}_${model_affix}.ctm +else + exit 1 +fi + +if [ $stage -le 1 ]; then + steps/segmentation/do_segmentation_data_dir.sh --reco-nj $num_jobs \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --iter $sad_iter \ + --do-downsampling false --extra-left-context 100 --extra-right-context 20 \ + --output-name output-speech --frame-subsampling-factor 6 \ + data/${data_set} $sad_nnet_dir mfcc_hires_bp data/${data_set}${affix} + # Output will be in data/${data_set}_seg +fi + +# uniform segmentation script would have created this dataset +# so update that script if you plan to change this variable +segmented_data_set=${data_set}${affix}_seg + +if [ $stage -le 2 ]; then + mfccdir=mfcc_reverb + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + date=$(date +'%m_%d_%H_%M') + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/aspire-$date/s5/$mfccdir/storage $mfccdir/storage + fi + + utils/copy_data_dir.sh data/${segmented_data_set} data/${segmented_data_set}_hires + steps/make_mfcc.sh --nj 30 --cmd "$train_cmd" \ + --mfcc-config conf/mfcc_hires.conf data/${segmented_data_set}_hires \ + exp/make_reverb_hires/${segmented_data_set} $mfccdir + steps/compute_cmvn_stats.sh data/${segmented_data_set}_hires \ + exp/make_reverb_hires/${segmented_data_set} $mfccdir + utils/fix_data_dir.sh data/${segmented_data_set}_hires + utils/validate_data_dir.sh --no-text data/${segmented_data_set}_hires +fi + +decode_dir=$dir/decode_${segmented_data_set}_pp +if [ $stage -le 5 ]; then + echo "Extracting i-vectors, stage 2" + # this does offline decoding, except we estimate the iVectors per + # speaker, excluding silence (based on alignments from a DNN decoding), with a + # different script. This is just to demonstrate that script. + # the --sub-speaker-frames is optional; if provided, it will divide each speaker + # up into "sub-speakers" of at least that many frames... can be useful if + # acoustic conditions drift over time within the speaker's data. 
+ steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \ + --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ + data/${segmented_data_set}_hires $lang $ivector_dir/extractor \ + $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}; +fi + +if [ $stage -le 6 ]; then + echo "Generating lattices, stage 2 with --acwt $acwt" + rm -f ${decode_dir}_tg/.error + steps/nnet3/decode.sh --nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config $pass2_decode_opts \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --skip-scoring true --iter $iter --lattice-beam $lattice_beam \ + --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix} \ + $graph data/${segmented_data_set}_hires ${decode_dir}_tg || touch ${decode_dir}_tg/.error + [ -f ${decode_dir}_tg/.error ] && echo "$0: Error decoding" && exit 1; +fi + +if [ $stage -le 7 ]; then + echo "Rescoring lattices" + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + --skip-scoring true \ + ${lang}_pp_test{,_fg} data/${segmented_data_set}_hires \ + ${decode_dir}_{tg,fg}; +fi + +decode_dir=${decode_dir}_fg + +if [ $stage -le 8 ]; then + local/score_aspire.sh --cmd "$decode_cmd" \ + --min-lmwt 1 --max-lmwt 20 \ + --word-ins-penalties "0.0,0.25,0.5,0.75,1.0" \ + --ctm-beam 6 \ + --iter $iter \ + --decode-mbr true \ + --resolve-overlaps false \ + --tune-hyper true \ + $lang $decode_dir $act_data_set $segmented_data_set $out_file +fi + +# Two-pass decoding baseline +# %WER 27.8 | 2120 27217 | 78.2 13.6 8.2 6.0 27.8 75.9 | -0.613 | exp/chain/tdnn_7b/decode_dev_aspire_whole_uniformsegmented_win10_over5_v6_200jobs_iterfinal_pp_fg/score_9/penalty_0.0/ctm.filt.filt.sys +# Using automatic segmentation +# %WER 28.2 | 2120 27214 | 76.5 12.4 11.1 4.7 28.2 75.2 | -0.522 | exp/chain/tdnn_7b/decode_dev_aspire_seg_v7_n_stddev_iterfinal_pp_fg/score_10/penalty_0.0/ctm.filt.filt.sys diff --git a/egs/aspire/s5/local/score_aspire.sh b/egs/aspire/s5/local/score_aspire.sh index 3e35b6d3dae..9c08a6c85d1 100755 --- a/egs/aspire/s5/local/score_aspire.sh +++ b/egs/aspire/s5/local/score_aspire.sh @@ -14,10 +14,9 @@ word_ins_penalties=0.0,0.25,0.5,0.75,1.0 default_wip=0.0 ctm_beam=6 decode_mbr=true -window=30 -overlap=5 cmd=run.pl stage=1 +resolve_overlaps=true tune_hyper=true # if true: # if the data set is "dev_aspire" we check for the # best lmwt and word_insertion_penalty, @@ -89,7 +88,7 @@ if $tune_hyper ; then # or use the default values if [ $stage -le 1 ]; then - if [ "$act_data_set" == "dev_aspire" ]; then + if [[ "$act_data_set" =~ "dev_aspire" ]]; then wip_string=$(echo $word_ins_penalties | sed 's/,/ /g') temp_wips=($wip_string) $cmd WIP=1:${#temp_wips[@]} $decode_dir/scoring/log/score.wip.WIP.log \ @@ -98,8 +97,8 @@ if $tune_hyper ; then echo \$wip \&\& \ $cmd LMWT=$min_lmwt:$max_lmwt $decode_dir/scoring/log/score.LMWT.\$wip.log \ local/multi_condition/get_ctm.sh --filter-ctm-command "$filter_ctm_command" \ - --window $window --overlap $overlap \ --beam $ctm_beam --decode-mbr $decode_mbr \ + --resolve-overlaps $resolve_overlaps \ --glm data/${act_data_set}/glm --stm data/${act_data_set}/stm \ LMWT \$wip $lang data/${segmented_data_set}_hires $model $decode_dir || exit 1; @@ -124,7 +123,7 @@ wipfile.close() fi - if [ "$act_data_set" == "test_aspire" ] || [ "$act_data_set" == "eval_aspire" ]; then + if [[ "$act_data_set" =~ "test_aspire" ]] || [[ "$act_data_set" =~ 
"eval_aspire" ]]; then # check for the best values from dev_aspire decodes dev_decode_dir=$(echo $decode_dir|sed "s/test_aspire/dev_aspire_whole/g; s/eval_aspire/dev_aspire_whole/g") if [ -f $dev_decode_dir/scoring/bestLMWT ]; then diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh new file mode 100755 index 00000000000..45fdf6c1c5c --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh @@ -0,0 +1,138 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false + +# Data options +data_dir=data/train_si284 # Expecting whole data directory. +speed_perturb=true +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:2:0:-2:-5" +base_rirs=simulated +speeds="0.9 1.0 1.1" + +# Parallel options +reco_nj=40 +cmd=queue.pl + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp + +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. + +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +data_id=`basename ${data_dir}` + +rvb_opts=() +if [ "$base_rirs" == "simulated" ]; then + # This is the config for the system using simulated RIRs and point-source noises + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") + rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") +else + # This is the config for the JHU ASpIRE submission system + rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") + rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list) +fi + +corrupted_data_id=${data_id}_corrupted + +if [ $stage -le 1 ]; then + python steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="rev" \ + --foreground-snrs=$foreground_snrs \ + --background-snrs=$background_snrs \ + --speech-rvb-probability=1 \ + --pointsource-noise-addition-probability=1 \ + --isotropic-noise-addition-probability=1 \ + --num-replications=$num_data_reps \ + --max-noises-per-minute=2 \ + data/${data_id} data/${corrupted_data_id} +fi + +corrupted_data_dir=data/${corrupted_data_id} + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $corrupted_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_random.sh --speeds "$speeds" $x ${x}_spr + done + fi + + corrupted_data_dir=${corrupted_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ + ${corrupted_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +else + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix +fi + +if [ $stage -le 8 ]; then + if [ ! -z "$reco_vad_dir" ]; then + if [ ! -f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_labels.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + fi +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh new file mode 100755 index 00000000000..8865e640674 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh @@ -0,0 +1,236 @@ +#!/bin/bash +set -e +set -u +set -o pipefail + +. path.sh +. cmd.sh + +num_data_reps=5 +data_dir=data/train_si284 + +nj=40 +reco_nj=40 + +stage=0 +corruption_stage=-10 + +pad_silence=false + +mfcc_config=conf/mfcc_hires_bp_vh.conf +feat_suffix=hires_bp_vh +mfcc_irm_config=conf/mfcc_hires_bp.conf + +dry_run=false +corrupt_only=false +speed_perturb=true +speeds="0.9 1.0 1.1" + +reco_vad_dir= + +max_jobs_run=20 + +foreground_snrs="5:2:1:0:-2:-5:-10:-20" +background_snrs="5:2:1:0:-2:-5:-10:-20" + +. 
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +data_id=`basename ${data_dir}` + +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--noise-set-parameters RIRS_NOISES/music/music_list) + +music_utt2num_frames=RIRS_NOISES/music/split_utt2num_frames + +corrupted_data_id=${data_id}_music_corrupted +orig_corrupted_data_id=$corrupted_data_id + +if [ $stage -le 1 ]; then + python steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="music" \ + --foreground-snrs=$foreground_snrs \ + --background-snrs=$background_snrs \ + --speech-rvb-probability=1 \ + --pointsource-noise-addition-probability=1 \ + --isotropic-noise-addition-probability=1 \ + --num-replications=$num_data_reps \ + --max-noises-per-minute=5 \ + data/${data_id} data/${corrupted_data_id} +fi + +if $dry_run; then + exit 0 +fi + +corrupted_data_dir=data/${corrupted_data_id} +# Data dir without speed perturbation +orig_corrupted_data_dir=$corrupted_data_dir + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $corrupted_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_random.sh --speeds "$speeds" $x ${x}_spr + done + fi + + corrupted_data_dir=${corrupted_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ + ${corrupted_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + if [ ! -z $feat_suffix ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + fi + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir +else + if [ ! -z $feat_suffix ]; then + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + fi +fi + +if [ $stage -le 8 ]; then + if [ ! -z "$reco_vad_dir" ]; then + if [ ! 
-f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_labels.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + fi +fi + +# music_dir is without speed perturbation +music_dir=exp/make_music_labels/${orig_corrupted_data_id} +music_data_dir=$music_dir/music_data + +mkdir -p $music_data_dir + +if [ $stage -le 10 ]; then + utils/data/get_reco2num_frames.sh --nj $reco_nj $orig_corrupted_data_dir + utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + + cp $orig_corrupted_data_dir/wav.scp $music_data_dir + + # The first rspecifier is a dummy required to get the recording-id as key. + # It has no segments in it as they are all removed by --remove-labels. + $train_cmd JOB=1:$reco_nj $music_dir/log/get_music_seg.JOB.log \ + segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:${orig_corrupted_data_dir}/reco2num_frames \ + --additive-signals-segmentation-rspecifier="ark:segmentation-init-from-lengths ark:$music_utt2num_frames ark:- |" \ + "ark,t:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt $orig_corrupted_data_dir/additive_signals_info.txt |" \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- \ + ark:- \| \ + segmentation-to-segments ark:- ark:$music_data_dir/utt2spk.JOB \ + $music_data_dir/segments.JOB + + utils/data/get_reco2utt.sh $corrupted_data_dir + for n in `seq $reco_nj`; do cat $music_data_dir/utt2spk.$n; done > $music_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $music_data_dir/segments.$n; done > $music_data_dir/segments + + utils/fix_data_dir.sh $music_data_dir + + if $speed_perturb; then + utils/data/perturb_data_dir_speed_4way.sh $music_data_dir ${music_data_dir}_spr + mv ${music_data_dir}_spr/segments{,.temp} + cat ${music_data_dir}_spr/segments.temp | \ + utils/filter_scp.pl -f 2 ${corrupted_data_dir}/reco2utt > ${music_data_dir}_spr/segments + utils/fix_data_dir.sh ${music_data_dir}_spr + rm ${music_data_dir}_spr/segments.temp + fi +fi + +if $speed_perturb; then + music_data_dir=${music_data_dir}_spr +fi + +label_dir=music_labels + +mkdir -p $label_dir +label_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $label_dir ${PWD}` + +if [ $stage -le 11 ]; then + utils/split_data.sh --per-reco ${corrupted_data_dir} $reco_nj + # TODO: Don't assume that it's a whole data directory.
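+  # (Editor's note: the lines below clamp the job count for
+  # get_utt2num_frames.sh to at most 4; an equivalent one-line sketch would be
+  #   nj=$(( reco_nj < 4 ? reco_nj : 4 ))
+  # but the explicit if-block matches the style used elsewhere in this script.)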
+ nj=$reco_nj + if [ $nj -gt 4 ]; then + nj=4 + fi + utils/data/get_utt2num_frames.sh --cmd "$train_cmd" --nj $nj ${corrupted_data_dir} + utils/data/get_reco2utt.sh $music_data_dir/ + + $train_cmd JOB=1:$reco_nj $music_dir/log/get_music_labels.JOB.log \ + segmentation-init-from-segments --shift-to-zero=false \ + "utils/filter_scp.pl -f 2 ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${music_data_dir}/segments |" ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + "ark,t:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${music_data_dir}/reco2utt |" \ + ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ + ark,scp:$label_dir/music_labels_${corrupted_data_id}.JOB.ark,$label_dir/music_labels_${corrupted_data_id}.JOB.scp +fi + +for n in `seq $reco_nj`; do + cat $label_dir/music_labels_${corrupted_data_id}.$n.scp +done | utils/filter_scp.pl ${corrupted_data_dir}/utt2spk > ${corrupted_data_dir}/music_labels.scp + +if [ $stage -le 12 ]; then + utils/split_data.sh --per-reco ${corrupted_data_dir} $reco_nj + + cat <<EOF > $music_dir/speech_music_map +0 0 0 +0 1 3 +1 0 1 +1 1 2 +EOF + + $train_cmd JOB=1:$reco_nj $music_dir/log/get_speech_music_labels.JOB.log \ + intersect-int-vectors --mapping-in=$music_dir/speech_music_map --length-tolerance=2 \ + "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${corrupted_data_dir}/speech_labels.scp |" \ + "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${corrupted_data_dir}/music_labels.scp |" \ + ark,scp:$label_dir/speech_music_labels_${corrupted_data_id}.JOB.ark,$label_dir/speech_music_labels_${corrupted_data_id}.JOB.scp + + for n in `seq $reco_nj`; do + cat $label_dir/speech_music_labels_${corrupted_data_id}.$n.scp + done > $corrupted_data_dir/speech_music_labels.scp +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh new file mode 100755 index 00000000000..991bec96308 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh @@ -0,0 +1,209 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false + +# Data options +data_dir=data/train_si284 # Expecting non-whole data directory +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:0:-5" +overlap_snrs="5:2:1:0:-1:-2" +overlap_labels_dir=overlap_labels + +# Parallel options +nj=40 +cmd=queue.pl + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp +energy_config=conf/log_energy.conf + +utt_vad_dir= + +.
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--speech-segments-set-parameters="$data_dir/wav.scp,$data_dir/segments") + +if [ $stage -le 0 ]; then + steps/segmentation/get_data_dir_with_segmented_wav.py \ + $data_dir ${data_dir}_seg +fi + +data_dir=${data_dir}_seg + +data_id=`basename ${data_dir}` + +corrupted_data_id=${data_id}_ovlp_corrupted +clean_data_id=${data_id}_ovlp_clean +noise_data_id=${data_id}_ovlp_noise + +utils/data/get_reco2dur.sh --cmd $cmd --nj 40 $data_dir + +if [ $stage -le 1 ]; then + python steps/data/make_corrupted_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="ovlp" \ + --overlap-snrs=$overlap_snrs \ + --speech-rvb-probability=1 \ + --overlapping-speech-addition-probability=1 \ + --num-replications=$num_data_reps \ + --min-overlapping-segments-per-minute=1 \ + --max-overlapping-segments-per-minute=1 \ + --output-additive-noise-dir=data/${noise_data_id} \ + --output-reverb-dir=data/${clean_data_id} \ + ${data_dir} data/${corrupted_data_id} +fi + +clean_data_dir=data/${clean_data_id} +corrupted_data_dir=data/${corrupted_data_id} +noise_data_dir=data/${noise_data_id} +orig_corrupted_data_dir=data/${corrupted_data_id} + +if false; then + if [ $stage -le 2 ]; then + for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do + utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + done + fi + + corrupted_data_dir=${corrupted_data_dir}_sp + clean_data_dir=${clean_data_dir}_sp + noise_data_dir=${noise_data_dir}_sp + + corrupted_data_id=${corrupted_data_id}_sp + clean_data_id=${clean_data_id}_sp + noise_data_id=${noise_data_id}_sp +fi + +if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $nj \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +else + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix +fi + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d log_energy/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/log_energy/storage log_energy/storage +fi + +if [ $stage -le 5 ]; then + utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_log_energy + steps/make_mfcc.sh --mfcc-config conf/log_energy.conf \ + --cmd "$cmd" --nj $nj ${clean_data_dir}_log_energy \ + exp/make_log_energy/${clean_data_id} log_energy +fi + +if [ $stage -le 6 ]; then + utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_log_energy + steps/make_mfcc.sh --mfcc-config conf/log_energy.conf \ + --cmd "$cmd" --nj $nj ${noise_data_dir}_log_energy \ + exp/make_log_energy/${noise_data_id} log_energy +fi + +targets_dir=log_snr +if [ $stage -le 7 ]; then + mkdir -p exp/make_log_snr/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + # Get log-SNR targets + steps/segmentation/make_snr_targets.sh \ + --nj $nj --cmd "$cmd" \ + --target-type Snr --compress false \ + ${clean_data_dir}_log_energy ${noise_data_dir}_log_energy ${corrupted_data_dir} \ + exp/make_log_snr/${corrupted_data_id} $targets_dir +fi + +exit 0 + +if [ $stage -le 5 ]; then + # clean here is the reverberated first-speaker signal + utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_$feat_suffix + clean_data_dir=${clean_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $nj \ + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir +else + clean_data_dir=${clean_data_dir}_$feat_suffix +fi + +if [ $stage -le 6 ]; then + # noise here is the reverberated second-speaker signal + utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_$feat_suffix + noise_data_dir=${noise_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $nj \ + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir +else + noise_data_dir=${noise_data_dir}_$feat_suffix +fi + +targets_dir=irm_targets +if [ $stage -le 8 ]; then + mkdir -p exp/make_irm_targets/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + # Get SNR targets only for the overlapped speech labels. + steps/segmentation/make_snr_targets.sh \ + --nj $nj --cmd "$cmd --max-jobs-run $max_jobs_run" \ + --target-type Irm --compress false --apply-exp true \ + --ali-rspecifier "ark,s,cs:cat ${corrupted_data_dir}/sad_seg.scp | segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames scp:- ark:- |" \ + overlapped_speech_labels.scp \ + --silence-phones 0 \ + ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ + exp/make_irm_targets/${corrupted_data_id} $targets_dir +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh new file mode 100755 index 00000000000..19b4036c9aa --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh @@ -0,0 +1,236 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. 
path.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false + +# Data options +data_dir=data/train_si284 # Expecting whole data directory. +speed_perturb=true +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:2:0:-2:-5" +base_rirs=simulated +speeds="0.9 1.0 1.1" +resample_data_dir=false + +# Parallel options +reco_nj=40 +cmd=queue.pl + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp + +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. + +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +data_id=`basename ${data_dir}` + +rvb_opts=() +if [ "$base_rirs" == "simulated" ]; then + # This is the config for the system using simulated RIRs and point-source noises + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") + rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") +else + # This is the config for the JHU ASpIRE submission system + rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") + rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list) +fi + +if $resample_data_dir; then + sample_frequency=`cat $mfcc_config | perl -ne 'if (m/--sample-frequency=(\S+)/) { print $1; }'` + if [ -z "$sample_frequency" ]; then + sample_frequency=16000 + fi + + utils/data/resample_data_dir.sh $sample_frequency ${data_dir} || exit 1 + data_id=`basename ${data_dir}` + rvb_opts+=(--source-sampling-rate=$sample_frequency) +fi + +corrupted_data_id=${data_id}_corrupted +clean_data_id=${data_id}_clean +noise_data_id=${data_id}_noise + +if [ $stage -le 1 ]; then + python steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="rev" \ + --foreground-snrs=$foreground_snrs \ + --background-snrs=$background_snrs \ + --speech-rvb-probability=1 \ + --pointsource-noise-addition-probability=1 \ + --isotropic-noise-addition-probability=1 \ + --num-replications=$num_data_reps \ + --max-noises-per-minute=2 \ + --output-additive-noise-dir=data/${noise_data_id} \ + --output-reverb-dir=data/${clean_data_id} \ + data/${data_id} data/${corrupted_data_id} +fi + +corrupted_data_dir=data/${corrupted_data_id} +clean_data_dir=data/${clean_data_id} +noise_data_dir=data/${noise_data_id} + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $corrupted_data_dir $clean_data_dir $noise_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_random.sh --speeds "$speeds" $x ${x}_spr + done + fi + + corrupted_data_dir=${corrupted_data_dir}_spr + clean_data_dir=${clean_data_dir}_spr + noise_data_dir=${noise_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr + clean_data_id=${clean_data_id}_spr + noise_data_id=${noise_data_id}_spr + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ + ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} + fi +fi + +if 
$corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +else + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix +fi + +if [ $stage -le 5 ]; then + utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_$feat_suffix + clean_data_dir=${clean_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir +else + clean_data_dir=${clean_data_dir}_$feat_suffix +fi + +if [ $stage -le 6 ]; then + utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_$feat_suffix + noise_data_dir=${noise_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir +else + noise_data_dir=${noise_data_dir}_$feat_suffix +fi + +targets_dir=irm_targets +if [ $stage -le 7 ]; then + mkdir -p exp/make_log_snr/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + idct_params=`cat $mfcc_config | perl -e ' + $num_mel_bins = 23; $num_ceps = 13; $cepstral_lifter = 22.0; + while (<>) { + chomp; + s/#.+//g; + if (m/^\s*$/) { next; } + if (m/--num-mel-bins=(\S+)/) { + $num_mel_bins = $1; + } elsif (m/--num-ceps=(\S+)/) { + $num_ceps = $1; + } elsif (m/--cepstral-lifter=(\S+)/) { + $cepstral_lifter = $1; + } + } + print "$num_mel_bins $num_ceps $cepstral_lifter";'` + + num_filters=`echo $idct_params | awk '{print $1}'` + num_ceps=`echo $idct_params | awk '{print $2}'` + cepstral_lifter=`echo $idct_params | awk '{print $3}'` + echo "$num_filters $num_ceps $cepstral_lifter" + + mkdir -p exp/make_irm_targets/$corrupted_data_id + utils/data/get_dct_matrix.py --get-idct-matrix=true \ + --num-filters=$num_filters --num-ceps=$num_ceps \ + --cepstral-lifter=$cepstral_lifter \ + exp/make_irm_targets/$corrupted_data_id/idct_matrix + + # Get log-SNR targets + steps/segmentation/make_snr_targets.sh \ + --nj $reco_nj --cmd "$cmd" \ + --target-type Irm --compress false \ + --transform-matrix exp/make_irm_targets/$corrupted_data_id/idct_matrix \ + ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ + exp/make_irm_targets/${corrupted_data_id} $targets_dir +fi + + +if [ $stage -le 8 ]; then + if [ ! -z "$reco_vad_dir" ]; then + if [ ! 
-f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_labels.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + + cat $reco_vad_dir/deriv_weights_manual_seg.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights_for_irm_targets.scp + fi +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh new file mode 100755 index 00000000000..75dbce578b2 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh @@ -0,0 +1,284 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh +. cmd.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false +dry_run=false + +# Data options +data_dir=data/train_si284 # Expecting non-whole data directory +speed_perturb=true +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:0:-5" +overlap_snrs="5:2:1:0:-1:-2" +# Whole-data directory corresponding to data_dir +whole_data_dir=data/train_si284_whole +overlap_labels_dir=overlap_labels + +# Parallel options
+reco_nj=40 +nj=40 +cmd=queue.pl +max_jobs_run=20 + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp +energy_config=conf/log_energy.conf + +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. +utt_vad_dir= + +.
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--speech-segments-set-parameters="$data_dir/wav.scp,$data_dir/segments") + +whole_data_id=`basename ${whole_data_dir}` + +corrupted_data_id=${whole_data_id}_ovlp_corrupted +clean_data_id=${whole_data_id}_ovlp_clean +noise_data_id=${whole_data_id}_ovlp_noise + +if [ $stage -le 1 ]; then + python steps/data/make_corrupted_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="ovlp" \ + --overlap-snrs=$overlap_snrs \ + --speech-rvb-probability=1 \ + --overlapping-speech-addition-probability=1 \ + --num-replications=$num_data_reps \ + --min-overlapping-segments-per-minute=5 \ + --max-overlapping-segments-per-minute=20 \ + --output-additive-noise-dir=data/${noise_data_id} \ + --output-reverb-dir=data/${clean_data_id} \ + data/${whole_data_id} data/${corrupted_data_id} +fi + +if $dry_run; then + exit 0 +fi + +clean_data_dir=data/${clean_data_id} +corrupted_data_dir=data/${corrupted_data_id} +noise_data_dir=data/${noise_data_id} +orig_corrupted_data_dir=$corrupted_data_dir + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + done + fi + + corrupted_data_dir=${corrupted_data_dir}_sp + clean_data_dir=${clean_data_dir}_sp + noise_data_dir=${noise_data_dir}_sp + + corrupted_data_id=${corrupted_data_id}_sp + clean_data_id=${clean_data_id}_sp + noise_data_id=${noise_data_id}_sp + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 --force true ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +fi + +if [ $stage -le 5 ]; then + steps/make_mfcc.sh --mfcc-config $energy_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats +fi + +if [ $stage -le 6 ]; then + steps/make_mfcc.sh --mfcc-config $energy_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats +fi + +if [ -z "$reco_vad_dir" ]; then + echo "reco-vad-dir must be provided" + exit 1 +fi + +targets_dir=irm_targets +if [ $stage -le 8 ]; then + mkdir -p exp/make_irm_targets/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + steps/segmentation/make_snr_targets.sh \ + --nj $nj --cmd "$train_cmd --max-jobs-run $max_jobs_run" \ + --target-type Irm --compress true --apply-exp false \ + ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ + exp/make_irm_targets/${corrupted_data_id} $targets_dir +fi + +# Combine the VAD from the base recording and the VAD from the overlapping segments +# to create per-frame labels of the number of overlapping speech segments +# Unreliable segments are regions where no VAD labels were available for the +# overlapping segments. These can be later removed by setting deriv weights to 0. + +# Data dirs without speed perturbation +overlap_dir=exp/make_overlap_labels/${corrupted_data_id} +unreliable_dir=exp/make_overlap_labels/unreliable_${corrupted_data_id} +overlap_data_dir=$overlap_dir/overlap_data +unreliable_data_dir=$overlap_dir/unreliable_data + +mkdir -p $unreliable_dir + +if [ $stage -le 8 ]; then + cat $reco_vad_dir/sad_seg.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "ovlp" \ + | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp + utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_seg.JOB.log \ + segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ + ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt \ + scp:$utt_vad_dir/sad_seg.scp ark:- ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark +fi + +if [ $stage -le 9 ]; then + mkdir -p $overlap_data_dir $unreliable_data_dir + cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir + cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir + + # Create segments where there is definitely an overlap. 
+ # Assume no more than 10 speakers overlap. + $train_cmd JOB=1:$reco_nj $overlap_dir/log/process_to_segments.JOB.log \ + segmentation-post-process --remove-labels=0:1 \ + ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_unreliable_segments.JOB.log \ + segmentation-to-segments --single-speaker \ + ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ + ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB + + for n in `seq $reco_nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments + for n in `seq $reco_nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments + + utils/fix_data_dir.sh $overlap_data_dir + utils/fix_data_dir.sh $unreliable_data_dir + + if $speed_perturb; then + utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp + fi +fi + +if $speed_perturb; then + overlap_data_dir=${overlap_data_dir}_sp + unreliable_data_dir=${unreliable_data_dir}_sp +fi + +# make $overlap_labels_dir an absolute pathname. +overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` + +if [ $stage -le 10 ]; then + utils/split_data.sh --per-reco ${overlap_data_dir} $reco_nj + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ + utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ + segmentation-init-from-segments --shift-to-zero=false \ + ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ + ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp +fi + +for n in `seq $reco_nj`; do + cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp +done > ${corrupted_data_dir}/overlapped_speech_labels.scp + +if [ $stage -le 11 ]; then + utils/data/get_reco2utt.sh ${unreliable_data_dir} + + # First convert the unreliable segments into a recording-level segmentation. + # Initialize a segmentation from utt2num_frames and set to 0, the regions + # of unreliable segments. At this stage deriv weights is 1 for all but the + # unreliable segment regions. + # Initialize a segmentation from the VAD labels and retain only the speech segments. + # Intersect this with the deriv weights segmentation from above. At this stage + # deriv weights is 1 for only the regions where base VAD label is 1 and + # the overlapping segment is not unreliable. Convert this to deriv weights. 
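+  # (Editor's summary, added for readability; it mirrors the pipeline below
+  # one stage at a time:
+  #  1. segmentation-init-from-segments + segmentation-combine-segments-to-recordings
+  #     turn the unreliable utterance segments into one segmentation per recording;
+  #  2. segmentation-create-subsegments --filter-label=1 --subsegment-label=0
+  #     starts from an all-ones segmentation built from utt2num_frames and
+  #     zeroes out the unreliable regions;
+  #  3. segmentation-intersect-segments against the SAD segmentation (labels
+  #     0:2:3 removed, leaving speech) keeps label 1 only where the base VAD
+  #     says speech and the region is not unreliable;
+  #  4. segmentation-to-ali + convert_ali_to_vec.pl + copy-vector write the
+  #     result out as per-frame derivative weights.)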
+ $train_cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ + segmentation-init-from-segments --shift-to-zero=false \ + "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ + segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ + ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \ + "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \ + ark:- ark:- \| \ + segmentation-intersect-segments --mismatch-label=0 \ + "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \ + ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp + + for n in `seq $reco_nj`; do + cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp + done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/make_musan_music.py b/egs/aspire/s5/local/segmentation/make_musan_music.py new file mode 100755 index 00000000000..5d13078de63 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/make_musan_music.py @@ -0,0 +1,69 @@ +#! 
/usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import os
+
+
+def _get_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--use-vocals", type=str, default="false",
+ choices=["true", "false"],
+ help="If true, also add music with vocals in the "
+ "output music-set-parameters")
+ parser.add_argument("root_dir", type=str,
+ help="Root directory of MUSAN corpus")
+ parser.add_argument("music_list", type=argparse.FileType('w'),
+ help="Output file for the music list, in the noise-set-parameters "
+ "format expected by steps/data/reverberate_data_dir.py")
+
+ args = parser.parse_args()
+
+ args.use_vocals = (args.use_vocals == "true")
+ return args
+
+
+def read_vocals(annotations):
+ vocals = {}
+ for line in open(annotations):
+ parts = line.strip().split()
+ if parts[2] == "Y":
+ vocals[parts[0]] = True
+ return vocals
+
+
+def write_music(utt, file_path, music_list):
+ print('{utt} {file_path}'.format(
+ utt=utt, file_path=file_path), file=music_list)
+
+
+def prepare_music_set(root_dir, use_vocals, music_list):
+ vocals = {}
+ music_dir = os.path.join(root_dir, "music")
+ for root, dirs, files in os.walk(music_dir):
+ if os.path.exists(os.path.join(root, "ANNOTATIONS")):
+ vocals = read_vocals(os.path.join(root, "ANNOTATIONS"))
+
+ for f in files:
+ file_path = os.path.join(root, f)
+ if f.endswith(".wav"):
+ utt = str(f).replace(".wav", "")
+ if not use_vocals and utt in vocals:
+ continue
+ write_music(utt, file_path, music_list)
+
+
+def main():
+ args = _get_args()
+
+ try:
+ prepare_music_set(args.root_dir, args.use_vocals,
+ args.music_list)
+ finally:
+ # The output file is closed here, rather than in prepare_music_set(),
+ # so that it is closed even if prepare_music_set() raises an exception.
+ args.music_list.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py b/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py
new file mode 100755
index 00000000000..e859a3593ce
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py
@@ -0,0 +1,616 @@
+#!/usr/bin/env python
+
+# we're using python 3.x style print but want it to work in python 2.x.
+from __future__ import print_function
+import os
+import argparse
+import shlex
+import sys
+import warnings
+import copy
+import imp
+import ast
+
+nodes = imp.load_source('', 'steps/nnet3/components.py')
+import libs.common as common_lib
+
+def GetArgs():
+ # we add compulsory arguments as named arguments for readability
+ parser = argparse.ArgumentParser(description="Writes config files and variables "
+ "for TDNN creation and training",
+ epilog="See steps/nnet3/tdnn/train.sh for example.")
+
+ # Only one of these arguments can be specified, and exactly one of them
+ # must be specified
+ feat_group = parser.add_mutually_exclusive_group(required = True)
+ feat_group.add_argument("--feat-dim", type=int,
+ help="Raw feature dimension, e.g. 13")
+ feat_group.add_argument("--feat-dir", type=str,
+ help="Feature directory, from which we derive the feat-dim")
+
+ # only one of these arguments can be specified
+ ivector_group = parser.add_mutually_exclusive_group(required = False)
+ ivector_group.add_argument("--ivector-dim", type=int,
+ help="iVector dimension, e.g. 100", default=0)
+ ivector_group.add_argument("--ivector-dir", type=str,
+ help="iVector dir, which will be used to derive the ivector-dim ", default=None)
+
+ num_target_group = parser.add_mutually_exclusive_group(required = True)
+ num_target_group.add_argument("--num-targets", type=int,
+ help="number of network targets (e.g.
num-pdf-ids/num-leaves)") + num_target_group.add_argument("--ali-dir", type=str, + help="alignment directory, from which we derive the num-targets") + num_target_group.add_argument("--tree-dir", type=str, + help="directory with final.mdl, from which we derive the num-targets") + num_target_group.add_argument("--output-node-parameters", type=str, action='append', + dest='output_node_para_array', + help = "Define output nodes' and their parameters like output-suffix, dim, objective-type etc") + # CNN options + parser.add_argument('--cnn.layer', type=str, action='append', dest = "cnn_layer", + help="CNN parameters at each CNN layer, e.g. --filt-x-dim=3 --filt-y-dim=8 " + "--filt-x-step=1 --filt-y-step=1 --num-filters=256 --pool-x-size=1 --pool-y-size=3 " + "--pool-z-size=1 --pool-x-step=1 --pool-y-step=3 --pool-z-step=1, " + "when CNN layers are used, no LDA will be added", default = None) + parser.add_argument("--cnn.bottleneck-dim", type=int, dest = "cnn_bottleneck_dim", + help="Output dimension of the linear layer at the CNN output " + "for dimension reduction, e.g. 256." + "The default zero means this layer is not needed.", default=0) + + # General neural network options + parser.add_argument("--splice-indexes", type=str, required = True, + help="Splice indexes at each layer, e.g. '-3,-2,-1,0,1,2,3' " + "If CNN layers are used the first set of splice indexes will be used as input " + "to the first CNN layer and later splice indexes will be interpreted as indexes " + "for the TDNNs.") + parser.add_argument("--add-lda", type=str, action=common_lib.StrToBoolAction, + help="If \"true\" an LDA matrix computed from the input features " + "(spliced according to the first set of splice-indexes) will be used as " + "the first Affine layer. This affine layer's parameters are fixed during training. " + "This variable needs to be set to \"false\" when using dense-targets.\n" + "If --cnn.layer is specified this option will be forced to \"false\".", + default=True, choices = ["false", "true"]) + + parser.add_argument("--include-log-softmax", type=str, action=common_lib.StrToBoolAction, + help="add the final softmax layer ", default=True, choices = ["false", "true"]) + parser.add_argument("--add-final-sigmoid", type=str, action=common_lib.StrToBoolAction, + help="add a final sigmoid layer as alternate to log-softmax-layer. " + "Can only be used if include-log-softmax is false. " + "This is useful in cases where you want the output to be " + "like probabilities between 0 and 1. Typically the nnet " + "is trained with an objective such as quadratic", + default=False, choices = ["false", "true"]) + + parser.add_argument("--objective-type", type=str, + help = "the type of objective; i.e. 
quadratic or linear",
+ default="linear", choices = ["linear", "quadratic"])
+ parser.add_argument("--xent-regularize", type=float,
+ help="For chain models, if nonzero, add a separate output for cross-entropy "
+ "regularization (with learning-rate-factor equal to the inverse of this)",
+ default=0.0)
+ parser.add_argument("--final-layer-normalize-target", type=float,
+ help="RMS target for final layer (set to <1 if final layer learns too fast)",
+ default=1.0)
+ parser.add_argument("--subset-dim", type=int, default=0,
+ help="dimension of the subset of units to be sent to the central frame")
+ parser.add_argument("--pnorm-input-dim", type=int,
+ help="input dimension to p-norm nonlinearities")
+ parser.add_argument("--pnorm-output-dim", type=int,
+ help="output dimension of p-norm nonlinearities")
+ relu_dim_group = parser.add_mutually_exclusive_group(required = False)
+ relu_dim_group.add_argument("--relu-dim", type=int,
+ help="dimension of all ReLU nonlinearity layers")
+ relu_dim_group.add_argument("--relu-dim-final", type=int,
+ help="dimension of the last ReLU nonlinearity layer. Dimensions increase geometrically from the first through the last ReLU layer.", default=None)
+ parser.add_argument("--relu-dim-init", type=int,
+ help="dimension of the first ReLU nonlinearity layer. Dimensions increase geometrically from the first through the last ReLU layer.", default=None)
+
+ parser.add_argument("--self-repair-scale-nonlinearity", type=float,
+ help="A non-zero value activates the self-repair mechanism in the sigmoid and tanh non-linearities of the network", default=None)
+
+
+ parser.add_argument("--use-presoftmax-prior-scale", type=str, action=common_lib.StrToBoolAction,
+ help="if true, a presoftmax-prior-scale is added",
+ choices=['true', 'false'], default = True)
+
+ # Options to convert input MFCC into Fbank features. This is useful when an
+ # LDA layer is not added (such as when using dense targets)
+ parser.add_argument("--cnn.cepstral-lifter", type=float, dest = "cepstral_lifter",
+ help="The factor used for determining the liftering vector in the production of MFCC. "
+ "User has to ensure that it matches the lifter used in MFCC generation, "
+ "e.g. 22.0", default=22.0)
+
+ parser.add_argument("config_dir",
+ help="Directory to write config files and variables")
+
+ print(' '.join(sys.argv))
+
+ args = parser.parse_args()
+ args = CheckArgs(args)
+
+ return args
+
+def CheckArgs(args):
+ if not os.path.exists(args.config_dir):
+ os.makedirs(args.config_dir)
+
+ ## Check arguments.
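+ ## Illustrative note (hypothetical values): a single output node can be
+ ## requested either via --num-targets, e.g.
+ ##   --num-targets=3 --objective-type=linear
+ ## or, equivalently, via --output-node-parameters, e.g.
+ ##   --output-node-parameters="--output-suffix=sad --dim=3 --objective-type=linear"
+ ## The checks below normalize the former into the latter form.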
+ if args.feat_dir is not None:
+ args.feat_dim = common_lib.get_feat_dim(args.feat_dir)
+
+ if args.ivector_dir is not None:
+ args.ivector_dim = common_lib.get_ivector_dim(args.ivector_dir)
+
+ if not args.feat_dim > 0:
+ raise Exception("feat-dim has to be positive")
+
+ if args.output_node_para_array is None:
+ # --output-node-parameters was not specified; construct a single
+ # output node from --num-targets / --ali-dir / --tree-dir below.
+ args.output_node_para_array = []
+ if len(args.output_node_para_array) == 0:
+ if args.ali_dir is not None:
+ args.num_targets = common_lib.get_number_of_leaves_from_tree(args.ali_dir)
+ elif args.tree_dir is not None:
+ args.num_targets = common_lib.get_number_of_leaves_from_tree(args.tree_dir)
+ if not args.num_targets > 0:
+ print(args.num_targets)
+ raise Exception("num_targets has to be positive")
+ args.output_node_para_array.append(
+ "--dim={0} --objective-type={1} --include-log-softmax={2} --add-final-sigmoid={3} --xent-regularize={4}".format(
+ args.num_targets, args.objective_type,
+ "true" if args.include_log_softmax else "false",
+ "true" if args.add_final_sigmoid else "false",
+ args.xent_regularize))
+
+ if not args.ivector_dim >= 0:
+ raise Exception("ivector-dim has to be non-negative")
+
+ if (args.subset_dim < 0):
+ raise Exception("--subset-dim has to be non-negative")
+
+ if not args.relu_dim is None:
+ if not args.pnorm_input_dim is None or not args.pnorm_output_dim is None or not args.relu_dim_init is None:
+ raise Exception("--relu-dim argument not compatible with "
+ "--pnorm-input-dim or --pnorm-output-dim or --relu-dim-init options")
+ args.nonlin_input_dim = args.relu_dim
+ args.nonlin_output_dim = args.relu_dim
+ args.nonlin_output_dim_final = None
+ args.nonlin_output_dim_init = None
+ args.nonlin_type = 'relu'
+
+ elif not args.relu_dim_final is None:
+ if not args.pnorm_input_dim is None or not args.pnorm_output_dim is None:
+ raise Exception("--relu-dim-final argument not compatible with "
+ "--pnorm-input-dim or --pnorm-output-dim options")
+ if args.relu_dim_init is None:
+ raise Exception("--relu-dim-init argument should also be provided with --relu-dim-final")
+ if args.relu_dim_init > args.relu_dim_final:
+ raise Exception("--relu-dim-init has to be no larger than --relu-dim-final")
+ args.nonlin_input_dim = None
+ args.nonlin_output_dim = None
+ args.nonlin_output_dim_final = args.relu_dim_final
+ args.nonlin_output_dim_init = args.relu_dim_init
+ args.nonlin_type = 'relu'
+
+ else:
+ if not args.relu_dim_init is None:
+ raise Exception("--relu-dim-init argument must be accompanied by --relu-dim-final")
+ if not args.pnorm_input_dim > 0 or not args.pnorm_output_dim > 0:
+ raise Exception("--relu-dim not set, so expected --pnorm-input-dim and "
+ "--pnorm-output-dim to be provided.")
+ args.nonlin_input_dim = args.pnorm_input_dim
+ args.nonlin_output_dim = args.pnorm_output_dim
+ if (args.nonlin_input_dim < args.nonlin_output_dim) or (args.nonlin_input_dim % args.nonlin_output_dim != 0):
+ raise Exception("Invalid --pnorm-input-dim {0} and --pnorm-output-dim {1}".format(args.nonlin_input_dim, args.nonlin_output_dim))
+ args.nonlin_output_dim_final = None
+ args.nonlin_output_dim_init = None
+ args.nonlin_type = 'pnorm'
+
+ if args.add_lda and args.cnn_layer is not None:
+ args.add_lda = False
+ warnings.warn("--add-lda is set to false as CNN layers are used.")
+
+ return args
+
+def AddConvMaxpLayer(config_lines, name, input, args):
+ if '3d-dim' not in input:
+ raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.")
+
+ input = nodes.AddConvolutionLayer(config_lines, name, input,
+ input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2],
+ args.filt_x_dim,
args.filt_y_dim, + args.filt_x_step, args.filt_y_step, + args.num_filters, input['vectorization']) + + if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: + input = nodes.AddMaxpoolingLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.pool_x_size, args.pool_y_size, args.pool_z_size, + args.pool_x_step, args.pool_y_step, args.pool_z_step) + + return input + +# The ivectors are processed through an affine layer parallel to the CNN layers, +# then concatenated with the CNN output and passed to the deeper part of the network. +def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): + cnn_args = ParseCnnString(cnn_layer) + num_cnn_layers = len(cnn_args) + # We use an Idct layer here to convert MFCC to FBANK features + common_lib.write_idct_matrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") + prev_layer_output = {'descriptor': "input", + 'dimension': feat_dim} + prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') + + list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] + splice_descriptor = "Append({0})".format(", ".join(list)) + cnn_input_dim = len(splice_indexes) * feat_dim + prev_layer_output = {'descriptor': splice_descriptor, + 'dimension': cnn_input_dim, + '3d-dim': [len(splice_indexes), feat_dim, 1], + 'vectorization': 'yzx'} + + for cl in range(0, num_cnn_layers): + prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) + + if cnn_bottleneck_dim > 0: + prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") + + if ivector_dim > 0: + iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', + 'dimension': ivector_dim} + iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") + prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) + prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] + + return prev_layer_output + +def PrintConfig(file_name, config_lines): + f = open(file_name, 'w') + f.write("\n".join(config_lines['components'])+"\n") + f.write("\n#Component nodes\n") + f.write("\n".join(config_lines['component-nodes'])+"\n") + f.close() + +def ParseCnnString(cnn_param_string_list): + cnn_parser = argparse.ArgumentParser(description="cnn argument parser") + + cnn_parser.add_argument("--filt-x-dim", required=True, type=int) + cnn_parser.add_argument("--filt-y-dim", required=True, type=int) + cnn_parser.add_argument("--filt-x-step", type=int, default = 1) + cnn_parser.add_argument("--filt-y-step", type=int, default = 1) + cnn_parser.add_argument("--num-filters", required=True, type=int) + cnn_parser.add_argument("--pool-x-size", type=int, default = 1) + cnn_parser.add_argument("--pool-y-size", type=int, default = 1) + cnn_parser.add_argument("--pool-z-size", type=int, default = 1) + cnn_parser.add_argument("--pool-x-step", type=int, default = 1) + cnn_parser.add_argument("--pool-y-step", type=int, default = 1) + cnn_parser.add_argument("--pool-z-step", type=int, default = 1) + + cnn_args = [] + for cl in range(0, len(cnn_param_string_list)): + 
cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl])))
+
+ return cnn_args
+
+def ParseSpliceString(splice_indexes):
+ splice_array = []
+ left_context = 0
+ right_context = 0
+ split_on_spaces = splice_indexes.split() # we already checked the string is nonempty.
+ if len(split_on_spaces) < 1:
+ raise Exception("invalid splice-indexes argument, too short: "
+ + splice_indexes)
+ try:
+ for string in split_on_spaces:
+ this_splices = string.split(",")
+ if len(this_splices) < 1:
+ raise Exception("invalid splice-indexes argument, too-short element: "
+ + splice_indexes)
+ # the rest of this block updates left_context and right_context, and
+ # does some checking.
+ leftmost_splice = 10000
+ rightmost_splice = -10000
+
+ int_list = []
+ for s in this_splices:
+ try:
+ n = int(s)
+ if n < leftmost_splice:
+ leftmost_splice = n
+ if n > rightmost_splice:
+ rightmost_splice = n
+ int_list.append(n)
+ except ValueError:
+ #if len(splice_array) == 0:
+ # raise Exception("First dimension of splicing array must not have averaging [yet]")
+ try:
+ x = nodes.StatisticsConfig(s, { 'dimension':100,
+ 'descriptor': 'foo'} )
+ int_list.append(s)
+ except Exception as e:
+ raise Exception("The following element of the splicing array is not a valid specifier "
+ "of statistics: {0}\nGot {1}".format(s, str(e)))
+ splice_array.append(int_list)
+
+ if leftmost_splice == 10000 or rightmost_splice == -10000:
+ raise Exception("invalid element of --splice-indexes: " + string)
+ left_context += -leftmost_splice
+ right_context += rightmost_splice
+ except ValueError as e:
+ raise Exception("invalid --splice-indexes argument " + splice_indexes + " " + str(e))
+
+ left_context = max(0, left_context)
+ right_context = max(0, right_context)
+
+ return {'left_context':left_context,
+ 'right_context':right_context,
+ 'splice_indexes':splice_array,
+ 'num_hidden_layers':len(splice_array)
+ }
+
+def AddPriorsAccumulator(config_lines, name, input):
+ components = config_lines['components']
+ component_nodes = config_lines['component-nodes']
+
+ components.append("component name={0}_softmax type=SoftmaxComponent dim={1}".format(name, input['dimension']))
+ component_nodes.append("component-node name={0}_softmax component={0}_softmax input={1}".format(name, input['descriptor']))
+
+ return {'descriptor': '{0}_softmax'.format(name),
+ 'dimension': input['dimension']}
+
+def AddFinalLayer(config_lines, input, output_dim,
+ ng_affine_options = " param-stddev=0 bias-stddev=0 ",
+ label_delay=None,
+ use_presoftmax_prior_scale = False,
+ prior_scale_file = None,
+ include_log_softmax = True,
+ add_final_sigmoid = False,
+ name_affix = None,
+ objective_type = "linear",
+ objective_scale = 1.0,
+ objective_scales_vec = None):
+ components = config_lines['components']
+ component_nodes = config_lines['component-nodes']
+
+ if name_affix is not None:
+ final_node_prefix = 'Final-' + str(name_affix)
+ else:
+ final_node_prefix = 'Final'
+
+ prev_layer_output = nodes.AddAffineLayer(config_lines,
+ final_node_prefix, input, output_dim,
+ ng_affine_options)
+ if include_log_softmax:
+ if use_presoftmax_prior_scale:
+ components.append('component name={0}-fixed-scale type=FixedScaleComponent scales={1}'.format(final_node_prefix, prior_scale_file))
+ component_nodes.append('component-node name={0}-fixed-scale component={0}-fixed-scale input={1}'.format(final_node_prefix,
+ prev_layer_output['descriptor']))
+ prev_layer_output['descriptor'] = "{0}-fixed-scale".format(final_node_prefix)
+ prev_layer_output =
nodes.AddSoftmaxLayer(config_lines, final_node_prefix, prev_layer_output) + + elif add_final_sigmoid: + # Useful when you need the final outputs to be probabilities + # between 0 and 1. + # Usually used with an objective-type such as "quadratic" + prev_layer_output = nodes.AddSigmoidLayer(config_lines, final_node_prefix, prev_layer_output) + + # we use the same name_affix as a prefix in for affine/scale nodes but as a + # suffix for output node + if (objective_scale != 1.0 or objective_scales_vec is not None): + prev_layer_output = nodes.AddGradientScaleLayer(config_lines, final_node_prefix, prev_layer_output, objective_scale, objective_scales_vec) + + nodes.AddOutputLayer(config_lines, prev_layer_output, label_delay, suffix = name_affix, objective_type = objective_type) + +def AddOutputLayers(config_lines, prev_layer_output, output_nodes, + ng_affine_options = "", label_delay = 0): + + for o in output_nodes: + # make the intermediate config file for layerwise discriminative + # training + AddFinalLayer(config_lines, prev_layer_output, o.dim, + ng_affine_options, label_delay = label_delay, + include_log_softmax = o.include_log_softmax, + add_final_sigmoid = o.add_final_sigmoid, + objective_type = o.objective_type, + name_affix = o.output_suffix) + + if o.xent_regularize != 0.0: + nodes.AddFinalLayer(config_lines, prev_layer_output, o.dim, + include_log_softmax = True, + label_delay = label_delay, + name_affix = o.output_suffix + '_xent') + +# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. +def MakeConfigs(config_dir, splice_indexes_string, + cnn_layer, cnn_bottleneck_dim, cepstral_lifter, + feat_dim, ivector_dim, add_lda, + nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, + nonlin_output_dim_init, nonlin_output_dim_final, + use_presoftmax_prior_scale, final_layer_normalize_target, + output_nodes, self_repair_scale): + + parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) + + left_context = parsed_splice_output['left_context'] + right_context = parsed_splice_output['right_context'] + num_hidden_layers = parsed_splice_output['num_hidden_layers'] + splice_indexes = parsed_splice_output['splice_indexes'] + input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim + + prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) + + config_lines = {'components':[], 'component-nodes':[]} + + config_files={} + prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], + ivector_dim) + + # Add the init config lines for estimating the preconditioning matrices + init_config_lines = copy.deepcopy(config_lines) + init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') + init_config_lines['components'].insert(0, '# preconditioning matrix computation') + + for o in output_nodes: + nodes.AddOutputLayer(init_config_lines, prev_layer_output, + objective_type = o.objective_type, suffix = o.output_suffix) + + config_files[config_dir + '/init.config'] = init_config_lines + + if cnn_layer is not None: + prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, + feat_dim, splice_indexes[0], ivector_dim) + + # add_lda needs to be set "false" when using dense targets, + # or if the task is not a simple classification task + # (e.g. 
regression, multi-task) + if add_lda: + prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') + + left_context = 0 + right_context = 0 + # we moved the first splice layer to before the LDA.. + # so the input to the first affine layer is going to [0] index + splice_indexes[0] = [0] + + if not nonlin_output_dim is None: + nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers + elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: + raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") + else: + # computes relu-dim for each hidden layer. They increase geometrically across layers + factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 + nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] + assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error + nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified + + for i in range(0, num_hidden_layers): + # make the intermediate config file for layerwise discriminative training + + # prepare the spliced input + if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): + try: + zero_index = splice_indexes[i].index(0) + except ValueError: + zero_index = None + # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor + prev_layer_output_descriptor = prev_layer_output['descriptor'] + subset_output = prev_layer_output + if subset_dim > 0: + # if subset_dim is specified the script expects a zero in the splice indexes + assert(zero_index is not None) + subset_node_config = ("dim-range-node name=Tdnn_input_{0} " + "input-node={1} dim-offset={2} dim={3}".format( + i, prev_layer_output_descriptor, 0, subset_dim)) + subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), + 'dimension' : subset_dim} + config_lines['component-nodes'].append(subset_node_config) + appended_descriptors = [] + appended_dimension = 0 + for j in range(len(splice_indexes[i])): + if j == zero_index: + appended_descriptors.append(prev_layer_output['descriptor']) + appended_dimension += prev_layer_output['dimension'] + continue + try: + offset = int(splice_indexes[i][j]) + # it's an integer offset. + appended_descriptors.append('Offset({0}, {1})'.format( + subset_output['descriptor'], splice_indexes[i][j])) + appended_dimension += subset_output['dimension'] + except ValueError: + # it's not an integer offset, so assume it specifies the + # statistics-extraction. 
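+ # For example (assuming the specifier format accepted by
+ # StatisticsConfig in steps/nnet3/components.py), an element like
+ #   mean+stddev(-99:3:9:99)
+ # appends the mean and standard deviation of the previous layer's
+ # output pooled over a wide window, instead of a fixed frame offset.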
+ stats = nodes.StatisticsConfig(splice_indexes[i][j], prev_layer_output) + stats_layer = stats.AddLayer(config_lines, "Tdnn_stats_{0}".format(i)) + appended_descriptors.append(stats_layer['descriptor']) + appended_dimension += stats_layer['dimension'] + + prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), + 'dimension' : appended_dimension} + else: + # this is a normal affine node + pass + + if nonlin_type == "relu": + prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_output_dims[i], + self_repair_scale=self_repair_scale, + norm_target_rms=1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + elif nonlin_type == "pnorm": + prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms=1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + # a final layer is added after each new layer as we are generating + # configs for layer-wise discriminative training + + AddOutputLayers(config_lines, prev_layer_output, output_nodes) + + config_files['{0}/layer{1}.config'.format(config_dir, i + 1)] = config_lines + config_lines = {'components':[], 'component-nodes':[]} + + left_context += int(parsed_splice_output['left_context']) + right_context += int(parsed_splice_output['right_context']) + + # write the files used by other scripts like steps/nnet3/get_egs.sh + f = open(config_dir + "/vars", "w") + print('model_left_context=' + str(left_context), file=f) + print('model_right_context=' + str(right_context), file=f) + print('num_hidden_layers=' + str(num_hidden_layers), file=f) + print('add_lda=' + ('true' if add_lda else 'false'), file=f) + f.close() + + # printing out the configs + # init.config used to train lda-mllt train + for key in config_files.keys(): + PrintConfig(key, config_files[key]) + +def ParseOutputNodesParameters(para_array): + output_parser = argparse.ArgumentParser() + output_parser.add_argument('--output-suffix', type=str, action=common_lib.NullstrToNoneAction, + help = "Name of the output node. e.g. output-xent") + output_parser.add_argument('--dim', type=int, required=True, + help = "Dimension of the output node") + output_parser.add_argument("--include-log-softmax", type=str, action=common_lib.StrToBoolAction, + help="add the final softmax layer ", + default=True, choices = ["false", "true"]) + output_parser.add_argument("--add-final-sigmoid", type=str, action=common_lib.StrToBoolAction, + help="add a sigmoid layer as the final layer. Applicable only if skip-final-softmax is true.", + choices=['true', 'false'], default = False) + output_parser.add_argument("--objective-type", type=str, default="linear", + choices = ["linear", "quadratic","xent-per-dim"], + help = "the type of objective; i.e. 
quadratic or linear") + output_parser.add_argument("--xent-regularize", type=float, + help="For chain models, if nonzero, add a separate output for cross-entropy " + "regularization (with learning-rate-factor equal to the inverse of this)", + default=0.0) + + output_nodes = [ output_parser.parse_args(shlex.split(x)) for x in para_array ] + + return output_nodes + +def Main(): + args = GetArgs() + + output_nodes = ParseOutputNodesParameters(args.output_node_para_array) + + MakeConfigs(config_dir = args.config_dir, + feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, + add_lda = args.add_lda, + cepstral_lifter = args.cepstral_lifter, + splice_indexes_string = args.splice_indexes, + cnn_layer = args.cnn_layer, + cnn_bottleneck_dim = args.cnn_bottleneck_dim, + nonlin_type = args.nonlin_type, + nonlin_input_dim = args.nonlin_input_dim, + nonlin_output_dim = args.nonlin_output_dim, + subset_dim = args.subset_dim, + nonlin_output_dim_init = args.nonlin_output_dim_init, + nonlin_output_dim_final = args.nonlin_output_dim_final, + use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, + final_layer_normalize_target = args.final_layer_normalize_target, + output_nodes = output_nodes, + self_repair_scale = args.self_repair_scale_nonlinearity) + +if __name__ == "__main__": + Main() + + diff --git a/egs/aspire/s5/local/segmentation/prepare_ami.sh b/egs/aspire/s5/local/segmentation/prepare_ami.sh new file mode 100755 index 00000000000..7147a3004cb --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_ami.sh @@ -0,0 +1,223 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +. cmd.sh +. path.sh + +set -e +set -o pipefail +set -u + +stage=-1 + +dataset=dev +nj=18 + +. utils/parse_options.sh + +export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH + +src_dir=/export/a09/vmanoha1/workspace_asr_diarization/egs/ami/s5b # AMI src_dir +dir=exp/sad_ami_sdm1_${dataset}/ref + +mkdir -p $dir + +# Expecting user to have done run.sh to run the AMI recipe in $src_dir for +# both sdm and ihm microphone conditions + +if [ $stage -le 1 ]; then + ( + cd $src_dir + local/prepare_parallel_train_data.sh --train-set ${dataset} sdm1 + + awk '{print $1" "$2}' $src_dir/data/ihm/${dataset}/segments > \ + $src_dir/data/ihm/${dataset}/utt2reco + awk '{print $1" "$2}' $src_dir/data/sdm1/${dataset}/segments > \ + $src_dir/data/sdm1/${dataset}/utt2reco + + cat $src_dir/data/sdm1/${dataset}_ihmdata/ihmutt2utt | \ + utils/filter_scp.pl -f 1 $src_dir/data/ihm/${dataset}/utt2reco | \ + utils/apply_map.pl -f 1 $src_dir/data/ihm/${dataset}/utt2reco | \ + utils/filter_scp.pl -f 2 $src_dir/data/sdm1/${dataset}/utt2reco | \ + utils/apply_map.pl -f 2 $src_dir/data/sdm1/${dataset}/utt2reco | \ + sort -u > $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco + ) +fi + +[ ! -s $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco ] && echo "Empty $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco!" 
&& exit 1 + +phone_map=$dir/phone_map +if [ $stage -le 2 ]; then + ( + cd $src_dir + utils/data/get_reco2utt.sh $src_dir/data/sdm1/${dataset} + + steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \ + data/sdm1/${dataset}_ihmdata exp/sdm1/make_mfcc mfcc_sdm1 + steps/compute_cmvn_stats.sh \ + data/sdm1/${dataset}_ihmdata exp/sdm1/make_mfcc mfcc_sdm1 + utils/fix_data_dir.sh data/sdm1/${dataset}_ihmdata + ) + + steps/segmentation/get_sad_map.py \ + $src_dir/data/lang | utils/sym2int.pl -f 1 $src_dir/data/lang/phones.txt > \ + $phone_map +fi + +if [ $stage -le 3 ]; then + # Expecting user to have run local/run_cleanup_segmentation.sh in $src_dir + ( + cd $src_dir + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/sdm1/${dataset}_ihmdata data/lang \ + exp/ihm/tri3_cleaned \ + exp/sdm1/tri3_cleaned_${dataset}_ihmdata + ) +fi + +if [ $stage -le 4 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$train_cmd" \ + $src_dir/exp/sdm1/tri3_cleaned_${dataset}_ihmdata $phone_map $dir +fi + +echo "A 1" > $dir/channel_map +cat $src_dir/data/sdm1/${dataset}/reco2file_and_channel | \ + utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel + +utils/data/get_reco2utt.sh $src_dir/data/sdm1/${dataset}_ihmdata +cat $src_dir/data/sdm1/${dataset}_ihmdata/reco2utt | \ + awk 'BEGIN{i=1} {print $1" "i; i++;}' > \ + $src_dir/data/sdm1/${dataset}_ihmdata/reco.txt + +if [ $stage -le 5 ]; then + # Reference RTTM where SPEECH frames are obtainted by combining IHM VAD alignments + cat $src_dir/data/sdm1/${dataset}_ihmdata/reco.txt | \ + awk '{print $1" 1:"$2" 10000:10000 0:0"}' > $dir/ref_spk2label_map + + $train_cmd $dir/log/get_ref_spk_seg.log \ + segmentation-combine-segments --include-missing-utt-level-segmentations scp:$dir/sad_seg.scp \ + "ark:segmentation-init-from-segments --segment-label=10000 --shift-to-zero=false $src_dir/data/sdm1/${dataset}_ihmdata/segments ark:- |" \ + ark,t:$src_dir/data/sdm1/${dataset}_ihmdata/reco2utt ark:- \| \ + segmentation-copy --utt2label-map-rspecifier=ark,t:$dir/ref_spk2label_map \ + ark:- ark:- \| \ + segmentation-merge-recordings \ + "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco |" \ + ark:- "ark:| gzip -c > $dir/ref_spk_seg.gz" +fi + +if [ $stage -le 6 ]; then + utils/data/get_reco2num_frames.sh --frame-shift 0.01 --frame-overlap 0.015 \ + --cmd queue.pl --nj $nj \ + $src_dir/data/sdm1/${dataset} + + ## Get a filter that selects only regions within the manual segments. + #$train_cmd $dir/log/get_manual_segments_regions.log \ + # segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/${dataset}/segments ark:- \| \ + # segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/${dataset}/reco2utt ark:- \| \ + # segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + # "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames ark:- |" ark:- ark,t:- \| \ + # perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . 
"\n";' \| \ + # segmentation-create-subsegments --filter-label=10000 --subsegment-label=10000 \ + # ark,t:- "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- \| \ + # segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + # segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + # --max-intersegment-length=10000 ark,t:- \ + # "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" +fi + +if [ $stage -le 7 ]; then + $train_cmd $dir/log/get_overlap_sad_seg.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz |" \ + ark:/dev/null ark:/dev/null ark:- \| \ + classes-per-frame-to-labels --junk-label=10000 ark:- ark:- \| \ + segmentation-init-from-ali ark:- \ + "ark:| gzip -c > $dir/overlap_sad_seg.gz" +fi + +if [ $stage -le 8 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_rttm.log \ + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 \ + ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/ref.rttm + + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_ref_rttm.log \ + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 --map-to-speech-and-sil=false ark:- $dir/overlapping_speech_ref.rttm +fi + + +#if [ $stage -le 8 ]; then +# # Get RTTM for overlapped speech detection with 3 classes +# # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP +# $train_cmd $dir/log/get_overlapping_rttm.log \ +# segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ +# "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ +# ark:/dev/null ark:- \| \ +# segmentation-init-from-ali ark:- ark:- \| \ +# segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ +# --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ +# segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ +# ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ +# segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ +# segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ +# --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm +#fi + +# make $dir an absolute pathname. 
+dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
+
+if [ $stage -le 9 ]; then
+ # Get a filter that selects only regions of speech
+ $train_cmd $dir/log/get_speech_filter.log \
+ gunzip -c $dir/overlap_sad_seg.gz \| \
+ segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 ark:- ark:- \| \
+ segmentation-post-process --remove-labels=10000 ark:- ark:- \| \
+ segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \
+ ark:- ark,t:- \| \
+ steps/segmentation/convert_ali_to_vec.pl \| \
+ copy-vector ark,t:- ark,scp:$dir/deriv_weights_for_overlapping_sad.ark,$dir/deriv_weights_for_overlapping_sad.scp
+
+ # Get deriv weights
+ $train_cmd $dir/log/get_deriv_weights.log \
+ gunzip -c $dir/overlap_sad_seg.gz \| \
+ segmentation-post-process --merge-labels=0:1:2 --merge-dst-label=1 ark:- ark:- \| \
+ segmentation-post-process --remove-labels=10000 ark:- ark:- \| \
+ segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \
+ ark:- ark,t:- \| \
+ steps/segmentation/convert_ali_to_vec.pl \| \
+ copy-vector ark,t:- ark,scp:$dir/deriv_weights.ark,$dir/deriv_weights.scp
+fi
+
+if [ $stage -le 10 ]; then
+ $train_cmd $dir/log/get_overlapping_sad.log \
+ gunzip -c $dir/overlap_sad_seg.gz \| \
+ segmentation-post-process --remove-labels=10000 ark:- ark:- \| \
+ segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \
+ ark:- ark,scp:$dir/overlapping_sad_labels.ark,$dir/overlapping_sad_labels.scp
+fi
+
+if false && [ $stage -le 11 ]; then
+ utils/data/convert_data_dir_to_whole.sh \
+ $src_dir/data/sdm1/${dataset} data/ami_sdm1_${dataset}_whole
+ utils/fix_data_dir.sh \
+ data/ami_sdm1_${dataset}_whole
+ utils/copy_data_dir.sh \
+ data/ami_sdm1_${dataset}_whole data/ami_sdm1_${dataset}_whole_hires_bp
+ utils/data/downsample_data_dir.sh 8000 data/ami_sdm1_${dataset}_whole_hires_bp
+
+ steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj $nj \
+ data/ami_sdm1_${dataset}_whole_hires_bp exp/make_hires_bp mfcc_hires_bp
+ steps/compute_cmvn_stats.sh --fake \
+ data/ami_sdm1_${dataset}_whole_hires_bp exp/make_hires_bp mfcc_hires_bp
+ utils/fix_data_dir.sh \
+ data/ami_sdm1_${dataset}_whole_hires_bp
+fi
diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh
new file mode 100644
index 00000000000..e70dc216980
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh
@@ -0,0 +1,105 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+# This script prepares Babel data for training speech activity detection
+# and music detection systems.
+
+. path.sh
+. cmd.sh
+
+set -e
+set -o pipefail
+set -u
+
+lang_id=assamese
+subset= # Number of recordings to keep before speed perturbation and corruption.
+ # In limitedLP, this is about 120. So subset, if specified, must be lower than that.
+
+# All the paths below can be modified to any absolute path.
+ROOT_DIR=/home/vimal/workspace_waveform/egs/babel/s5c_assamese/
+
+stage=-1
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+ echo "Usage: $0"
+ echo "This script is to serve as an example recipe."
+ echo "Edit the script to change variables if needed."
+ exit 1
+fi
+
+dir=exp/unsad/make_unsad_babel_${lang_id}_train # Work dir
+
+model_dir=$ROOT_DIR/exp/tri4 # Model directory used for decoding
+sat_model_dir=$ROOT_DIR/exp/tri5 # Model directory used for getting alignments
+lang=$ROOT_DIR/data/lang # Language directory
+lang_test=$ROOT_DIR/data/lang # Language directory used to build graph
+
+mkdir -p $dir
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/babel_sad.map
+ 3
+_B 3
+_E 3
+_I 3
+_S 3
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+SIL 0
+SIL_B 0
+SIL_E 0
+SIL_I 0
+SIL_S 0
+EOF
+
+# The original data directory which will be converted to a whole (recording-level) directory.
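+# (For example, with hypothetical ids: if the source directory has utterances
+# utt1 and utt2 cut out of recording rec1, the _whole directory has a single
+# utterance spanning all of rec1, and SAD labels are then defined per frame
+# of rec1.)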
+utils/copy_data_dir.sh $ROOT_DIR/data/train data/babel_${lang_id}_train
+train_data_dir=data/babel_${lang_id}_train
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+ --sad-map $dir/babel_sad.map \
+ --config-dir $ROOT_DIR/conf --feat-type plp --add-pitch true \
+ --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+ --sat-model-dir $sat_model_dir \
+ --lang-test $lang_test \
+ $train_data_dir $lang $model_dir $dir
+
+orig_data_dir=${train_data_dir}_sp
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+ # Work on a subset
+ utils/subset_data_dir.sh ${data_dir} $subset \
+ ${data_dir}_$subset
+ data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_snr.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh
new file mode 100644
index 00000000000..a3e087d95ec
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh
@@ -0,0 +1,112 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+# This script prepares Babel data for training speech activity detection,
+# music detection, and overlapped speech detection systems.
+
+. path.sh
+. cmd.sh
+
+set -e
+set -o pipefail
+set -u
+
+lang_id=assamese
+subset=150 # Number of recordings to keep before speed perturbation and corruption
+utt_subset=30000 # Number of utterances to keep after speed perturbation for adding overlapped-speech
+
+# All the paths below can be modified to any absolute path.
+ROOT_DIR=/home/vimal/workspace_waveform/egs/babel/s5c_assamese/
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+ echo "Usage: $0"
+ echo "This script is to serve as an example recipe."
+ echo "Edit the script to change variables if needed."
+ exit 1
+fi
+
+dir=exp/unsad/make_unsad_babel_${lang_id}_train # Work dir
+
+mkdir -p $dir
+
+# The original data directory which will be converted to a whole (recording-level) directory.
+train_data_dir=$ROOT_DIR/data/train
+
+model_dir=$ROOT_DIR/exp/tri4 # Model directory used for decoding
+sat_model_dir=$ROOT_DIR/exp/tri5 # Model directory used for getting alignments
+lang=$ROOT_DIR/data/lang # Language directory
+lang_test=$ROOT_DIR/data/lang # Language directory used to build graph
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/babel_sad.map
+ 3
+_B 3
+_E 3
+_I 3
+_S 3
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+SIL 0
+SIL_B 0
+SIL_E 0
+SIL_I 0
+SIL_S 0
+EOF
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+ --sad-map $dir/babel_sad.map \
+ --config-dir $ROOT_DIR/conf \
+ --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+ --sat-model-dir $sat_model_dir \
+ --lang-test $lang_test \
+ $train_data_dir $lang $model_dir $dir
+
+orig_data_dir=${train_data_dir}_sp
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+ # Work on a subset
+ utils/subset_data_dir.sh ${data_dir} $subset \
+ ${data_dir}_$subset
+ data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+if [ ! -z $utt_subset ]; then
+ utils/subset_data_dir.sh ${orig_data_dir} $utt_subset \
+ ${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+ orig_data_dir=${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+fi
+
+# Add overlapping speech from $orig_data_dir/segments and create a new data directory
+utt_vad_dir=$dir/`basename $sat_model_dir`_ali_`basename $train_data_dir`_sp_vad_`basename $train_data_dir`_sp
+local/segmentation/do_corruption_data_dir_overlapped_speech.sh \
+ --data-dir ${orig_data_dir} \
+ --utt-vad-dir $utt_vad_dir
diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh
new file mode 100644
index 00000000000..4f55cc6929e
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh
@@ -0,0 +1,101 @@
+#! /bin/bash
+
+# This script prepares Fisher data for training a speech activity detection
+# and music detection system
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+. path.sh
+. cmd.sh
+
+set -e -o pipefail
+
+if [ $# -ne 0 ]; then
+ echo "Usage: $0"
+ echo "This script is to serve as an example recipe."
+ echo "Edit the script to change variables if needed."
+ exit 1
+fi
+
+dir=exp/unsad/make_unsad_fisher_train_100k # Work dir
+subset=900
+
+# All the paths below can be modified to any absolute path.
+
+# The original data directory which will be converted to a whole (recording-level) directory.
+train_data_dir=data/fisher_train_100k
+
+model_dir=exp/tri3a # Model directory used for decoding
+sat_model_dir=exp/tri4a # Model directory used for getting alignments
+lang=data/lang # Language directory
+lang_test=data/lang_test # Language directory used to build graph
+
+mkdir -p $dir
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/fisher_sad.map
+sil 0
+sil_B 0
+sil_E 0
+sil_I 0
+sil_S 0
+laughter 2
+laughter_B 2
+laughter_E 2
+laughter_I 2
+laughter_S 2
+noise 2
+noise_B 2
+noise_E 2
+noise_I 2
+noise_S 2
+oov 3
+oov_B 3
+oov_E 3
+oov_I 3
+oov_S 3
+EOF
+
+if [ ! -d RIRS_NOISES/ ]; then
+ # Download and unpack the room impulse responses and noises
+ wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ unzip rirs_noises.zip
+fi
+
+if [ ! -d RIRS_NOISES/music ]; then
+ # Prepare MUSAN music
+ local/segmentation/prepare_musan_music.sh /export/corpora/JHU/musan RIRS_NOISES/music
+fi
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+ --sad-map $dir/fisher_sad.map \
+ --config-dir conf \
+ --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+ --sat-model-dir $sat_model_dir \
+ --lang-test $lang_test \
+ $train_data_dir $lang $model_dir $dir
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+ # Work on a subset
+ utils/subset_data_dir.sh ${data_dir} $subset \
+ ${data_dir}_$subset
+ data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_snr.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh
new file mode 100644
index 00000000000..79a03fa9e9d
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh
@@ -0,0 +1,113 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+# This script prepares Fisher data for training speech activity detection,
+# music detection, and overlapped speech detection systems.
+
+. path.sh
+. cmd.sh
+
+if [ $# -ne 0 ]; then
+ echo "Usage: $0"
+ echo "This script is to serve as an example recipe."
+ echo "Edit the script to change variables if needed."
+ exit 1
+fi
+
+dir=exp/unsad/make_unsad_fisher_train_100k # Work dir
+subset=60 # Number of recordings to keep before speed perturbation and corruption
+utt_subset=75000 # Number of utterances to keep after speed perturbation for adding overlapped-speech
+
+# All the paths below can be modified to any absolute path.
+
+# The original data directory which will be converted to a whole (recording-level) directory.
+train_data_dir=data/fisher_train_100k
+
+model_dir=exp/tri3a # Model directory used for decoding
+sat_model_dir=exp/tri4a # Model directory used for getting alignments
+lang=data/lang # Language directory
+lang_test=data/lang_test # Language directory used to build graph
+
+mkdir -p $dir
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/fisher_sad.map
+sil 0
+sil_B 0
+sil_E 0
+sil_I 0
+sil_S 0
+laughter 2
+laughter_B 2
+laughter_E 2
+laughter_I 2
+laughter_S 2
+noise 2
+noise_B 2
+noise_E 2
+noise_I 2
+noise_S 2
+oov 3
+oov_B 3
+oov_E 3
+oov_I 3
+oov_S 3
+EOF
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+ --sad-map $dir/fisher_sad.map \
+ --config-dir conf \
+ --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+ --sat-model-dir $sat_model_dir \
+ --lang-test $lang_test \
+ $train_data_dir $lang $model_dir $dir
+
+orig_data_dir=${train_data_dir}_sp
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+ # Work on a subset
+ utils/subset_data_dir.sh ${data_dir} $subset \
+ ${data_dir}_$subset
+ data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir.sh \
+ --num-data-reps 5 \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+ --num-data-reps 5 \
+ --data-dir $data_dir \
+ --reco-vad-dir $reco_vad_dir \
+ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+if [ ! -z $utt_subset ]; then
+ utils/subset_data_dir.sh ${orig_data_dir} $utt_subset \
+ ${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+ orig_data_dir=${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+fi
+
+# Add overlapping speech from $orig_data_dir/segments and create a new data directory
+utt_vad_dir=$dir/`basename $sat_model_dir`_ali_`basename $train_data_dir`_sp_vad_`basename $train_data_dir`_sp
+local/segmentation/do_corruption_data_dir_overlapped_speech.sh \
+ --nj 40 --cmd queue.pl \
+ --num-data-reps 1 \
+ --data-dir ${orig_data_dir} \
+ --utt-vad-dir $utt_vad_dir
+
+local/segmentation/prepare_unsad_overlapped_speech_labels.sh \
+ --num-data-reps 1 --nj 40 --cmd queue.pl \
+ ${orig_data_dir}_ovlp_corrupted_hires_bp \
+ ${orig_data_dir}_ovlp_corrupted/overlapped_segments_info.txt \
+ $utt_vad_dir exp/make_overlap_labels overlap_labels
diff --git a/egs/aspire/s5/local/segmentation/prepare_musan_music.sh b/egs/aspire/s5/local/segmentation/prepare_musan_music.sh
new file mode 100644
index 00000000000..16fb946b0c8
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_musan_music.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0
+
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 <musan-root-dir> <output-dir>"
+ echo " e.g.: $0 /export/corpora/JHU/musan RIRS_NOISES/music"
+ exit 1
+fi
+
+SRC_DIR=$1
+dir=$2
+
+mkdir -p $dir
+
+local/segmentation/make_musan_music.py $SRC_DIR $dir/wav.scp
+
+wav-to-duration scp:$dir/wav.scp ark,t:$dir/reco2dur
+steps/data/split_wavs_randomly.py $dir/wav.scp $dir/reco2dur \
+ $dir/split_utt2dur $dir/split_wav.scp
+
+awk '{print $1" "int($2*100)}' $dir/split_utt2dur > $dir/split_utt2num_frames
+steps/data/wav_scp2noise_list.py $dir/split_wav.scp $dir/music_list
diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh
new file mode 100755
index 00000000000..cccc7e2db84
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh
@@ -0,0 +1,518 @@
+#!/bin/bash
+
+# This script prepares speech labels and deriv weights for
+# training an unsad network for speech activity detection and music detection.
+
+set -u
+set -o pipefail
+set -e
+
+. path.sh
+
+stage=-2
+cmd=queue.pl
+reco_nj=40
+nj=100
+
+# Options to be passed to get_sad_map.py
+map_noise_to_sil=true # Map noise phones to silence label (0)
+map_unk_to_speech=true # Map unk phones to speech label (1)
+sad_map= # Initial mapping from phones to speech/non-speech labels.
+ # Overrides the default mapping using phones/silence.txt
+ # and phones/nonsilence.txt
+
+# Options for feature extraction
+feat_type=mfcc # mfcc or plp
+add_pitch=false # Add pitch features
+
+config_dir=conf
+feat_config=
+pitch_config=
+
+mfccdir=mfcc
+plpdir=plp
+
+speed_perturb=true
+
+sat_model_dir= # Model directory used for getting alignments
+lang_test= # Language directory used to build graph.
+ # If it is not provided, $lang will be used instead.
+
+. utils/parse_options.sh
+
+if [ $# -ne 4 ]; then
+ echo "This script takes a data directory and creates a new data directory "
+ echo "and speech activity labels"
+ echo "for the purpose of training a Universal Speech Activity Detector."
+ echo "Usage: $0 [options] <data-dir> <lang> <model-dir> <dir>"
+ echo " e.g.: $0 data/train_100k data/lang exp/tri4a exp/vad_data_prep"
+ echo ""
+ echo "Main options (for others, see top of script file)"
+ echo " --config <config-file> # config file containing options"
+ echo " --cmd (run.pl|queue.pl <queue-opts>) # how to run jobs."
+ echo " --reco-nj <#njobs|4> # Split a whole data directory into these many pieces"
+ echo " --nj <#njobs|4> # Split a segmented data directory into these many pieces"
+ exit 1
+fi
+
+data_dir=$1
+lang=$2
+model_dir=$3
+dir=$4
+
+if [ $feat_type != "plp" ] && [ $feat_type != "mfcc" ]; then
+ echo "$0: --feat-type must be plp or mfcc. Must match the model_dir used."
+ exit 1
+fi
+
+[ -z "$feat_config" ] && feat_config=$config_dir/$feat_type.conf
+[ -z "$pitch_config" ] && pitch_config=$config_dir/pitch.conf
+
+extra_files=
+
+if $add_pitch; then
+ extra_files="$extra_files $pitch_config"
+fi
+
+for f in $feat_config $extra_files; do
-f $f ]; then + echo "$f could not be found" + exit 1 + fi +done + +mkdir -p $dir + +function make_mfcc { + local nj=$nj + local mfcc_config=$feat_config + local add_pitch=$add_pitch + local cmd=$cmd + local pitch_config=$pitch_config + + while [ $# -gt 0 ]; do + if [ $1 == "--nj" ]; then + nj=$2 + shift; shift; + elif [ $1 == "--mfcc-config" ]; then + mfcc_config=$2 + shift; shift; + elif [ $1 == "--add-pitch" ]; then + add_pitch=$2 + shift; shift; + elif [ $1 == "--cmd" ]; then + cmd=$2 + shift; shift; + elif [ $1 == "--pitch-config" ]; then + pitch_config=$2 + shift; shift; + else + break + fi + done + + if [ $# -ne 3 ]; then + echo "Usage: make_mfcc " + exit 1 + fi + + if $add_pitch; then + steps/make_mfcc_pitch.sh --cmd "$cmd" --nj $nj \ + --mfcc-config $mfcc_config --pitch-config $pitch_config $1 $2 $3 || exit 1 + else + steps/make_mfcc.sh --cmd "$cmd" --nj $nj \ + --mfcc-config $mfcc_config $1 $2 $3 || exit 1 + fi + +} + +function make_plp { + local nj=$nj + local mfcc_config=$feat_config + local add_pitch=$add_pitch + local cmd=$cmd + local pitch_config=$pitch_config + + while [ $# -gt 0 ]; do + if [ $1 == "--nj" ]; then + nj=$2 + shift; shift; + elif [ $1 == "--plp-config" ]; then + plp_config=$2 + shift; shift; + elif [ $1 == "--add-pitch" ]; then + add_pitch=$2 + shift; shift; + elif [ $1 == "--cmd" ]; then + cmd=$2 + shift; shift; + elif [ $1 == "--pitch-config" ]; then + pitch_config=$2 + shift; shift; + else + break + fi + done + + if [ $# -ne 3 ]; then + echo "Usage: make_plp " + exit 1 + fi + + if $add_pitch; then + steps/make_plp_pitch.sh --cmd "$cmd" --nj $nj \ + --plp-config $plp_config --pitch-config $pitch_config $1 $2 $3 || exit 1 + else + steps/make_plp.sh --cmd "$cmd" --nj $nj \ + --plp-config $plp_config $1 $2 $3 || exit 1 + fi +} + +frame_shift_info=`cat $feat_config | steps/segmentation/get_frame_shift_info_from_config.pl` || exit 1 + +frame_shift=`echo $frame_shift_info | awk '{print $1}'` +frame_overlap=`echo $frame_shift_info | awk '{print $2}'` + +data_id=$(basename $data_dir) +whole_data_dir=${data_dir}_whole +whole_data_id=${data_id}_whole + +if [ $stage -le -2 ]; then + steps/segmentation/get_sad_map.py \ + --init-sad-map="$sad_map" \ + --map-noise-to-sil=$map_noise_to_sil \ + --map-unk-to-speech=$map_unk_to_speech \ + $lang | utils/sym2int.pl -f 1 $lang/phones.txt > $dir/sad_map + + utils/data/convert_data_dir_to_whole.sh ${data_dir} ${whole_data_dir} + utils/data/get_utt2dur.sh ${whole_data_dir} +fi + +if $speed_perturb; then + plpdir=${plpdir}_sp + mfccdir=${mfccdir}_sp + + if [ $stage -le -1 ]; then + utils/data/perturb_data_dir_speed_3way.sh ${whole_data_dir} ${whole_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh ${data_dir} ${data_dir}_sp + + if [ $feat_type == "mfcc" ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + make_mfcc --cmd "$cmd --max-jobs-run 40" --nj $nj \ + --mfcc-config $feat_config \ + --add-pitch $add_pitch --pitch-config $pitch_config \ + ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1 + steps/compute_cmvn_stats.sh \ + ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1 + elif [ $feat_type == "plp" ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $plpdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$plpdir/storage $plpdir/storage + fi + + make_plp --cmd "$cmd --max-jobs-run 40" --nj $nj \ + --plp-config $feat_config \ + --add-pitch $add_pitch --pitch-config $pitch_config \ + ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1 + steps/compute_cmvn_stats.sh \ + ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1 + else + echo "$0: Unknown feat-type $feat_type. Must be mfcc or plp." + exit 1 + fi + + utils/fix_data_dir.sh ${whole_data_dir}_sp + fi + + data_dir=${data_dir}_sp + whole_data_dir=${whole_data_dir}_sp + data_id=${data_id}_sp +fi + + +############################################################################### +# Compute length of recording +############################################################################### + +if [ $stage -le 0 ]; then + utils/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp + cp $data_dir/tmp/feats.scp $data_dir + + if [ $feat_type == mfcc ]; then + steps/compute_cmvn_stats.sh ${data_dir} exp/make_mfcc/${data_id} $mfccdir + else + steps/compute_cmvn_stats.sh ${data_dir} exp/make_plp/${data_id} $plpdir + fi + + utils/fix_data_dir.sh $data_dir +fi + +if [ -z "$sat_model_dir" ]; then + ali_dir=${model_dir}_ali_${data_id} + if [ $stage -le 2 ]; then + steps/align_si.sh --nj $nj --cmd "$cmd" \ + ${data_dir} ${lang} ${model_dir} ${model_dir}_ali_${data_id} || exit 1 + fi +else + ali_dir=${sat_model_dir}_ali_${data_id} + #obtain the alignment of the perturbed data + if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$cmd" \ + ${data_dir} ${lang} ${sat_model_dir} ${sat_model_dir}_ali_${data_id} || exit 1 + fi +fi + + +# All the data from this point is speed perturbed. + +data_id=$(basename $data_dir) +utils/split_data.sh $data_dir $nj + +############################################################################### +# Convert alignment for the provided segments into +# initial SAD labels at utterance-level in segmentation format +############################################################################### + +vad_dir=$dir/`basename ${ali_dir}`_vad_${data_id} +if [ $stage -le 3 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ + $ali_dir $dir/sad_map $vad_dir +fi + +[ ! 
-s $vad_dir/sad_seg.scp ] && echo "$0: $vad_dir/vad.scp is empty" && exit 1 + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $data_dir $dir/${data_id}_manual_segments + + awk '{print $1" "$2}' $dir/${data_id}_manual_segments/segments | sort -k1,1 > $dir/${data_id}_manual_segments/utt2spk + utils/utt2spk_to_spk2utt.pl $dir/${data_id}_manual_segments/utt2spk | sort -k1,1 > $dir/${data_id}_manual_segments/spk2utt + + if [ $feat_type == mfcc ]; then + steps/compute_cmvn_stats.sh $dir/${data_id}_manual_segments exp/make_mfcc/${data_id}_manual_segments $mfccdir + else + steps/compute_cmvn_stats.sh $dir/${data_id}_manual_segments exp/make_plp/${data_id}_manual_segments $plpdir + fi + + utils/fix_data_dir.sh $dir/${data_id}_manual_segments || true # Might fail because utt2spk will be not sorted on both utts and spks +fi + + +#utils/split_data.sh --per-reco $data_dir $reco_nj +#segmentation-combine-segments ark,s:$vad_dir/sad_seg.scp +# "ark,s:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$ali_frame_shift --frame-overlap=$ali_frame_overlap ${data}/split${reco_nj}reco/JOB/segments ark:- |" \ +# "ark:cat ${data}/split${reco_nj}reco/JOB/segments | cut -d ' ' -f 1,2 | utils/utt2spk_to_spk2utt.pl | sort -k1,1 |" ark:- + +############################################################################### + + +# Create extended data directory that consists of the provided +# segments along with the segments outside it. +# This is basically dividing the whole recording into pieces +# consisting of pieces corresponding to the provided segments +# and outside the provided segments. + +############################################################################### +# Create segments outside of the manual segments +############################################################################### + +outside_data_dir=$dir/${data_id}_outside +if [ $stage -le 5 ]; then + rm -rf $outside_data_dir + mkdir -p $outside_data_dir/split${reco_nj}reco + + for f in wav.scp reco2file_and_channel stm glm; do + [ -f ${data_dir}/$f ] && cp ${data_dir}/$f $outside_data_dir + done + + steps/segmentation/split_data_on_reco.sh $data_dir $whole_data_dir $reco_nj + + for n in `seq $reco_nj`; do + dsn=$whole_data_dir/split${reco_nj}reco/$n + awk '{print $2}' $dsn/segments | \ + utils/filter_scp.pl /dev/stdin $whole_data_dir/utt2num_frames > \ + $dsn/utt2num_frames + mkdir -p $outside_data_dir/split${reco_nj}reco/$n + done + + $cmd JOB=1:$reco_nj $outside_data_dir/log/get_empty_segments.JOB.log \ + segmentation-init-from-segments --frame-shift=$frame_shift \ + --frame-overlap=$frame_overlap --shift-to-zero=false \ + ${data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + "ark,t:cut -d ' ' -f 1,2 ${data_dir}/split${reco_nj}reco/JOB/segments | utils/utt2spk_to_spk2utt.pl |" ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=0 \ + "ark:segmentation-init-from-lengths --label=1 ark,t:${whole_data_dir}/split${reco_nj}reco/JOB/utt2num_frames ark:- |" \ + ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 --max-segment-length=1000 \ + --post-process-label=1 --overlap-length=50 \ + ark:- ark:- \| segmentation-to-segments --single-speaker=true \ + --frame-shift=$frame_shift --frame-overlap=$frame_overlap \ + ark:- ark,t:$outside_data_dir/split${reco_nj}reco/JOB/utt2spk \ + $outside_data_dir/split${reco_nj}reco/JOB/segments || exit 1 + + for n in `seq $reco_nj`; do + cat $outside_data_dir/split${reco_nj}reco/$n/utt2spk + done | 
sort -k1,1 > $outside_data_dir/utt2spk + + for n in `seq $reco_nj`; do + cat $outside_data_dir/split${reco_nj}reco/$n/segments + done | sort -k1,1 > $outside_data_dir/segments + + utils/fix_data_dir.sh $outside_data_dir + +fi + + +if [ $stage -le 6 ]; then + utils/data/subsegment_data_dir.sh $whole_data_dir $outside_data_dir/segments \ + $outside_data_dir/tmp + cp $outside_data_dir/tmp/feats.scp $outside_data_dir +fi + +extended_data_dir=$dir/${data_id}_extended +if [ $stage -le 7 ]; then + cp $dir/${data_id}_manual_segments/cmvn.scp ${outside_data_dir} || exit 1 + utils/fix_data_dir.sh $outside_data_dir + + utils/combine_data.sh $extended_data_dir $data_dir $outside_data_dir + + steps/segmentation/split_data_on_reco.sh $data_dir $extended_data_dir $reco_nj +fi + +############################################################################### +# Create graph for decoding +############################################################################### + +# TODO: By default, we use word LM. If required, we can think +# consider phone LM. +graph_dir=$model_dir/graph +if [ $stage -le 8 ]; then + if [ ! -d $graph_dir ]; then + utils/mkgraph.sh ${lang_test} $model_dir $graph_dir || exit 1 + fi +fi + +############################################################################### +# Decode extended data directory +############################################################################### + + +# Decode without lattice (get only best path) +if [ $stage -le 8 ]; then + steps/decode_nolats.sh --cmd "$cmd --mem 2G" --nj $nj \ + --max-active 1000 --beam 10.0 --write-words false \ + --write-alignments true \ + $graph_dir ${extended_data_dir} \ + ${model_dir}/decode_${data_id}_extended || exit 1 + cp ${model_dir}/final.mdl ${model_dir}/decode_${data_id}_extended +fi + +model_id=`basename $model_dir` + +# Get VAD based on the decoded best path +decode_vad_dir=$dir/${model_id}_decode_vad_${data_id} +if [ $stage -le 9 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ + ${model_dir}/decode_${data_id}_extended $dir/sad_map $decode_vad_dir +fi + +[ ! -s $decode_vad_dir/sad_seg.scp ] && echo "$0: $decode_vad_dir/vad.scp is empty" && exit 1 + +vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vad_dir ${PWD}` + +if [ $stage -le 10 ]; then + segmentation-init-from-segments --frame-shift=$frame_shift \ + --frame-overlap=$frame_overlap --label=0 \ + $outside_data_dir/segments \ + ark,scp:$vad_dir/outside_sad_seg.ark,$vad_dir/outside_sad_seg.scp +fi + +reco_vad_dir=$dir/${model_id}_reco_vad_${data_id} +mkdir -p $reco_vad_dir +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $reco_vad_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$reco_vad_dir/storage $reco_vad_dir/storage +fi + +reco_vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $reco_vad_dir ${PWD}` + +echo $reco_nj > $reco_vad_dir/num_jobs + +if [ $stage -le 11 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/intersect_vad.JOB.log \ + segmentation-intersect-segments --mismatch-label=10 \ + "scp:cat $vad_dir/sad_seg.scp $vad_dir/outside_sad_seg.scp | sort -k1,1 | utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk |" \ + "scp:utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk $decode_vad_dir/sad_seg.scp |" \ + ark:- \| segmentation-post-process --remove-labels=10 \ + --merge-adjacent-segments --max-intersegment-length=10 ark:- ark:- \| \ + segmentation-combine-segments ark:- "ark:segmentation-init-from-segments --shift-to-zero=false $extended_data_dir/split${reco_nj}reco/JOB/segments ark:- |" \ + ark,t:$extended_data_dir/split${reco_nj}reco/JOB/reco2utt \ + ark,scp:$reco_vad_dir/sad_seg.JOB.ark,$reco_vad_dir/sad_seg.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/sad_seg.$n.scp + done > $reco_vad_dir/sad_seg.scp +fi + +set +e +for n in `seq $reco_nj`; do + utils/create_data_link.pl $reco_vad_dir/deriv_weights.$n.ark + utils/create_data_link.pl $reco_vad_dir/deriv_weights_for_uncorrupted.$n.ark + utils/create_data_link.pl $reco_vad_dir/speech_labels.$n.ark +done +set -e + +if [ $stage -le 12 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights.JOB.log \ + segmentation-post-process --merge-labels=0:1:2:3 --merge-dst-label=1 \ + scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$reco_vad_dir/deriv_weights.JOB.ark,$reco_vad_dir/deriv_weights.JOB.scp + + for n in `seq $reco_nj`; do + cat $reco_vad_dir/deriv_weights.$n.scp + done > $reco_vad_dir/deriv_weights.scp +fi + +if [ $stage -le 13 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights_for_uncorrupted.JOB.log \ + segmentation-post-process --remove-labels=1:2:3 scp:$reco_vad_dir/sad_seg.JOB.scp \ + ark:- \| segmentation-post-process --merge-labels=0 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.ark,$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/deriv_weights_for_uncorrupted.$n.scp + done > $reco_vad_dir/deriv_weights_for_uncorrupted.scp +fi + +if [ $stage -le 14 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_speech_labels.JOB.log \ + segmentation-copy --keep-label=1 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ + ark:- ark,scp:$reco_vad_dir/speech_labels.JOB.ark,$reco_vad_dir/speech_labels.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/speech_labels.$n.scp + done > $reco_vad_dir/speech_labels.scp +fi + +if [ $stage -le 15 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/convert_manual_segments_to_deriv_weights.JOB.log \ + segmentation-init-from-segments --shift-to-zero=false \ + $data_dir/split${reco_nj}reco/JOB/segments ark:- 
\| \ + segmentation-combine-segments-to-recordings ark:- \ + ark:$data_dir/split${reco_nj}reco/JOB/reco2utt ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ + ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$reco_vad_dir/deriv_weights_manual_seg.JOB.ark,$reco_vad_dir/deriv_weights_manual_seg.JOB.scp + + for n in `seq $reco_nj`; do + cat $reco_vad_dir/deriv_weights_manual_seg.$n.scp + done > $reco_vad_dir/deriv_weights_manual_seg.scp +fi + +echo "$0: Finished creating corpus for training Universal SAD with data in $whole_data_dir and labels in $reco_vad_dir" diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh new file mode 100755 index 00000000000..f3d1a7707e8 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +# This script prepares speech labels for +# training unsad network for speech activity detection and music detection. +# This is similar to the script prepare_unsad_data.sh, but directly +# uses existing alignments to create labels, instead of creating new alignments. + +set -e +set -o pipefail +set -u + +. path.sh + +stage=-2 +cmd=queue.pl + +# Options to be passed to get_sad_map.py +map_noise_to_sil=true # Map noise phones to silence label (0) +map_unk_to_speech=true # Map unk phones to speech label (1) +sad_map= # Initial mapping from phones to speech/non-speech labels. + # Overrides the default mapping using phones/silence.txt + # and phones/nonsilence.txt + +. utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "This script takes a data directory and alignment directory and " + echo "converts it into speech activity labels" + echo "for the purpose of training a Universal Speech Activity Detector.\n" + echo "Usage: $0 [options] " + echo " e.g.: $0 data/train_100k data/lang exp/tri4a_ali exp/vad_data_prep" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (run.pl|/queue.pl ) # how to run jobs." + exit 1 +fi + +data_dir=$1 +lang=$2 +ali_dir=$3 +dir=$4 + +extra_files= + +for f in $data_dir/feats.scp $lang/phones.txt $lang/phones/silence.txt $lang/phones/nonsilence.txt $sad_map $ali_dir/ali.1.gz $ali_dir/final.mdl $ali_dir/tree $extra_files; do + if [ ! -f $f ]; then + echo "$f could not be found" + exit 1 + fi +done + +mkdir -p $dir + +data_id=$(basename $data_dir) + +if [ $stage -le 0 ]; then + # Get a mapping from the phones to the speech / non-speech labels + steps/segmentation/get_sad_map.py \ + --init-sad-map="$sad_map" \ + --map-noise-to-sil=$map_noise_to_sil \ + --map-unk-to-speech=$map_unk_to_speech \ + $lang | utils/sym2int.pl -f 1 $lang/phones.txt > $dir/sad_map +fi + +############################################################################### +# Convert alignment into SAD labels at utterance-level in segmentation format +############################################################################### + +vad_dir=$dir/`basename ${ali_dir}`_vad_${data_id} + +# Convert relative path to full path +vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir;' $vad_dir ${PWD}` + +if [ $stage -le 1 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ + $ali_dir $dir/sad_map $vad_dir +fi + +[ ! 
-s $vad_dir/sad_seg.scp ] && echo "$0: $vad_dir/sad_seg.scp is empty" && exit 1 + +############################################################################### +# Post-process the segmentation and create frame-level alignments and +# per-frame deriv weights. +############################################################################### + +if [ $stage -le 2 ]; then + # Create per-frame speech / non-speech labels. + nj=`cat $vad_dir/num_jobs` + + utils/data/get_utt2num_frames.sh --nj $nj --cmd "$cmd" $data_dir + + set +e + for n in `seq $nj`; do + utils/create_data_link.pl $vad_dir/speech_labels.$n.ark + done + set -e + + $cmd JOB=1:$nj $vad_dir/log/get_speech_labels.JOB.log \ + segmentation-copy --keep-label=1 scp:$vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$data_dir/utt2num_frames \ + ark:- ark,scp:$vad_dir/speech_labels.JOB.ark,$vad_dir/speech_labels.JOB.scp + + for n in `seq $nj`; do + cat $vad_dir/speech_labels.$n.scp + done > $vad_dir/speech_labels.scp + + cp $vad_dir/speech_labels.scp $data_dir +fi + +echo "$0: Finished creating corpus for training Universal SAD with data in $data_dir and labels in $vad_dir" diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh new file mode 100755 index 00000000000..6d21859d7fe --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh @@ -0,0 +1,283 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +num_data_reps=5 +nj=40 +cmd=queue.pl +snr_db_threshold=10 +stage=-1 + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 data/fisher_train_100k_sp_75k_seg_ovlp_corrupted_hires_bp data/fisher_train_100k_sp_75k_seg_ovlp_corrupted exp/unsad/make_unsad_fisher_train_100k/tri4a_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad overlap_labels" + exit 1 +fi + +corrupted_data_dir=$1 +orig_corrupted_data_dir=$2 +utt_vad_dir=$3 +tmpdir=$4 +overlap_labels_dir=$5 + +overlapped_segments_info=$orig_corrupted_data_dir/overlapped_segments_info.txt +corrupted_data_id=`basename $orig_corrupted_data_dir` + +for f in $corrupted_data_dir/feats.scp $overlapped_segments_info $utt_vad_dir/sad_seg.scp; do + [ ! -f $f ] && echo "Could not find file $f" && exit 1 +done + +overlap_dir=$tmpdir/make_overlap_labels_${corrupted_data_id} +unreliable_dir=$tmpdir/unreliable_${corrupted_data_id} + +mkdir -p $unreliable_dir + +# make $overlap_labels_dir an absolute pathname. +overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` + +# Combine the VAD from the base recording and the VAD from the overlapping segments +# to create per-frame labels of the number of overlapping speech segments +# Unreliable segments are regions where no VAD labels were available for the +# overlapping segments. These can be later removed by setting deriv weights to 0. 
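+# For example (hypothetical ids): with num_data_reps=2, an utterance "utt1" in
+# $utt_vad_dir/sad_seg.scp yields the copies "ovlp1_utt1" and "ovlp2_utt1"
+# below, matching the utterance ids of the corrupted copies. A minimal sketch
+# of the replication idiom used in stage 1, assuming a plain scp file:
+#   for n in $(seq $num_data_reps); do
+#     awk -v n=$n '{print "ovlp"n"_"$0}' $utt_vad_dir/sad_seg.scp
+#   done | sort -k1,1 > sad_seg.scp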
+
+if [ $stage -le 1 ]; then
+  for n in `seq $num_data_reps`; do
+    cat $utt_vad_dir/sad_seg.scp | \
+      awk -v n=$n '{print "ovlp"n"_"$0}'
+  done | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp
+  utils/data/get_utt2num_frames.sh $corrupted_data_dir
+  utils/split_data.sh ${corrupted_data_dir} $nj
+
+  # 1) segmentation-init-from-additive-signals-info converts the information
+  # written out by steps/data/make_corrupted_data_dir.py in overlapped_segments_info.txt
+  # and converts it to segments. It then adds those segments to the
+  # segments already present ($corrupted_data_dir/sad_seg.scp)
+  # 2) Retain only the speech segments (label 1) from these.
+  # 3) Convert this to overlap stats using segmentation-get-stats, which
+  # writes for each frame the number of overlapping segments.
+  # 4) Convert this per-frame "alignment" information to segmentation
+  # ($overlap_dir/overlap_seg.*.gz).
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlap_seg.JOB.log \
+    segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+      --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \
+      --unreliable-segmentation-wspecifier="ark:| gzip -c > $unreliable_dir/unreliable_seg.JOB.gz" \
+      "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \
+      ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt ark:- \| \
+    segmentation-copy --keep-label=1 ark:- ark:- \| \
+    segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+      ark:- ark:- ark:/dev/null \| \
+    segmentation-init-from-ali ark:- "ark:| gzip -c > $overlap_dir/overlap_seg.JOB.gz"
+fi
+
+if [ $stage -le 2 ]; then
+  # Retain labels >= 2, i.e. regions where more than one speaker overlaps.
+  # Write this out in alignment format as "overlapped_speech_labels".
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlapped_speech_labels.JOB.log \
+    gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=0:1 ark:- ark:- \| \
+    segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \
+      ark,scp:$overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/overlapped_speech_labels.scp
+fi
+
+if [ $stage -le 3 ]; then
+  # 1) Initialize a segmentation where all the frames have label 1 using
+  # segmentation-init-from-lengths.
+  # 2) Use the program segmentation-create-subsegments to set to 0
+  # the regions of unreliable segments read from unreliable_seg.*.gz.
+  # This is the initial deriv weights. At this stage deriv weights is 1 for all
+  # but the unreliable segment regions.
+  # 3) Initialize a segmentation from the overlap labels (overlap_seg.*.gz)
+  # and retain regions where there is speech from at least one speaker.
+  # 4) Intersect this with the deriv weights segmentation from above.
+  # At this stage deriv weights is 1 for only the regions where there is
+  # at least one speaker and the overlapping segment is not unreliable.
+  # Convert this to deriv weights.
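+  # Worked example (hypothetical 10-frame utterance): if frames 3-5 are
+  # unreliable and frames 2-8 have speech from at least one speaker, then
+  #   reliable mask : 1 1 1 0 0 0 1 1 1 1
+  #   speech mask   : 0 0 1 1 1 1 1 1 1 0
+  # and the resulting deriv weights (their intersection) are
+  #   deriv weights : 0 0 1 0 0 0 1 1 1 0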
+  $cmd JOB=1:$nj $unreliable_dir/log/get_deriv_weights.JOB.log \
+    utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/utt2num_frames \| \
+    segmentation-init-from-lengths ark,t:- ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \
+      ark:- "ark,s,cs:gunzip -c $unreliable_dir/unreliable_seg.JOB.gz | segmentation-to-segments ark:- - | segmentation-init-from-segments - ark:- |" ark:- \| \
+    segmentation-intersect-segments --mismatch-label=0 \
+      "ark:gunzip -c $overlap_dir/overlap_seg.JOB.gz | segmentation-post-process --remove-labels=0 --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- |" \
+      ark,s,cs:- ark:- \| segmentation-post-process --remove-labels=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+      ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_for_overlapped_speech_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp
+fi
+
+if [ $stage -le 4 ]; then
+  # Find regions where there is at least one speaker speaking.
+  $cmd JOB=1:$nj $overlap_dir/log/get_speech_labels.JOB.log \
+    gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=0 --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| \
+    vector-to-feat ark:- \
+      ark,scp:$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/speech_feat_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/speech_feat.scp
+fi
+
+if [ $stage -le 5 ]; then
+  # Deriv weights for the speech / non-speech labels are 1 everywhere but the
+  # unreliable regions.
+  $cmd JOB=1:$nj $unreliable_dir/log/get_deriv_weights.JOB.log \
+    utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/utt2num_frames \| \
+    segmentation-init-from-lengths ark,t:- ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \
+      ark:- "ark,s,cs:gunzip -c $unreliable_dir/unreliable_seg.JOB.gz | segmentation-to-segments ark:- - | segmentation-init-from-segments - ark:- |" ark:- \| \
+    segmentation-post-process --remove-labels=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+      ark,scp:$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights.scp
+fi
+
+snr_threshold=`perl -e "print $snr_db_threshold / 10.0 * log(10.0)"`
+
+cat <<EOF > $overlap_dir/invert_labels.map
+0 1
+1 0
+EOF
+
+if [ $stage -le 6 ]; then
+  if [ ! -f $corrupted_data_dir/log_snr.scp ]; then
+    echo "$0: Could not find $corrupted_data_dir/log_snr.scp. Run local/segmentation/do_corruption_data_dir_overlapped_speech.sh."
+ exit 1 + fi + + $cmd JOB=1:$nj $overlap_dir/log/fix_overlapped_speech_labels.JOB.log \ + copy-matrix --apply-power=1 \ + "scp:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/log_snr.scp |" \ + ark:- \| extract-column ark:- ark,t:- \| \ + steps/segmentation/quantize_vector.pl $snr_threshold \| \ + segmentation-init-from-ali ark,t:- ark:- \| \ + segmentation-copy --label-map=$overlap_dir/invert_labels.map ark:- ark:- \| \ + segmentation-intersect-segments --mismatch-label=1000 \ + "ark:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/overlapped_speech_labels.scp | segmentation-init-from-ali scp:- ark:- | segmentation-copy --keep-label=1 ark:- ark:- |" ark:- ark:- \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark,scp:$overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.JOB.scp + + for n in `seq $nj`; do + cat $overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.$n.scp + done > $corrupted_data_dir/overlapped_speech_labels_fixed.scp +fi + +exit 0 + +####exit 1 +#### +####if [ $stage -le 9 ]; then +#### mkdir -p $overlap_data_dir $unreliable_data_dir +#### cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir +#### cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir +#### +#### # Create segments where there is definitely an overlap. +#### # Assume no more than 10 speakers overlap. +#### $cmd JOB=1:$nj $overlap_dir/log/process_to_segments.JOB.log \ +#### segmentation-post-process --remove-labels=0:1 \ +#### ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ +#### segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ +#### segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB +#### +#### $cmd JOB=1:$nj $overlap_dir/log/get_unreliable_segments.JOB.log \ +#### segmentation-to-segments --single-speaker \ +#### ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ +#### ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB +#### +#### for n in `seq $nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk +#### for n in `seq $nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments +#### for n in `seq $nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk +#### for n in `seq $nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments +#### +#### utils/fix_data_dir.sh $overlap_data_dir +#### utils/fix_data_dir.sh $unreliable_data_dir +#### +#### if $speed_perturb; then +#### utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp +#### utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp +#### fi +####fi +#### +####if $speed_perturb; then +#### overlap_data_dir=${overlap_data_dir}_sp +#### unreliable_data_dir=${unreliable_data_dir}_sp +####fi +#### +##### make $overlap_labels_dir an absolute pathname. 
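+#### # (The perl one-liner below is the relative-to-absolute path idiom used
+#### # throughout these scripts; a rough pure-bash sketch of the same logic,
+#### # assuming the directory need not exist yet, would be:
+#### #   case $overlap_labels_dir in
+#### #     /*) ;;
+#### #     *) overlap_labels_dir=$PWD/$overlap_labels_dir ;;
+#### #   esac
+#### # )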
+####overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` +#### +####if [ $stage -le 10 ]; then +#### utils/split_data.sh ${overlap_data_dir} $nj +#### +#### $cmd JOB=1:$nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ +#### utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ +#### segmentation-init-from-segments --shift-to-zero=false \ +#### ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ +#### segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ +#### ark:- \| \ +#### segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ +#### ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp +####fi +#### +####for n in `seq $reco_nj`; do +#### cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp +####done > ${corrupted_data_dir}/overlapped_speech_labels.scp +#### +####if [ $stage -le 11 ]; then +#### utils/data/get_reco2utt.sh ${unreliable_data_dir} +#### +#### # First convert the unreliable segments into a recording-level segmentation. +#### # Initialize a segmentation from utt2num_frames and set to 0, the regions +#### # of unreliable segments. At this stage deriv weights is 1 for all but the +#### # unreliable segment regions. +#### # Initialize a segmentation from the VAD labels and retain only the speech segments. +#### # Intersect this with the deriv weights segmentation from above. At this stage +#### # deriv weights is 1 for only the regions where base VAD label is 1 and +#### # the overlapping segment is not unreliable. Convert this to deriv weights. 
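+#### # (As used in these scripts, segmentation-create-subsegments with
+#### # --filter-label=L and --subsegment-label=M appears to relabel to M the
+#### # parts of the primary (first) segmentation that overlap regions carrying
+#### # label L in the filter (second) segmentation; e.g., hypothetically, a
+#### # primary [0,99]:1 with filter [20,39]:1 and --subsegment-label=0 gives
+#### # [0,19]:1 [20,39]:0 [40,99]:1.)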
+#### $cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ +#### segmentation-init-from-segments --shift-to-zero=false \ +#### "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ +#### segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ +#### ark:- \| \ +#### segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \ +#### "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \ +#### ark:- ark:- \| \ +#### segmentation-intersect-segments --mismatch-label=0 \ +#### "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \ +#### ark:- ark:- \| \ +#### segmentation-post-process --remove-labels=0 ark:- ark:- \| \ +#### segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ +#### steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ +#### ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp +#### +#### for n in `seq $reco_nj`; do +#### cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp +#### done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp +####fi +#### +####exit 0 diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh new file mode 100755 index 00000000000..80810afd619 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh @@ -0,0 +1,157 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +num_data_reps=5 +nj=40 +cmd=queue.pl +snr_db_threshold=10 +stage=-1 + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 data/fisher_train_100k_sp_75k_seg_ovlp_corrupted_hires_bp data/fisher_train_100k_sp_75k_seg_ovlp_corrupted exp/unsad/make_unsad_fisher_train_100k/tri4a_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad overlapping_sad_labels" + exit 1 +fi + +corrupted_data_dir=$1 +orig_corrupted_data_dir=$2 +utt_vad_dir=$3 +tmpdir=$4 +overlap_labels_dir=$5 + +overlapped_segments_info=$orig_corrupted_data_dir/overlapped_segments_info.txt +corrupted_data_id=`basename $orig_corrupted_data_dir` + +for f in $corrupted_data_dir/feats.scp $overlapped_segments_info $utt_vad_dir/sad_seg.scp; do + [ ! -f $f ] && echo "Could not find file $f" && exit 1 +done + +overlap_dir=$tmpdir/make_overlapping_sad_labels_${corrupted_data_id} + +# make $overlap_labels_dir an absolute pathname. +overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` +mkdir -p $overlap_labels_dir + +# Combine the VAD from the base recording and the VAD from the overlapping segments +# to create per-frame labels of the number of overlapping speech segments +# Unreliable segments are regions where no VAD labels were available for the +# overlapping segments. These can be later removed by setting deriv weights to 0. 
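+# In this variant, frames whose overlap count cannot be determined (because an
+# overlapping segment had no VAD labels) are tagged with the reserved label
+# 10000 via --junk-label below, instead of being written to a separate
+# unreliable segmentation as in prepare_unsad_overlapped_speech_data.sh; e.g.
+# a per-frame row (hypothetical) "utt1 1 1 2 10000 10000 0" would mark frames
+# 3-4 as junk.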
+ +if [ $stage -le 1 ]; then + for n in `seq $num_data_reps`; do + cat $utt_vad_dir/sad_seg.scp | \ + awk -v n=$n '{print "ovlp"n"_"$0}' + done | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp + utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/split_data.sh ${corrupted_data_dir} $nj + + # 1) segmentation-init-from-additive-signals-info converts the informtation + # written out but by steps/data/make_corrupted_data_dir.py in overlapped_segments_info.txt + # and converts it to segments. It then adds those segments to the + # segments already present ($corrupted_data_dir/sad_seg.scp) + # 2) Retain only the speech segments (label 1) from these. + # 3) Convert this to overlap stats using segmentation-get-stats, which + # writes for each frame the number of overlapping segments. + # 4) Convert this per-frame "alignment" information to segmentation + # ($overlap_dir/overlap_seg.*.gz). + $cmd JOB=1:$nj $overlap_dir/log/get_overlapping_sad_seg.JOB.log \ + segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + --junk-label=10000 \ + --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \ + "ark,t:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt $orig_corrupted_data_dir/overlapped_segments_info.txt |" \ + ark:- \| \ + segmentation-merge "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" ark:- ark:- \| \ + segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark:/dev/null ark:/dev/null ark:- \| \ + classes-per-frame-to-labels --junk-label=10000 ark:- ark:- \| \ + segmentation-init-from-ali ark:- \ + "ark:| gzip -c > $overlap_dir/overlap_sad_seg.JOB.gz" +fi + +if [ $stage -le 2 ]; then + # Call labels >2, i.e. regions where more than 1 speaker overlap as overlapping speech. labels = 1 is single speaker and labels = 0 is silence. + # Write this out in alignment format as "overlapping_sad_labels" + $cmd JOB=1:$nj $overlap_dir/log/get_overlapping_sad_labels.JOB.log \ + gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \ + segmentation-post-process --remove-labels=10000 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ + ark,scp:$overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.JOB.scp + + for n in `seq $nj`; do + cat $overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.$n.scp + done > ${corrupted_data_dir}/overlapping_sad_labels.scp +fi + +if [ $stage -le 3 ]; then + # Find regions where there is at least one speaker speaking. + $cmd JOB=1:$nj $overlap_dir/log/get_speech_feat.JOB.log \ + gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \ + segmentation-post-process --remove-labels=10000 ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| \ + vector-to-feat ark:- \ + ark,scp:$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.scp + + for n in `seq $nj`; do + cat $overlap_labels_dir/speech_feat_${corrupted_data_id}.$n.scp + done > ${corrupted_data_dir}/speech_feat.scp +fi + +if [ $stage -le 4 ]; then + # Deriv weights is 1 everywhere but the + # unreliable regions. 
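+  # i.e. the two segmentation-post-process calls below implement the mapping
+  #   0 -> 1, 1 -> 1, 2 -> 1, 10000 (junk/unreliable) -> 0
+  # A sketch of the same mapping via a label-map file (hypothetical
+  # alternative, using segmentation-copy as done elsewhere in these scripts):
+  #   printf '%s\n' '0 1' '1 1' '2 1' '10000 0' > $overlap_dir/deriv_weights.map
+  #   ... segmentation-copy --label-map=$overlap_dir/deriv_weights.map ark:- ark:- ...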
+  $cmd JOB=1:$nj $overlap_dir/log/get_deriv_weights.JOB.log \
+    gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \
+    segmentation-post-process --merge-labels=0:1:2 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-post-process --merge-labels=10000 --merge-dst-label=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+      ark,scp:$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights.scp
+fi
+
+snr_threshold=`perl -e "print $snr_db_threshold / 10.0 * log(10.0)"`
+
+cat <<EOF > $overlap_dir/invert_labels.map
+0 2
+1 1
+EOF
+
+if [ $stage -le 5 ]; then
+  if [ ! -f $corrupted_data_dir/log_snr.scp ]; then
+    echo "$0: Could not find $corrupted_data_dir/log_snr.scp. Run local/segmentation/do_corruption_data_dir_overlapped_speech.sh."
+    exit 1
+  fi
+
+  $cmd JOB=1:$nj $overlap_dir/log/fix_overlapping_sad_labels.JOB.log \
+    copy-matrix --apply-power=1 \
+      "scp:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/log_snr.scp |" \
+      ark:- \| extract-column ark:- ark,t:- \| \
+    steps/segmentation/quantize_vector.pl $snr_threshold \| \
+    segmentation-init-from-ali ark,t:- ark:- \| \
+    segmentation-copy --label-map=$overlap_dir/invert_labels.map ark:- ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \
+      "ark:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/overlapping_sad_labels.scp | segmentation-init-from-ali scp:- ark:- |" ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+      ark:- ark,scp:$overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.$n.scp
+  done > $corrupted_data_dir/overlapping_sad_labels_fixed.scp
+fi
+
+exit 0
diff --git a/egs/aspire/s5/local/segmentation/run_fisher.sh b/egs/aspire/s5/local/segmentation/run_fisher.sh
new file mode 100644
index 00000000000..e39ef5f3a91
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_fisher.sh
@@ -0,0 +1,23 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
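+
+# Top-level driver (flow inferred from the calls below): prepare noise- and
+# music-corrupted copies of Fisher, combine them, train a SAD and music
+# detection network, and run the AMI segmentation recipe with the trained
+# network. A hypothetical invocation, assuming the aspire run.sh has been run:
+#   local/segmentation/run_fisher.sh 2>&1 | tee exp/run_fisher.log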
+
+local/segmentation/prepare_fisher_data.sh
+
+utils/combine_data.sh --extra-files "speech_feat.scp deriv_weights.scp deriv_weights_manual_seg.scp music_labels.scp" \
+  data/fisher_train_100k_whole_all_corrupted_sp_hires_bp \
+  data/fisher_train_100k_whole_corrupted_sp_hires_bp \
+  data/fisher_train_100k_whole_music_corrupted_sp_hires_bp
+
+local/segmentation/train_stats_sad_music.sh \
+  --train-data-dir data/fisher_train_100k_whole_all_corrupted_sp_hires_bp \
+  --speech-feat-scp data/fisher_train_100k_whole_corrupted_sp_hires_bp/speech_feat.scp \
+  --deriv-weights-scp data/fisher_train_100k_whole_corrupted_sp_hires_bp/deriv_weights.scp \
+  --music-labels-scp data/fisher_train_100k_whole_music_corrupted_sp_hires_bp/music_labels.scp \
+  --max-param-change 0.2 \
+  --num-epochs 2 --affix k \
+  --splice-indexes "-3,-2,-1,0,1,2,3 -6,0,mean+count(-99:3:9:99) -9,0,3 0"
+
+local/segmentation/run_segmentation_ami.sh \
+  --nnet-dir exp/nnet3_sad_snr/nnet_tdnn_k_n4
diff --git a/egs/aspire/s5/local/segmentation/run_fisher_babel.sh b/egs/aspire/s5/local/segmentation/run_fisher_babel.sh
new file mode 100644
index 00000000000..bdf6d3585f7
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_fisher_babel.sh
@@ -0,0 +1,2 @@
+
+utils/combine_data.sh
diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh
new file mode 100755
index 00000000000..48677598728
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh
@@ -0,0 +1,452 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+. cmd.sh
+. path.sh
+
+set -e
+set -o pipefail
+set -u
+
+stage=-1
+nnet_dir=exp/nnet3_sad_snr/nnet_tdnn_k_n4
+extra_left_context=100
+extra_right_context=20
+task=SAD
+iter=final
+
+segmentation_stage=-1
+sil_prior=0.7
+speech_prior=0.3
+min_silence_duration=30
+min_speech_duration=10
+frame_subsampling_factor=3
+ali_dir=/export/a09/vmanoha1/workspace_asr_diarization/egs/ami/s5b/exp/ihm/nnet3_cleaned/tdnn_sp_ali_dev_ihmdata_oraclespk
+
+. 
utils/parse_options.sh + +export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH + +src_dir=/export/a09/vmanoha1/workspace_asr_diarization/egs/ami/s5b # AMI src_dir +dir=exp/sad_ami_sdm1_dev/ref + +mkdir -p $dir + +# Expecting user to have done run.sh to run the AMI recipe in $src_dir for +# both sdm and ihm microphone conditions + +if [ $stage -le 1 ]; then + ( + cd $src_dir + local/prepare_parallel_train_data.sh --train-set dev sdm1 + + awk '{print $1" "$2}' $src_dir/data/ihm/dev/segments > \ + $src_dir/data/ihm/dev/utt2reco + awk '{print $1" "$2}' $src_dir/data/sdm1/dev/segments > \ + $src_dir/data/sdm1/dev/utt2reco + + cat $src_dir/data/sdm1/dev_ihmdata/ihmutt2utt | \ + utils/apply_map.pl -f 1 $src_dir/data/ihm/dev/utt2reco | \ + utils/apply_map.pl -f 2 $src_dir/data/sdm1/dev/utt2reco | \ + sort -u > $src_dir/data/sdm1/dev_ihmdata/ihm2sdm_reco + ) +fi + +if [ $stage -le 2 ]; then + ( + cd $src_dir + + utils/copy_data_dir.sh $src_dir/data/sdm1/dev_ihmdata \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk + + cut -d ' ' -f 1,2 $src_dir/data/ihm/dev/segments | \ + utils/apply_map.pl -f 1 $src_dir/data/sdm1/dev_ihmdata/ihmutt2utt > \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2spk.temp + + cat $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2spk.temp | \ + awk '{print $1" "$2"-"$1}' > \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2newutt + + utils/apply_map.pl -f 1 $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2newutt \ + < $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2spk.temp > \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2spk + + for f in feats.scp segments text; do + utils/apply_map.pl -f 1 $src_dir/data/sdm1/dev_ihmdata_oraclespk/utt2newutt \ + < $src_dir/data/sdm1/dev_ihmdata/$f > \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk/$f + done + + rm $src_dir/data/sdm1/dev_ihmdata_oraclespk/{spk2utt,cmvn.scp} + utils/fix_data_dir.sh \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk + + utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev_ihmdata_oraclespk + ) +fi + +phone_map=$dir/phone_map +if [ $stage -le 2 ]; then + steps/segmentation/get_sad_map.py \ + $src_dir/data/lang | utils/sym2int.pl -f 1 $src_dir/data/lang/phones.txt > \ + $phone_map +fi + +if [ -z $ali_dir ]; then + if [ $stage -le 3 ]; then + # Expecting user to have run local/run_cleanup_segmentation.sh in $src_dir + ( + cd $src_dir + steps/align_fmllr.sh --nj 18 --cmd "$train_cmd" \ + data/sdm1/dev_ihmdata_oraclespk data/lang \ + exp/ihm/tri3_cleaned \ + exp/sdm1/tri3_cleaned_dev_ihmdata_oraclespk + ) + fi + ali_dir=exp/sdm1/tri3_cleaned_ali_dev_ihmdata_oraclespk +fi + +if [ $stage -le 4 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$train_cmd" \ + $ali_dir $phone_map $dir +fi + +echo "A 1" > $dir/channel_map +cat $src_dir/data/sdm1/dev/reco2file_and_channel | \ + utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel + +# Map each IHM recording to a unique integer id. +# This will be the "speaker label" as each recording is assumed to have a +# single speaker. +cat $src_dir/data/sdm1/dev_ihmdata_oraclespk/reco2utt | \ + awk 'BEGIN{i=1} {print $1" "1":"i" 100000:100000"; i++;}' > \ + $src_dir/data/sdm1/dev_ihmdata_oraclespk/reco.txt + +if [ $stage -le 5 ]; then + utils/data/get_reco2num_frames.sh --frame-shift 0.01 --frame-overlap 0.015 \ + --cmd "$train_cmd" --nj 18 \ + $src_dir/data/sdm1/dev + + # Get a filter that changes the first and the last segment region outside + # the manual segmentation (usually some preparation lines) that are not + # transcribed. 
+ $train_cmd $dir/log/interior_regions.log \ + segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $src_dir/data/sdm1/dev/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/dev/reco2num_frames ark:- |" ark:- ark,t:- \| \ + perl -ane '$F[3] = 100000; $F[$#F-1] = 100000; print join(" ", @F) . "\n";' \| \ + segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --merge-labels=100000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=1000000 ark,t:- \ + "ark:| gzip -c > $dir/interior_regions.seg.gz" + + $train_cmd $dir/log/get_manual_segments_regions.log \ + segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $src_dir/data/sdm1/dev/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:segmentation-init-from-lengths --label=100000 ark,t:$src_dir/data/sdm1/dev/reco2num_frames ark:- |" ark:- ark:- \| \ + segmentation-post-process --merge-labels=100000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=1000000 ark,t:- \ + "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" +fi + +if [ $stage -le 6 ]; then + # Reference RTTM where SPEECH frames are obtainted by combining IHM VAD alignments + $train_cmd $dir/log/get_ref_spk_seg.log \ + segmentation-combine-segments --include-missing-utt-level-segmentations scp:$dir/sad_seg.scp \ + "ark:segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 --label=100000 $src_dir/data/sdm1/dev_ihmdata_oraclespk/segments ark:- |" \ + ark,t:$src_dir/data/sdm1/dev_ihmdata_oraclespk/reco2utt ark:- \| \ + segmentation-post-process --remove-labels=0 ark:- ark:- \| \ + segmentation-copy --utt2label-map-rspecifier=ark,t:$src_dir/data/sdm1/dev_ihmdata_oraclespk/reco.txt \ + ark:- ark:- \| \ + segmentation-merge-recordings \ + "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/dev_ihmdata/ihm2sdm_reco |" \ + ark:- "ark:| gzip -c > $dir/ref_spk_seg.gz" +fi + +if [ $stage -le 7 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_spk_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-copy --keep-label=0 "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-labels=0 --merge-dst-label=100000 ark:- ark:- \| \ + segmentation-merge "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --map-to-speech-and-sil=false --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_spk_manual_seg.rttm + + $train_cmd $dir/log/get_ref_spk_rttm_interior.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-copy --keep-label=0 "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-labels=0 --merge-dst-label=100000 ark:- ark:- \| \ + segmentation-merge "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --map-to-speech-and-sil=false --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl 
'>' $dir/ref_spk_interior.rttm + + $train_cmd $dir/log/get_ref_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=100000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_manual_seg.rttm + + $train_cmd $dir/log/get_ref_rttm_interior.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=100000 \ + ark:- "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_interior.rttm + + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_overlapping_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=100000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/overlapping_speech_ref_manual_seg.rttm + + $train_cmd $dir/log/get_overlapping_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + 
segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=100000 \ + ark:- "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=100000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl'>' $dir/overlapping_speech_ref_interior.rttm +fi + +exit 0 + +if [ $stage -le 8 ]; then + # Get a filter that selects only regions of speech + $train_cmd $dir/log/get_speech_filter.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=0 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 \ + ark:- "ark:| gzip -c > $dir/manual_segments_speech_regions.seg.gz" +fi + +hyp_dir=${nnet_dir}/segmentation_ami_sdm1_dev_whole_bp/ami_sdm1_dev + +if [ $stage -le 9 ]; then + steps/segmentation/do_segmentation_data_dir.sh --reco-nj 18 \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ + --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ + --output-name output-speech --frame-subsampling-factor $frame_subsampling_factor --iter $iter \ + --stage $segmentation_stage \ + $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir +fi + +sad_dir=${nnet_dir}/sad_ami_sdm1_dev_whole_bp/ +hyp_dir=${hyp_dir}_seg + +if [ $stage -le 10 ]; then + utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev_ihmdata_oraclespk + utils/data/get_reco2utt.sh $hyp_dir + + segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $hyp_dir/segments ark:- | \ + segmentation-combine-segments-to-recordings ark:- ark,t:$hyp_dir/reco2utt ark:- | \ + segmentation-to-ali --length-tolerance=48 --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + ark:- ark:- | \ + segmentation-init-from-ali ark:- ark:- | \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel ark:- $hyp_dir/sys.rttm + + #steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ + # $hyp_dir/utt2spk \ + # $hyp_dir/segments \ + # $dir/reco2file_and_channel \ + # /dev/stdout | spkr2sad.pl > $hyp_dir/sys.rttm +fi + +if [ $stage -le 11 ]; then + cat < $likes_dir/log_likes.JOB.gz" + cp $sad_dir/num_jobs $likes_dir + fi + else + if [ $stage -le 12 ]; then + steps/segmentation/do_segmentation_data_dir_generic.sh --reco-nj 18 \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ + --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ + --segmentation-config conf/segmentation_ovlp.conf \ + --output-name output-overlapping_sad \ + --min-durations 30:10:10 --priors 0.5:0.35:0.15 \ + --sad-name ovlp_sad --segmentation-name segmentation_ovlp_sad \ + --frame-subsampling-factor $frame_subsampling_factor --iter $iter \ + --stage 
$segmentation_stage \ + $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir + fi + + likes_dir=${nnet_dir}/ovlp_sad_ami_sdm1_dev_whole_bp/ + fi + + hyp_dir=${hyp_dir}_seg + mkdir -p $hyp_dir + + seg_dir=${nnet_dir}/segmentation_ovlp_sad_ami_sdm1_dev_whole_bp/ + lang=${seg_dir}/lang + + if [ $stage -le 14 ]; then + mkdir -p $lang + steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=10 --end-transition-probability=0.1" \ + --phone-transition-parameters="--phone-list=2 --min-duration=3 --end-transition-probability=0.1" \ + --phone-transition-parameters="--phone-list=3 --min-duration=3 --end-transition-probability=0.1" $lang + cp $lang/phones.txt $lang/words.txt + + feat_dim=2 # dummy. We don't need this. + $train_cmd $seg_dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $seg_dir/tree \| \ + copy-transition-model --binary=false - $seg_dir/trans.mdl || exit 1 +fi + + if [ $stage -le 15 ]; then + + cat > $lang/word2prior < $lang/G.fst +fi + + if [ $stage -le 16 ]; then + $train_cmd $seg_dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $seg_dir $seg_dir/graph_test || exit 1 + fi + + if [ $stage -le 17 ]; then + steps/segmentation/decode_sad.sh \ + --acwt 1 --beam 10 --max-active 7000 --iter trans \ + $seg_dir/graph_test $likes_dir $seg_dir + fi + + if [ $stage -le 18 ]; then + cat < $hyp_dir/labels_map +1 0 +2 1 +3 2 +EOF + gunzip -c $seg_dir/ali.*.gz | \ + segmentation-init-from-ali ark:- ark:- | \ + segmentation-copy --frame-subsampling-factor=$frame_subsampling_factor \ + --label-map=$hyp_dir/labels_map ark:- ark:- | \ + segmentation-to-rttm --map-to-speech-and-sil=false \ + --reco2file-and-channel=$dir/reco2file_and_channel ark:- $hyp_dir/sys.rttm + fi + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_overlapping_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm + + if [ $stage -le 19 ]; then + cat < \ + $src_dir/data/ihm/train/utt2reco + awk '{print $1" "$2}' $src_dir/data/sdm1/train/segments > \ + $src_dir/data/sdm1/train/utt2reco + + cat $src_dir/data/sdm1/train_ihmdata/ihmutt2utt | \ + utils/apply_map.pl -f 1 $src_dir/data/ihm/train/utt2reco | \ + utils/apply_map.pl -f 2 $src_dir/data/sdm1/train/utt2reco | \ + sort -u > $src_dir/data/sdm1/train_ihmdata/ihm2sdm_reco + ) +fi + +if [ $stage -le 2 ]; then + ( + cd $src_dir + + utils/copy_data_dir.sh $src_dir/data/sdm1/train_ihmdata \ + $src_dir/data/sdm1/train_ihmdata_oraclespk + + cat $src_dir/data/ihm/train/utt2spk | \ + utils/apply_map.pl -f 1 $src_dir/data/sdm1/train_ihmdata/ihmutt2utt > \ + $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2spk.temp + + cat 
$src_dir/data/sdm1/train_ihmdata_oraclespk/utt2spk.temp | \ + awk '{print $1" "$2"-"$1}' > \ + $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2newutt + + utils/apply_map.pl -f 1 $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2newutt \ + < $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2spk.temp > \ + $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2spk + + for f in feats.scp segments text; do + utils/apply_map.pl -f 1 $src_dir/data/sdm1/train_ihmdata_oraclespk/utt2newutt \ + < $src_dir/data/sdm1/train_ihmdata/$f > \ + $src_dir/data/sdm1/train_ihmdata_oraclespk/$f + done + + rm $src_dir/data/sdm1/train_ihmdata_oraclespk/{spk2utt,cmvn.scp} + utils/fix_data_dir.sh \ + $src_dir/data/sdm1/train_ihmdata_oraclespk + + utils/data/get_reco2utt.sh $src_dir/data/sdm1/train_ihmdata_oraclespk + ) +fi + +phone_map=$dir/phone_map +if [ $stage -le 2 ]; then + steps/segmentation/get_sad_map.py \ + $src_dir/data/lang | utils/sym2int.pl -f 1 $src_dir/data/lang/phones.txt > \ + $phone_map +fi + +if [ -z $ali_dir ]; then + if [ $stage -le 3 ]; then + # Expecting user to have run local/run_cleanup_segmentation.sh in $src_dir + ( + cd $src_dir + steps/align_fmllr.sh --nj 18 --cmd "$train_cmd" \ + data/sdm1/train_ihmdata_oraclespk data/lang \ + exp/ihm/tri3_cleaned \ + exp/sdm1/tri3_cleaned_train_ihmdata_oraclespk + ) + fi + ali_dir=exp/sdm1/tri3_cleaned_ali_train_ihmdata_oraclespk +fi + +if [ $stage -le 4 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$train_cmd" \ + $ali_dir $phone_map $dir +fi + +echo "A 1" > $dir/channel_map +cat $src_dir/data/sdm1/train/reco2file_and_channel | \ + utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel + +# Map each IHM recording to a unique integer id. +# This will be the "speaker label" as each recording is assumed to have a +# single speaker. +cat $src_dir/data/sdm1/train_ihmdata_oraclespk/reco2utt | \ + awk 'BEGIN{i=1} {print $1" "1":"i; i++;}' > \ + $src_dir/data/sdm1/train_ihmdata_oraclespk/reco.txt +if [ $stage -le 5 ]; then + utils/data/get_reco2num_frames.sh --frame-shift 0.01 --frame-overlap 0.015 \ + --cmd "$train_cmd" --nj 18 \ + $src_dir/data/sdm1/train + + # Get a filter that changes the first and the last segment region outside + # the manual segmentation (usually some preparation lines) that are not + # transcribed. + $train_cmd $dir/log/interior_regions.log \ + segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $src_dir/data/sdm1/train/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/train/reco2utt ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/train/reco2num_frames ark:- |" ark:- ark,t:- \| \ + perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . 
"\n";' \| \ + segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=1000000 ark,t:- \ + "ark:| gzip -c > $dir/interior_regions.seg.gz" + + $train_cmd $dir/log/get_manual_segments_regions.log \ + segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $src_dir/data/sdm1/train/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/train/reco2utt ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/train/reco2num_frames ark:- |" ark:- ark,t:- \| \ + perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . "\n";' \| \ + segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=10000 ark,t:- \ + "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" +fi + +if [ $stage -le 6 ]; then + # Reference RTTM where SPEECH frames are obtainted by combining IHM VAD alignments + $train_cmd $dir/log/get_ref_spk_seg.log \ + segmentation-combine-segments scp:$dir/sad_seg.scp \ + "ark:segmentation-init-from-segments --shift-to-zero=false --frame-overlap=0.0 $src_dir/data/sdm1/train_ihmdata_oraclespk/segments ark:- |" \ + ark,t:$src_dir/data/sdm1/train_ihmdata_oraclespk/reco2utt ark:- \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-copy --utt2label-map-rspecifier=ark,t:$src_dir/data/sdm1/train_ihmdata/reco.txt \ + ark:- ark:- \| \ + segmentation-merge-recordings \ + "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/train_ihmdata/ihm2sdm_reco |" \ + ark:- "ark:| gzip -c > $dir/ref_spk_seg.gz" +fi + +if [ $stage -le 7 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_spk_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-copy --keep-label=0 "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-labels=0 --merge-dst-label=10000 ark:- ark:- \| \ + segmentation-merge "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --map-to-speech-and-sil=false --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_spk_manual_seg.rttm + + $train_cmd $dir/log/get_ref_spk_rttm_interior.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-copy --keep-label=0 "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-labels=0 --merge-dst-label=10000 ark:- ark:- \| \ + segmentation-merge "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --map-to-speech-and-sil=false --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_spk_interior.rttm + + $train_cmd $dir/log/get_ref_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + 
segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_manual_seg.rttm + + $train_cmd $dir/log/get_ref_rttm_interior.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/ref_interior.rttm + + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_overlapping_rttm_manual_seg.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/overlapping_speech_ref_manual_seg.rttm + + $train_cmd $dir/log/get_overlapping_rttm_interior.log \ + export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH '&&' \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/interior_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm 
--map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- - \| \ + rttmSmooth.pl -s 0 \| rttmSort.pl '>' $dir/overlapping_speech_ref_interior.rttm +fi + +exit 0 + +if [ $stage -le 8 ]; then + # Get a filter that selects only regions of speech + $train_cmd $dir/log/get_speech_filter.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=0 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 \ + ark:- "ark:| gzip -c > $dir/manual_segments_speech_regions.seg.gz" +fi + +hyp_dir=${nnet_dir}/segmentation_ovlp_ami_sdm1_train_whole_bp/ami_sdm1_train + +if [ $stage -le 12 ]; then + steps/segmentation/do_segmentation_data_dir_generic.sh --reco-nj 18 \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ + --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ + --segmentation-config conf/segmentation_ovlp.conf \ + --output-name output-overlapping_sad \ + --min-durations 30:10:10 --priors 0.5:0.35:0.15 \ + --sad-name ovlp_sad --segmentation-name segmentation_ovlp_sad \ + --frame-subsampling-factor $frame_subsampling_factor --iter $iter \ + --stage $segmentation_stage \ + $src_dir/data/sdm1/train $nnet_dir mfcc_hires_bp $hyp_dir +fi + +likes_dir=${nnet_dir}/ovlp_sad_ami_sdm1_train_whole_bp/ + +hyp_dir=${hyp_dir}_seg +mkdir -p $hyp_dir + +seg_dir=${nnet_dir}/segmentation_ovlp_sad_ami_sdm1_train_whole_bp/ +lang=${seg_dir}/lang + +if [ $stage -le 14 ]; then +mkdir -p $lang +steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=10 --end-transition-probability=0.1" \ + --phone-transition-parameters="--phone-list=2 --min-duration=3 --end-transition-probability=0.1" \ + --phone-transition-parameters="--phone-list=3 --min-duration=3 --end-transition-probability=0.1" $lang +cp $lang/phones.txt $lang/words.txt + +feat_dim=2 # dummy. We don't need this. 
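+# Note on the next command: gmm-init-mono is used here only to manufacture a +# transition model whose topology encodes the per-class minimum durations set +# up by prepare_sad_lang.py above; the GMM parameters themselves are never +# used, which is why a dummy feature dimension of 2 is fine. As a hypothetical +# sanity check (assuming the Kaldi binaries are on the PATH), +#   copy-transition-model --binary=false $seg_dir/trans.mdl - | head +# should print the text-form <TransitionModel>, whose <Topology> block covers +# the three class labels (1, 2, 3). 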
+$train_cmd $seg_dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $seg_dir/tree \| \ + copy-transition-model --binary=false - $seg_dir/trans.mdl || exit 1 +fi + +if [ $stage -le 15 ]; then + +cat > $lang/word2prior < $lang/G.fst +fi + +if [ $stage -le 16 ]; then + $train_cmd $seg_dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $seg_dir $seg_dir/graph_test || exit 1 +fi + +if [ $stage -le 17 ]; then + steps/segmentation/decode_sad.sh \ + --acwt 1 --beam 10 --max-active 7000 --iter trans \ + $seg_dir/graph_test $likes_dir $seg_dir +fi + +if [ $stage -le 18 ]; then + cat <<EOF > $hyp_dir/labels_map +1 0 +2 1 +3 2 +EOF + gunzip -c $seg_dir/ali.*.gz | \ + segmentation-init-from-ali ark:- ark:- | \ + segmentation-copy --frame-subsampling-factor=$frame_subsampling_factor \ + --label-map=$hyp_dir/labels_map ark:- ark:- | \ + segmentation-to-rttm --map-to-speech-and-sil=false \ + --reco2file-and-channel=$dir/reco2file_and_channel ark:- $hyp_dir/sys.rttm +fi +# Get RTTM for overlapped speech detection with 3 classes +# 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP +$train_cmd $dir/log/get_overlapping_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/train/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm + +if [ $stage -le 19 ]; then + cat < 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_ovlp_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + fast-lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=256 + fast-lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=lstm2 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 objective-scale=$ovlp_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_sad_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$ovlp_sad_data_dir/overlapping_sad_labels_fixed.scp --deriv-weights-scp=$ovlp_sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh new file mode 100644 index 00000000000..4f0754d8355 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh @@ -0,0 +1,267 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=80 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn5 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn5 + + output name=output-temp input=Append(input@-2,input@-1,input,input@1,input@2) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + 
--trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh new file mode 100644 index 00000000000..cbbb016607a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=80 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh new file mode 100644 index 00000000000..53c2a7a47ac --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
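+# Below, the train and valid diagnostic subsets are sized at 0.5% of the +# combined utterance count, capped at 4000 utterances. A worked example with +# illustrative numbers: for 100000 utterances, +#   perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' 100000 +# prints 500; the cap only takes effect somewhere above 800000 utterances. 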
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=80 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1c + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + 
--trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh new file mode 100644 index 00000000000..dfb1297c895 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is the same as 1c, but uses a larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1e + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh new file mode 100644 index 00000000000..782a31132c6 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. 
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + ... + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi
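 + + # For reference, the --scp2ark-cmd above converts each per-frame integer + # label into a one-hot posterior. A rough stand-alone sketch of the same + # conversion (assuming speech_labels.scp holds integer vectors): + #   copy-int-vector scp:$sad_data_dir/speech_labels.scp ark:- | \ + #     ali-to-post ark:- ark,t:- | head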
 + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh new file mode 100644 index 00000000000..eea5956e005 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM network for speech activity detection (SAD) and +# music-id, using an LSTM for long-context information. +# This script is the same as 1c, but uses a larger amount of data. 
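+ +# A quick illustration of the 'set -o pipefail' used just below (example +# only): without pipefail a pipeline's exit status is that of its last +# command, so 'false | true' succeeds; with it, the pipeline fails: +#   bash -c 'false | true'; echo $?                    # 0 +#   bash -c 'set -o pipefail; false | true'; echo $?   # 1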
+ +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1g + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + ... + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi
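 + + # Note on --deriv-weights-scp above: each entry is a per-frame 0/1 vector + # aligned with the utterance; frames with weight 0 are excluded from the + # gradient for that output. A hypothetical ark entry might look like: + #   utt1 [ 1 1 1 0 0 1 ]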
 + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh new file mode 100644 index 00000000000..d9e1966bf6a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM network for speech activity detection (SAD) and +# music-id, using an LSTM for long-context information. +# This script is the same as 1c, but uses a larger amount of data. 
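+ +# For reference, the num_utts_subset_{train,valid} values computed below +# take 0.5% of the combined utterance count, capped at 4000, e.g.: +#   perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' 1000000   # -> 4000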
+ +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + ... + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi
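 + + # The /export/b{03,04,05,06}/... argument above relies on bash brace + # expansion to list one storage directory per disk; as a small illustration: + #   echo /export/b{03,04}/egs   # -> /export/b03/egs /export/b04/egs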
 + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # The egs from the two egs dirs are combined directly at the training + # minibatch size (num_chunk_per_minibatch). + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh new file mode 100644 index 00000000000..be568eefd97 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh @@ -0,0 +1,308 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id, using an LSTM for long-context information. +# This is the same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + ... > $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi
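 + + # The objective scales read from $dir/scales in stage 2 above are a single + # whitespace-separated line, and awk '{print $N}' picks out field N; for + # example: + #   echo '1.0 0.5 0.25' | awk '{print $2}'   # -> 0.5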
 + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # The egs from the two egs dirs are combined directly at the training + # minibatch size (num_chunk_per_minibatch). + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh new file mode 100644 index 00000000000..ae85a93a7fc --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh @@ -0,0 +1,306 @@ +#!/bin/bash + +# This is a script to train a 
TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + ... + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + relu-renorm-layer name=tdnn3-snr input=Append(lstm1@-12,lstm1@0,lstm1@12,tdnn3) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn3-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi
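 + + # Unlike the sparse, one-hot label targets above, output-snr uses + # --target-type=dense: irm_targets.scp points to real-valued per-frame + # matrices (one column per frequency bin) trained with the quadratic + # objective declared in the xconfig; no ali-to-post conversion is needed. + # The bin count can be checked with, e.g.: + #   feat-to-dim scp:$sad_data_dir/irm_targets.scp -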
 + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # The egs from the two egs dirs are combined directly at the training + # minibatch size (num_chunk_per_minibatch). + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh new file mode 100644 index 00000000000..b6c43a92992 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh @@ -0,0 +1,315 @@ +#!/bin/bash + +# This is a script to train a 
TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +if [ $stage -le 1 ]; then + ... > $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm1@-6,lstm1@0,lstm1@6,lstm1@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <<EOF >> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
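+ +# With the chunk_width=20 set above, this works out to int(400000 / 20) +# = 20000 chunks per archive, i.e. roughly 400k frames per archive; e.g.: +#   perl -e "print int(400000 / 20)"   # -> 20000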
+ +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # The egs from the two egs dirs are combined directly at the training + # minibatch size (num_chunk_per_minibatch). + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh new file mode 100644 index 00000000000..bf397565148 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh @@ -0,0 +1,312 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1i, but removes the speech-music output. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1j + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm1@-6,lstm1@0,lstm1@6,lstm1@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / 
$chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh new file mode 100644 index 00000000000..cb585523f74 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1k + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm2@-6,lstm2@0,lstm2@6,lstm2@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh new file mode 100644 index 00000000000..d8910053e61 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1k + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm2@-6,lstm2@0,lstm2@6,lstm2@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp 
input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh new file mode 100755 index 00000000000..adc4fc81c08 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
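(A note on the statistics-pooling idiom named above, since it is what distinguishes these "stats" scripts from the plain LSTM ones: in nnet3 xconfig, a stats-layer config string has the form func(left-context:input-period:stats-period:right-context). Reading the layer this script defines below, the network pools mean and count statistics over a window of roughly 99 frames on either side of each frame. The interpretation of the two periods as subsampling intervals is my reading of the nnet3 convention, not something stated in this patch. A minimal sketch in the same xconfig syntax, using the exact layer from this script:

# Pool mean and count statistics over about +/-99 frames around t,
# reading the input every 3 frames and re-emitting statistics every 9 frames.
# This is what gives the TDNN its long-context view without recurrence.
stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
# The pooled statistics are then appended to a spliced hidden layer downstream:
relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512
)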
+ +# This script is similar to 1f, but adds max-change=0.75 and learning-rate-factor=0.02 to the final affine. +# It also changes relu-dim to 512. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh new file mode 100755 index 00000000000..52a15686d28 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh @@ -0,0 +1,259 @@ +#!/bin/bash + +# This is a script to train an LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
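(How the --scp2ark-cmd pipelines above work: get_egs_multiple_targets.py pipes each subset of the targets scp through the given command and stores the resulting posteriors in the examples. A sketch of the speech-target pipeline, with the caveat that the exact mapping done by steps/segmentation/quantize_vector.pl is assumed here, each per-frame score quantized to an integer class, since its source is not part of this hunk:

# stdin: an scp subset, one matrix of per-frame scores per utterance.
# extract-column keeps column 0 of each matrix (the per-frame speech score);
# quantize_vector.pl (assumed) maps each score to an integer class label;
# ali-to-post wraps the integer labels as one-hot posteriors for the egs dumper.
extract-column --column-index=0 scp:- ark,t:- \
  | steps/segmentation/quantize_vector.pl \
  | ali-to-post ark,t:- ark:-

Targets that are already integer labels, such as overlapped_speech_labels_fixed.scp, only need the final ali-to-post step.)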
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=512 + lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=512 + lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-6 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=lstm2 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + 
--trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh new file mode 100644 index 00000000000..d003f746c4b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=128 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +data_dir=data/ami_sdm1_train_whole_hires_bp +labels_scp=exp/sad_ami_sdm1_train/ref/overlapping_sad_labels.scp +deriv_weights_scp=exp/sad_ami_sdm1_train/ref/deriv_weights_for_overlapping_sad.scp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_ovlp_sad_ami/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6) dim=256 + lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt learning-rate-factor=0.05 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapping_sad new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_overlapping_sad + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_overlapping_sad/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_overlapping_sad/storage $dir/egs_overlapping_sad/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$labels_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_overlapping_sad + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=false --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true 
\ + --use-dense-targets=false \ + --feat-dir=$data_dir \ + --targets-scp="$labels_scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh new file mode 100644 index 00000000000..3aa4f28f99a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=128 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +data_dir=data/ami_sdm1_train_whole_hires_bp +labels_scp=exp/sad_ami_sdm1_train/ref/overlapping_sad_labels.scp +deriv_weights_scp=exp/sad_ami_sdm1_train/ref/deriv_weights_for_overlapping_sad.scp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_ovlp_sad_ami/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6) dim=256 + lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt learning-rate-factor=0.05 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapping_sad new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_overlapping_sad + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_overlapping_sad/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_overlapping_sad/storage $dir/egs_overlapping_sad/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$labels_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_overlapping_sad + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=false --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$data_dir \ + --targets-scp="$labels_scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh new file mode 100755 index 00000000000..e63c5d8a063 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +# This is a script to train an LSTM for overlapped speech activity detection. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. 
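(The num_utts_subset_* one-liners in these scripts size the diagnostic train/valid subsets at 0.5% of the utterance count, capped at 4000; the AMI variants above additionally floor the result at 6 so that a very small data set still yields a usable validation subset. A few worked values for the AMI form of the one-liner:

# int(num_utts * 0.005), capped at 4000, floored at 6:
#   num_utts=400     -> int(2.0) = 2  -> floored to 6
#   num_utts=100000  -> 500
#   num_utts=2000000 -> 10000         -> capped to 4000
num_utts=`cat $data_dir/utt2spk | wc -l`
num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts`
)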
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+chunk_width=40
+num_chunk_per_minibatch=64
+
+extra_left_context=40  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=0  # Maximum right context in egs apart from TDNN's right context
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=f
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_lstm
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-2,-1,0,1,2)
+
+  relu-renorm-layer name=tdnn1 dim=256 input=Append(input@-2, input@-1, input, input@1, input@2)
+  lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=256
+  lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
+
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh
new file mode 100755
index 00000000000..15235882f90
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+# This is a script to train an LSTM for overlapped speech activity detection.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
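+# Relative to 1a, this version widens the network: the TDNN layers go from
+# dim=256 to dim=512, and the LSTMP layers from cell-dim=256 with projection
+# dims of 128 to cell-dim=512 with projection dims of 256; the rest of the
+# recipe is unchanged.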
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+chunk_width=40
+num_chunk_per_minibatch=64
+
+extra_left_context=40  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=0  # Maximum right context in egs apart from TDNN's right context
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=b
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_lstm
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-2,-1,0,1,2)
+
+  relu-renorm-layer name=tdnn1 dim=512 input=Append(input@-2, input@-1, input, input@1, input@2)
+  lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3
+  relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=512
+  lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-6
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
+
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh
new file mode 100755
index 00000000000..2201f9fd8d1
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for overlapped speech activity detection
+# using a statistics pooling component for long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
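+# For reference, our reading of the stats-layer xconfig notation used below is
+# mean+count(left-context:input-period:stats-period:right-context), so
+#   stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+# pools mean and count statistics over a window of [-99, +99] frames, reading
+# its input every 3 frames and outputting pooled statistics every 9 frames.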
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=40 # We use chunk training for training TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=f
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256
+  relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256
+  relu-renorm-layer name=tdnn4 dim=256
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh
new file mode 100755
index 00000000000..81febb5fa09
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for overlapped speech activity detection
+# using a statistics pooling component for long-context information.
+
+# This script is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.1 to the final
+# affine layer.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
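+# As a reminder of general nnet3 behavior (not specific to this script):
+# learning-rate-factor scales the learning rate of the affected component, so
+# learning-rate-factor=0.1 makes the final affine layer learn 10x more
+# slowly, and max-change=0.75 caps the l2 norm of that component's parameter
+# change on each minibatch.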
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=40 # We use chunk training for training TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=f
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256
+  relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256
+  relu-renorm-layer name=tdnn4 dim=256
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.1
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh
new file mode 100755
index 00000000000..adc4fc81c08
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for overlapped speech activity detection
+# using a statistics pooling component for long-context information.
+
+# This script is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.02 to the final
+# affine layer, and changes relu-dim to 512.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+chunk_width=40 # We use chunk training for training TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=f
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512
+  relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512
+  relu-renorm-layer name=tdnn4 dim=512
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh
new file mode 100755
index 00000000000..dcd11ad2aa6
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for overlapped speech activity detection
+# using a statistics pooling component for long-context information.
+
+# This script is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.02 to the final affine layer.
+# It is similar to 1g, but moves the stats pooling to a higher layer and changes the splicing to -12 from -9.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
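+# The stats layer below uses config=mean+count(-96:6:12:96); assuming the
+# usual 10 ms frame shift, this pools statistics over roughly +/-0.96 seconds
+# of context around each frame.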
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+chunk_width=40 # We use chunk training for training TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=90  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=15 # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs
+                   # such as removing one of the targets
+
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=f
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_ovlp/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1) dim=512
+  stats-layer name=tdnn3_stats config=mean+count(-96:6:12:96)
+  relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2,tdnn2@6, tdnn3_stats) dim=512
+  relu-renorm-layer name=tdnn4 dim=512
+
+  output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_ovlp
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh
new file mode 100755
index 00000000000..8242b83c747
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD)
+# and music-id, using a statistics pooling component for long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
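+# A hypothetical invocation, for illustration only (the scp paths below are
+# assumptions; speech_feat_scp, music_labels_scp and deriv_weights_scp
+# default to empty and must be supplied):
+#
+#   local/segmentation/tuning/train_stats_sad_music_1a.sh \
+#     --speech-feat-scp data/train_whole/speech_feat.scp \
+#     --music-labels-scp data/train_whole/music_labels.scp \
+#     --deriv-weights-scp data/train_whole/deriv_weights.scp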
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+splice_indexes="-3,-2,-1,0,1,2,3 -6,0,mean+count(-99:3:9:99) -9,0,3 0"
+relu_dim=256
+chunk_width=20 # We use chunk training for training TDNN
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs
+                    # such as removing one of the targets
+
+num_utts_subset_valid=50 # "utts" here are actually recordings, so this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}_n${num_hidden_layers}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  # This is disabled for now.
+  # fixed-affine-layer name=lda input=Append(-3,-2,-1,0,1,2,3) affine-transform-file=$dir/configs/lda.mat
+  # the first splicing is moved before the lda layer, so no splicing here
+  # relu-renorm-layer name=tdnn1 dim=625
+
+  relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=256
+  stats-layer name=tdnn2.stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(Offset(tdnn1, -6), tdnn1, tdnn2.stats) dim=256
+  relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256
+  relu-renorm-layer name=tdnn4 dim=256
+
+  output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn4
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn4
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/
+fi
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs
+  if [ $stage -le 4 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$train_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=20000 \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --dir=$dir/egs
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=20 \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=64 \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-music.vec.JOB
+  eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh
new file mode 100755
index 00000000000..163ea6df14d
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD)
+# and music-id, using a statistics pooling component for long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=20 # We use chunk training for training TDNN
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs
+                    # such as removing one of the targets
+
+num_utts_subset_valid=50 # "utts" here are actually recordings, so this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256
+  stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108)
+  relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256
+  relu-renorm-layer name=tdnn4 dim=256
+
+  output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn4
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn4
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs
+  if [ $stage -le 4 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$train_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=20000 \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --dir=$dir/egs
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=20 \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=64 \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-music.vec.JOB
+  eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh
new file mode 100755
index 00000000000..a013fcc49a7
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD)
+# and music-id, using a statistics pooling component for long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts= # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=20 # We use chunk training for training TDNN
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs
+                    # such as removing one of the targets
+
+num_utts_subset_valid=50 # "utts" here are actually recordings, so this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256
+  stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108)
+  relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256
+
+  output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs
+  if [ $stage -le 4 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$train_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=20000 \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --dir=$dir/egs
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=20 \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=64 \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-music.vec.JOB
+  eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh
new file mode 100755
index 00000000000..703865b8ad5
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh
@@ -0,0 +1,229 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD)
+# and music-id, using a statistics pooling component for long-context information.
+# This script is the same as 1d, but adds add-log-stddev to the renorm layers.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support running without a GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=   # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=20   # We use chunk-based training for the TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=79   # Maximum left context in egs apart from TDNN's left context
+extra_right_context=11  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network gets used
+# to the different contexts used to compute statistics.
+min_extra_left_context=79
+min_extra_right_context=11
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param change for small network
+extra_egs_copy_cmd=   # Used if you want to post-process the egs,
+                      # e.g. to remove one of the targets
+
+num_utts_subset_valid=50  # "utts" are actually recordings, so this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp
+
+speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp
+deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp
+music_labels_scp=data/train_aztec_small_unsad_whole_music_corrupted_sp_hires_bp/music_labels.scp
+
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |"
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  echo "$0: creating neural net configs using the xconfig parser";
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input
+  output name=output-temp input=Append(-3,-2,-1,0,1,2,3)
+
+  relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true
+  stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 add-log-stddev=true
+  stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108)
+  relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256 add-log-stddev=true
+
+  output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+speech_data_dir=$dir/`basename $train_data_dir`_speech
+music_data_dir=$dir/`basename $train_data_dir`_music
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_multi
+  if [ $stage -le 2 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage
+    fi
+
+    . $dir/configs/vars
+
+    utils/subset_data_dir.sh --utt-list $speech_feat_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_speech
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$speech_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_speech
+  fi
+
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage
+    fi
+
+    . $dir/configs/vars
+    utils/subset_data_dir.sh --utt-list $music_labels_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_music
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$music_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_music
+  fi
+
+  if [ $stage -le 4 ]; then
+    steps/nnet3/multilingual/get_egs.sh \
+      --minibatch-size $[chunk_width * num_chunk_per_minibatch] \
+      --samples-per-iter $samples_per_iter \
+      2 $dir/egs_speech $dir/egs_music $dir/egs_multi
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-music.vec.JOB
+  eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec
+fi
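+
+# For reference, the --scp2ark-cmd pipeline that builds the output-speech targets
+# above can be exercised on its own.  A minimal sketch on made-up input (the
+# utterance id and the two-column score matrix below are invented, and we assume
+# steps/segmentation/quantize_vector.pl maps each frame's score to a 0/1 label):
+#
+#   echo 'utt1  [ 0.9 0.0
+#                 0.1 0.0
+#                 0.8 0.0 ]' | \
+#     extract-column --column-index=0 ark,t:- ark,t:- | \
+#     steps/segmentation/quantize_vector.pl | \
+#     ali-to-post ark,t:- ark,t:-
+#
+# This prints one (label, weight) pair per frame, which
+# get_egs_multiple_targets.py then stores as 2-dimensional sparse targets.
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh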
b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh new file mode 100755 index 00000000000..0afdd0072ac --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh @@ -0,0 +1,227 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=79 # Maximum left context in egs apart from TDNN's left context +extra_right_context=11 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. +num_utts_subset_train=50 + +# target options +train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp + +speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp +deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +music_labels_scp=data/train_aztec_small_unsad_whole_music_corrupted_sp_hires_bp/music_labels.scp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9) dim=256 add-log-stddev=true + + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` +speech_data_dir=$dir/`basename $train_data_dir`_speech +music_data_dir=$dir/`basename $train_data_dir`_music + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + + . $dir/configs/vars + + utils/subset_data_dir.sh --utt-list $speech_feat_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_speech + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$speech_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + utils/subset_data_dir.sh --utt-list $music_labels_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_music + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + steps/nnet3/multilingual/get_egs.sh \ + --minibatch-size $[chunk_width * num_chunk_per_minibatch] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-music.vec.JOB + eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh 
b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh new file mode 100755 index 00000000000..e411b94c893 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh @@ -0,0 +1,234 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=79 # Maximum left context in egs apart from TDNN's left context +extra_right_context=11 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9) dim=256 add-log-stddev=true + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh new file mode 100644 index 00000000000..e585f27e5fd --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh @@ -0,0 +1,310 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
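+
+# This recipe trains several outputs (output-speech, output-music,
+# output-speech_music, output-snr) from two sets of multitask egs.  The
+# $extra_egs_copy_cmd set below uses nnet3-copy-egs --keep-outputs, the same
+# option used elsewhere in these scripts, to drop any output we do not want to
+# train on.  As an illustrative sketch (the archive path
+# exp/foo/egs_multi/egs.1.ark is hypothetical):
+#
+#   nnet3-copy-egs --keep-outputs=output-speech,output-music \
+#     ark:exp/foo/egs_multi/egs.1.ark ark:egs_subset.ark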
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + stats-layer name=tdnn2_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2@0,tdnn2@12,tdnn2_stats) dim=$relu_dim + stats-layer name=tdnn3_stats config=mean+count(-108:12:36:108) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + relu-renorm-layer name=tdnn4-snr input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn4 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn4 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn4 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn4-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh new file mode 100644 index 00000000000..3ddcdd795db --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh @@ -0,0 +1,310 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil,amharic}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil,amharic}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + stats-layer name=tdnn2_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2@0,tdnn2@12,tdnn2_stats) dim=$relu_dim + stats-layer name=tdnn3_stats config=mean+count(-108:12:36:108) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + relu-renorm-layer name=tdnn4-snr input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn4 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn4 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn4 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn4-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + 
--egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh new file mode 100644 index 00000000000..059fbf7b1a9 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1j + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+stddev+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+stddev+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp 
input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh new file mode 100644 index 00000000000..48425e50386 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh @@ -0,0 +1,317 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1k + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+stddev+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+stddev+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh new file mode 100644 index 00000000000..689c31e623a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh @@ -0,0 +1,318 @@ +#!/bin/bash + +# This is a script to train a TDNN for speech activity detection (SAD) and +# music-id, using statistics pooling layers for long-context information. +# This is the same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we do not support running without a GPU, as it would be painfully slow. +# If you want to run without a GPU you would have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to post-process the egs in some way, + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1l + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, 
input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh new file mode 100755 index 00000000000..c8a7c887fef --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=1 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +# target options +train_data_dir=data/train_aztec_small_unsad_a +speech_feat_scp=data/train_aztec_small_unsad_a/speech_feat.scp +deriv_weights_scp=data/train_aztec_small_unsad_a/deriv_weights.scp + +#train_data_dir=data/train_aztec_small_unsad_whole_sad_ovlp_corrupted_sp +#speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/speech_feat.scp +#deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +#data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp + +# Only for SAD +snr_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/irm_targets.scp +deriv_weights_for_irm_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/deriv_weights_manual_seg.scp + +# Only for overlapped speech detection +deriv_weights_for_overlapped_speech_scp=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp/deriv_weights_for_overlapped_speech.scp +overlapped_speech_labels_scp=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp/overlapped_speech_labels.scp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $train_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + + relu-renorm-layer name=pre-final-speech dim=256 input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e 'print (1.0/6)'` + + relu-renorm-layer name=pre-final-snr dim=256 input=tdnn3 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print 1.0/$num_snr_bins"` + + relu-renorm-layer name=pre-final-overlapped_speech dim=256 input=tdnn3 + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$train_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=20000 \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$snr_scp --deriv-weights-scp=$deriv_weights_for_irm_scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$overlapped_speech_labels_scp --deriv-weights-scp=$deriv_weights_for_overlapped_speech_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --dir=$dir/egs + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + 
--trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=128 \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-overlapped_speech.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $overlapped_speech_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-overlapped_speech.vec.JOB + eval vector-sum $dir/post_output-overlapped_speech.vec.{`seq -s, 100`} $dir/post_output-overlapped_speech.vec +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh new file mode 100755 index 00000000000..b562a83f6c3 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using a statistics pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we do not support running without a GPU, as it would be painfully slow. +# If you want to run without a GPU you would have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training the TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics; see the illustrative +# sketch just below. 
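+# For illustration only: a hedged sketch of what such a per-job draw might +# look like (the actual sampling happens inside the called training scripts, +# not in this file; the variables are the ones defined next): +# job_extra_left=$[min_extra_left_context+RANDOM%(extra_left_context-min_extra_left_context+1)] +# job_extra_right=$[min_extra_right_context+RANDOM%(extra_right_context-min_extra_right_context+1)]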
+min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + 
--trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh new file mode 100755 index 00000000000..7041b0b3e9b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh @@ -0,0 +1,239 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh new file mode 100755 index 00000000000..a361435baa1 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh @@ -0,0 +1,262 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=d + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + 
${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh new file mode 100755 index 00000000000..7048c40f62b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh @@ -0,0 +1,272 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=d + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh new file mode 100755 index 00000000000..72e26b5347b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh @@ -0,0 +1,275 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
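+# (A hedged aside on the notation used further below: a config such as +# "stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)" is understood +# to pool the mean of its input, plus a count feature, over a window of about +# -99..+99 frames, reading the input every 3 frames and emitting statistics +# every 9 frames; this windowed pooling is what provides the long-context +# information mentioned above.)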
+
+# This script is the same as 1e, but adds max-change=0.75 for the snr and overlapped_speech outputs
+# and a learning-rate factor of 0.1 for the final affine components.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we only support running on a GPU; anything else would be painfully slow.
+# If you want to run without a GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=  # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=40  # We use chunk training for the TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=100  # Maximum left context in egs apart from the TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from the TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network gets used
+# to the different contexts over which statistics are computed.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param-change for a small network
+extra_egs_copy_cmd=   # Used if you want to do extra processing on the egs,
+                      # such as removing one of the targets
+
+sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=g
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if !
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.1 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh new file mode 100755 index 00000000000..fb1616b9ac7 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh @@ -0,0 +1,276 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
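+
+# For concreteness, the egs sizing used in these scripts works out as follows;
+# a small sketch using the default values set below (chunk_width=40,
+# num_chunk_per_minibatch=64):
+#
+#   samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`  # 400000/40 = 10000 chunks per iteration
+#   minibatch_size=$[num_chunk_per_minibatch * 4]                  # 64*4 = 256; dumped 4x larger so the
+#                                                                  # same egs can be re-batched later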
+
+# This script is the same as 1e, but adds max-change=0.75 for the snr and overlapped_speech outputs
+# and a learning-rate factor of 0.01 for the final affine components.
+# The learning-rate factor is decreased to 0.025 for the overlapped-speech output and 0.05 for the speech output.
+# The relu-dim is changed to 512.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we only support running on a GPU; anything else would be painfully slow.
+# If you want to run without a GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=  # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+chunk_width=40  # We use chunk training for the TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=100  # Maximum left context in egs apart from the TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from the TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network gets used
+# to the different contexts over which statistics are computed.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param-change for a small network
+extra_egs_copy_cmd=   # Used if you want to do extra processing on the egs,
+                      # such as removing one of the targets
+
+sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400
+ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp
+
+#extra_left_context=79
+#extra_right_context=11
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=g
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if !
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.025 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/path.sh b/egs/aspire/s5/path.sh index 1a6fb5f891b..7fb6d91c543 100755 --- a/egs/aspire/s5/path.sh +++ b/egs/aspire/s5/path.sh @@ -2,4 +2,5 @@ export KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh +export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH export LC_ALL=C diff --git a/egs/aurora4/s5/local/run_sgmm.sh b/egs/aurora4/s5/local/run_sgmm.sh deleted file mode 100755 index 62be4d83774..00000000000 --- a/egs/aurora4/s5/local/run_sgmm.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -# This script is invoked from ../run.sh -# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. - -. cmd.sh - -# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for -# training, but this shouldn't have much effect. - -( - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; - - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \ - exp/ubm5b/final.ubm exp/sgmm5a || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ - --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1; - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter & - done - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & - done - -) & - - -( -# The next commands are the same thing on all the si284 data. 
- -# SGMM system on the si284 data [sgmm5b] - steps/train_ubm.sh --cmd "$train_cmd" \ - 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5b || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ - exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92 - - utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1; - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ - exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ - exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ - --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284 - - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1 - - for iter in 1 2 3 4; do - for test in dev93 eval92; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter & - - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & - done - done -) & - - - -# Train quinphone SGMM system. - -steps/train_sgmm.sh --cmd "$train_cmd" \ - --context-opts "--context-width=5 --central-position=2" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5c || exit 1; - -# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93. 
-steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93 - diff --git a/egs/babel/s5/RESULTS b/egs/babel/s5/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5/local/decode_helper.sh b/egs/babel/s5/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5/local/decode_helper.sh +++ b/egs/babel/s5/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/babel/s5/local/make_pitch.sh b/egs/babel/s5/local/make_pitch.sh index 107016d78a9..f3597f504dd 100755 --- a/egs/babel/s5/local/make_pitch.sh +++ b/egs/babel/s5/local/make_pitch.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) # Bagher BabaAli @@ -50,7 +50,7 @@ mkdir -p $expdir/log || exit 1; scp=$data/wav.scp -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. +[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. ( # this is for back compatiblity: cd $KALDI_ROOT/tools @@ -92,7 +92,7 @@ done basename=`basename $data` wavdir=$pitchdir/temp_wav_$basename mkdir -p $wavdir - + if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then wav_scp=$expdir/wav.scp cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ @@ -104,7 +104,7 @@ if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then else # create a fake segments file that takes the whole file; this is an easy way # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments + cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments segments=$expdir/fake_segments fi if [ $stage -le 0 ]; then @@ -155,11 +155,11 @@ if [ $stage -le 1 ]; then fi # I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any +# script file in the experimental directory. Quotes around 'EOF' disable any # interpretation in the here-doc. 
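# (A quick illustration of that quoting rule, independent of this script:
#    x=hello
#    cat <<EOF     # unquoted delimiter: the shell expands $x and prints "hello"
#    $x
#    EOF
#    cat <<'EOF'   # quoted delimiter: no expansion; prints the literal text $x
#    $x
#    EOF
#  Quoting 'EOF' below is what keeps the $1, $2 and the embedded perl code in
#  convert.sh from being expanded by this outer script.)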
cat <<'EOF' > $expdir/convert.sh #!/bin/bash -sacc_flist=$1 +sacc_flist=$1 scpfile=$2 [ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; @@ -247,7 +247,7 @@ exit 0; # rm $expdir/.error 2>/dev/null # # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" +# # mkdir -p "$expdir/$n" # # done # # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ @@ -297,8 +297,8 @@ exit 0; # rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` +# nf=`cat $data/pitchs.scp | wc -l` +# nu=`cat $data/utt2spk | wc -l` # if [ $nf -ne $nu ]; then # echo "It seems not all of the feature files were successfully ($nf != $nu);" # echo "consider using utils/fix_data_dir.sh $data" diff --git a/egs/babel/s5/path.sh b/egs/babel/s5/path.sh index 498423857fd..a45a39d1f6a 100755 --- a/egs/babel/s5/path.sh +++ b/egs/babel/s5/path.sh @@ -1,6 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -. /export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. /export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/babel/s5b/RESULTS b/egs/babel/s5b/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5b/local/decode_helper.sh b/egs/babel/s5b/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5b/local/decode_helper.sh +++ b/egs/babel/s5b/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/babel/s5b/local/make_pitch.sh b/egs/babel/s5b/local/make_pitch.sh deleted file mode 100755 index 107016d78a9..00000000000 --- a/egs/babel/s5b/local/make_pitch.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/bin/bash - -# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Bagher BabaAli -# Apache 2.0 -# To be run from .. (one directory up from here) -# This makes two-dimension p(voicing) and pitch features for some data/ directory. - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=0 -pitch_config= -interpolate_pitch_opts= -process_pitch_opts= -cleanup=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. 
parse_options.sh || exit 1; - -if [ $# != 3 ]; then - echo "Usage: make_pitch.sh [options] "; - echo "Makes two dimensional [p(voicing), pitch] features, based on SAcC pitch" - echo "extractor followed by some normalization and smoothing" - echo "E.g.: make_pitch.sh data/train_pitch exp/make_pitch_train plp/" - echo "Options: " - echo " --pitch-config # config passed to compute-pitch-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -expdir=$2 -pitchdir=$3 - -# make $pitchdir an absolute pathname. -pitchdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $pitchdir ${PWD}` -# make $expdir an absolute pathname. -expdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $expdir ${PWD}` - -# use "name" as part of name of the archive. -name=`basename $data` - -mkdir -p $pitchdir || exit 1; -mkdir -p $expdir/log || exit 1; - -scp=$data/wav.scp - -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. - -( # this is for back compatiblity: - cd $KALDI_ROOT/tools - if [ -d sacc ] && [ ! -d pitch_trackers/sacc ]; then - echo "Linking sacc directory to new location." - mkdir -p pitch_trackers - cd pitch_trackers - ln -s ../sacc .. - fi -) - -sacc_dir=$KALDI_ROOT/tools/pitch_trackers/sacc/SAcC_GLNXA64/ -# make $sacc_dir an absolute pathname. -sacc_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sacc_dir ${PWD}` - -sacc_script=$sacc_dir/run_SAcC.sh -sacc_config=$sacc_dir/conf/Babelnet_sr8k_bpo6_sb24_k10.config - -if [ ! -f $sacc_script ]; then - echo "*Expecting the script $sacc_script to exist" - echo "*cd to $KALDI_ROOT/tools/, and run extras/install_sacc.sh" - echo "*Re-run this script when it is installed." - exit 1; -fi - -required="$scp $pitch_config $sacc_config" - -for f in $required; do - if [ ! -f $f ]; then - echo "make_pitch.sh: no such file $f" - exit 1; - fi -done - -# note: in general, the double-parenthesis construct in bash "((" is "C-style -# syntax" where we can get rid of the $ for variable names, and omit spaces. -# The "for" loop in this style is a special construct. - -basename=`basename $data` -wavdir=$pitchdir/temp_wav_$basename -mkdir -p $wavdir - -if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then - wav_scp=$expdir/wav.scp - cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ - > $wav_scp || exit 1; - - if [ -f $data/segments ]; then - echo "$0 [info]: segments file exists: creating temporary wav files in $wavdir" - segments=$data/segments - else - # create a fake segments file that takes the whole file; this is an easy way - # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments - segments=$expdir/fake_segments - fi - if [ $stage -le 0 ]; then - echo "Extracting wav-file segments (or just converting to wav format)" - $cmd $expdir/log/extract-segments.log \ - extract-segments scp:$data/wav.scp $segments scp:$wav_scp || exit 1; - fi -else - echo "No segments file exists, and wav scp is plain: using wav files as input." 
- wav_scp=$data/wav.scp -fi - -wav_checked_scp=$expdir/wav_checked.scp -cat $wav_scp | \ - perl -ane '@A=split; if (-f $A[1]) { print; }' >$wav_checked_scp -nl_orig=`cat $wav_scp | wc -l` -nl_new=`cat $wav_checked_scp | wc -l` - -echo "After removing non-existent files, number of utterances decreased from $nl_orig to $nl_new"; -[ $nl_new -eq 0 ] && exit 1; - -# now $wav_scp is an scp file for the per-utterance wav files. - -# Split up the wav files into multiple lists. -split_wavs="" -for ((n=1; n<=nj; n++)); do - split_wavs="$split_wavs $expdir/split_wavs.$n.scp" -done -utils/split_scp.pl $wav_checked_scp $split_wavs || exit 1; - -# For each wav file, create corresponding temporary pitch file, in the -# format the SAcC outputs: [ 0 frame pitch p(voicing) ] -temp_pitchdir=$pitchdir/temp_pitch_$basename -mkdir -p $temp_pitchdir - -for ((n=1; n<=nj; n++)); do - mkdir -p $temp_pitchdir/$n - cat $expdir/split_wavs.$n.scp | awk -v pdir=$temp_pitchdir -v n=$n \ - '{key=$1; wavfile=$2; printf("%s,%s/%s/%s.pitch\n", wavfile, pdir, n, key);}' \ - > $expdir/sacc_flist.$n || exit 1 -done - -if [ $stage -le 1 ]; then - # Need to do this in director $sacc_dir as some of the things in its config - # are relative pathnames. - $cmd JOB=1:$nj $d/$expdir/log/sacc.JOB.log \ - cd $sacc_dir '&&' $sacc_script $expdir/sacc_flist.JOB $sacc_config || exit 1; -fi - -# I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any -# interpretation in the here-doc. -cat <<'EOF' > $expdir/convert.sh -#!/bin/bash -sacc_flist=$1 -scpfile=$2 -[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; - -for f in `cat $sacc_flist | cut -d, -f2`; do - g=`echo $f | sed s:.pitch$:.mat:` - if [ -f $f ]; then - cat $f | awk 'BEGIN{printf("[ "); } {print $4, $3;} END{ print "]"; }' > $g - rm $f - fi -done -cat $sacc_flist | cut -d, -f2 | \ - perl -ane 'm:/([^/]+)\.pitch$: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile -EOF -chmod +x $expdir/convert.sh - -if [ $stage -le 2 ]; then - echo "Converting format from .pitch to .mat (kaldi-readable format)" - $cmd JOB=1:$nj $expdir/log/convert.JOB.log \ - $expdir/convert.sh $expdir/sacc_flist.JOB $expdir/mat.scp.JOB || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "Doing final processing (interpolation, smoothing, etc.) 
on pitch features" - $cmd JOB=1:$nj $expdir/log/process.JOB.log \ - interpolate-pitch $interpolate_pitch_opts scp:$expdir/mat.scp.JOB ark:- \| \ - process-pitch-feats $process_pitch_opts ark:- \ - ark,scp:$pitchdir/${basename}_pitch.JOB.ark,$pitchdir/${basename}_pitch.JOB.scp || exit 1; -fi - -echo "Creating $data/feats.scp" -for ((n=1; n<=nj; n++)); do cat $pitchdir/${basename}_pitch.$n.scp; done > $data/feats.scp - -if $cleanup; then - echo "Removing temporary files" - rm -r $wavdir $temp_pitchdir -fi - -echo "Finished extracting pitch features for $basename" - -debug=~/temp2.m -echo "A = [" > $debug -copy-feats scp:$data/feats.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 \ - >> $debug - -cat <<'EOF' >>$debug -]; -pov = A(:, 1); -pitch = A(:, 2); -subplot(2, 2, 1); -hist(pov, 30); -legend('pov') -subplot(2, 2, 2); -hist(pitch, 30); -legend('pitch') - -len=size(pov, 1); -povD = pov(1:len-1) - pov(2:len); -subplot(2, 2, 3); -hist(povD, 30); -legend('delta-pov') - -pitchD = pitch(1:len-1) - pitch(2:len); -pitchD = max(pitchD, -0.05); -pitchD = min(pitchD, 0.05); -subplot(2, 2, 4); -hist(pitchD, 50); -legend('delta-pitch'); - -print -deps 'C.eps' -EOF - -exit 0; - - -# Here's - -#copy-feats scp:plp/train_pitch_pitch.10.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 > ~/temp2.m - -# -### data goes here. -#]; - - - -# rm $expdir/.error 2>/dev/null - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config ark:- \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # `pwd`/$expdir/JOB || exit 1; - -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# local/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; - -# else -# echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." -# split_scps="" -# for ((n=1; n<=nj; n++)); do -# split_scps="$split_scps $expdir/wav.$n.scp" -# done - -# utils/split_scp.pl $scp $split_scps || exit 1; - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config scp:$expdir/wav.JOB.scp \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # $expdir/JOB || exit 1; - -# pushd $sacc_dir -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# cd $sacclocal/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; -# fi - - -# if [ -f $expdir/.error.$name ]; then -# echo "Error producing pitch features for $name:" -# tail $expdir/make_pitch.*.log -# exit 1; -# fi - -# # concatenate the .scp files together. 
-# for ((n=1; n<=nj; n++)); do -# cat $pitchdir/raw_pitch_$name.$n.scp >> $data/pitchs.scp || exit 1; -# done > $data/pitchs.scp - -# rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null - -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` -# if [ $nf -ne $nu ]; then -# echo "It seems not all of the feature files were successfully ($nf != $nu);" -# echo "consider using utils/fix_data_dir.sh $data" -# fi - -# echo "Succeeded creating PITCH features for $name" diff --git a/egs/babel/s5b/path.sh b/egs/babel/s5b/path.sh index c8fdbad6ff7..2d7dba09015 100755 --- a/egs/babel/s5b/path.sh +++ b/egs/babel/s5b/path.sh @@ -1,5 +1,4 @@ export KALDI_ROOT=`pwd`/../../.. -. /export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. /export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/babel/s5c/RESULTS b/egs/babel/s5c/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf index e5d60c12367..9efcdc6a164 100644 --- a/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf +++ b/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf @@ -92,7 +92,7 @@ oovSymbol="" lexiconFlags="--romanized --oov " # Scoring protocols (dummy GLM file to appease the scoring script) -glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +glmFile=dummy.glm lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt cer=1 diff --git a/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf b/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf index ae4cb55f4d5..014b519f3b7 100644 --- a/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf +++ b/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf @@ -3,7 +3,7 @@ #speech corpora files location train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training -train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list +train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.list train_nj=16 #RADICAL DEV data files diff --git a/egs/babel/s5c/local/CHECKPOINT.sh b/egs/babel/s5c/local/CHECKPOINT.sh index 91b64d7fe1a..ed0ddd18399 100755 --- a/egs/babel/s5c/local/CHECKPOINT.sh +++ b/egs/babel/s5c/local/CHECKPOINT.sh @@ -1,11 +1,11 @@ #!/bin/bash function GETAPPROVAL { - until false ; do + until false ; do echo "Do you want to run the command (y/n)?" 
read -n 1 WISH - - if [ "$WISH" == "y" ]; then + + if [ "$WISH" == "y" ]; then return true; elif [ "$WISH" == "n" ]; then return false; @@ -21,11 +21,11 @@ function ESCAPE_PARAMS { if [[ "$v" == *"<"* ]]; then out="$out \"$v\"" - elif [[ "$v" == *">"* ]] ; then + elif [[ "$v" == *">"* ]] ; then out="$out \"$v\"" - elif [[ "$v" == *"|"* ]] ; then + elif [[ "$v" == *"|"* ]] ; then out="$out \'$v\'" - elif [[ "$v" == *" "* ]] ; then + elif [[ "$v" == *" "* ]] ; then out="$out \"$v\"" else out="$out $v" @@ -76,7 +76,7 @@ function CHECKPOINT { if [ !$INTERACTIVE_CHECKPOINT ] ; then eval `ESCAPE_PARAMS "$@"` - else + else APPROVAL=GETAPPROVAL if $APPROVAL ; then eval `ESCAPE_PARAMS $@` @@ -87,7 +87,7 @@ function CHECKPOINT { echo -e ${COLOR_RED}"CHECKPOINT FAILURE: The command returned non-zero status" >&2 echo -e " rerun the script with the parameter -c $LAST_GOOD_NAME=$COUNTER" >&2 echo -e "COMMAND">&2 - echo -e " " "$@" ${COLOR_RED} >&2 + echo -e " " "$@" ${COLOR_RED} >&2 exit 1 fi @@ -97,7 +97,7 @@ function CHECKPOINT { echo -e "$@"${COLOR_DEFAULT} >&2 fi - COUNTER=$(( $COUNTER + 1 )) + COUNTER=$(( $COUNTER + 1 )) eval export $COUNTER_NAME=$COUNTER } diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh index 63cf8f44dc4..09df9a15805 100755 --- a/egs/babel/s5c/local/ali_to_rttm.sh +++ b/egs/babel/s5c/local/ali_to_rttm.sh @@ -42,7 +42,7 @@ if [ $# != 3 ]; then exit 1; fi -set -e +set -e set -o pipefail set -u @@ -65,7 +65,7 @@ fi $cmd $dir/log/align_to_words.log \ ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \ phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \ - prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt + prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt echo "$0: done writing alignments." diff --git a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl index 198da36da5a..a4c80cef345 100755 --- a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl +++ b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl @@ -26,7 +26,7 @@ Allowed options: EOU -GetOptions(); +GetOptions(); @ARGV >= 2 || die $Usage; @@ -77,7 +77,7 @@ if ($count == 0) { $output .= "$value"; $count ++; next; - } + } if ($count == 6) { $output .= ", ..."; last; diff --git a/egs/babel/s5c/local/apply_g2p.sh b/egs/babel/s5c/local/apply_g2p.sh index f47274cb21c..385b1f3536e 100755 --- a/egs/babel/s5c/local/apply_g2p.sh +++ b/egs/babel/s5c/local/apply_g2p.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. iters=5 stage=0 encoding='utf-8' @@ -82,15 +82,15 @@ cat $output/output.* > $output/output #Remap the words from output file back to the original casing #Conversion of some of thems might have failed, so we have to be careful #and use the transform_map file we generated beforehand -#Also, because the sequitur output is not readily usable as lexicon (it adds +#Also, because the sequitur output is not readily usable as lexicon (it adds #one more column with ordering of the pron. variants) convert it into the proper lexicon form output_lex=$output/lexicon.lex if [ ! -z $icu_transform ] ; then #also, the transform is generally N -> 1, i.e. 
we have to take #extra care of words that might have been mapped into the same one - perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; - while() { chomp; @F=split; - if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } + perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; + while() { chomp; @F=split; + if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } else { $MAP{$F[0]} = [$F[1]]; } } close(WORDS); @@ -101,7 +101,7 @@ if [ ! -z $icu_transform ] ; then next; } foreach $word (@{$MAP{$F[0]}} ) { - print "$word\t$F[2]\t$F[3]\n"; + print "$word\t$F[2]\t$F[3]\n"; } } close(LEX); diff --git a/egs/babel/s5c/local/apply_map_tab_preserving.pl b/egs/babel/s5c/local/apply_map_tab_preserving.pl index 2a3238c04a3..b57262f1930 100755 --- a/egs/babel/s5c/local/apply_map_tab_preserving.pl +++ b/egs/babel/s5c/local/apply_map_tab_preserving.pl @@ -12,8 +12,8 @@ # this version preserves tabs. if (@ARGV > 0 && $ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } @@ -70,7 +70,7 @@ $field_offset = 0; for ($n = 0; $n < @A; $n++) { @B = split(" ", $A[$n]); - + for ($x = 0; $x < @B; $x++) { $y = $x + $field_offset; if ( (!defined $field_begin || $y >= $field_begin) @@ -78,12 +78,12 @@ $b = $B[$x]; if (!defined $map{$b}) { if (!$permissive) { - die "apply_map.pl: undefined key $a\n"; + die "apply_map.pl: undefined key $a\n"; } else { print STDERR "apply_map.pl: warning! missing key $a\n"; } } else { - $B[$x] = $map{$b}; + $B[$x] = $map{$b}; } } } diff --git a/egs/babel/s5c/local/augment_original_stm.pl b/egs/babel/s5c/local/augment_original_stm.pl index 4c58ccc6271..c5ad87fd286 100755 --- a/egs/babel/s5c/local/augment_original_stm.pl +++ b/egs/babel/s5c/local/augment_original_stm.pl @@ -8,7 +8,7 @@ #As a result, the scoring will be done on per-speaker basis as well #As the segment from segment mapping generally do not correspond to #the segmentation of the original STM file, it combines the files -#segments and utt2spk to work out the correct speaker ID for +#segments and utt2spk to work out the correct speaker ID for #the reference segment #In case of overlay, it will either use the previous speaker or #prints out an error message diff --git a/egs/babel/s5c/local/best_path_weights.sh b/egs/babel/s5c/local/best_path_weights.sh index 8e88a3610a4..52782ee3655 100755 --- a/egs/babel/s5c/local/best_path_weights.sh +++ b/egs/babel/s5c/local/best_path_weights.sh @@ -16,19 +16,19 @@ # limitations under the License. -# This script combines frame-level posteriors from different decode -# directories. The first decode directory is assumed to be the primary +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary # and is used to get the best path. The posteriors from other decode -# directories are interpolated with the posteriors of the best path. -# The output is a new directory with final.mdl, tree from the primary -# decode-dir and the best path alignments and weights in a decode-directory +# directories are interpolated with the posteriors of the best path. 
+# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory # with the same basename as the primary directory. # This is typically used to get better posteriors for semisupervised training # of DNN -# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg # exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm -# Here the final.mdl and tree are copied from exp/tri6_nnet to -# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and # the interpolated posteriors in weights.*.gz are placed in # exp/combine_dnn_sgmm/decode_train_unt.seg @@ -115,7 +115,7 @@ for i in `seq 0 $[num_sys-1]`; do echo $nj > $out_decode/num_jobs else if [ $nj != `cat $decode_dir/num_jobs` ]; then - echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" exit 1; fi fi diff --git a/egs/babel/s5c/local/check_models.sh b/egs/babel/s5c/local/check_models.sh index d02fc4e561a..88b3dacc94b 100755 --- a/egs/babel/s5c/local/check_models.sh +++ b/egs/babel/s5c/local/check_models.sh @@ -4,7 +4,7 @@ check_model () { model=$1 if [ -s $model ]; then echo $model - else + else dir=`dirname $model` latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'` echo "*$model is not there, latest is: $latest_model" diff --git a/egs/babel/s5c/local/check_wers.sh b/egs/babel/s5c/local/check_wers.sh index ebd6bb28790..10e1a89ee3a 100755 --- a/egs/babel/s5c/local/check_wers.sh +++ b/egs/babel/s5c/local/check_wers.sh @@ -4,7 +4,7 @@ check_wer () { dir=$1 - if [ -d $dir ]; then + if [ -d $dir ]; then seen_dir=false for ddir in $dir/decode*; do if [ -d $ddir ]; then @@ -34,7 +34,7 @@ for n in `seq 10`; do fi done -if [ $# != 0 ]; then +if [ $# != 0 ]; then echo "Usage: local/check_wers.sh [--final] [--char]" exit 1; fi diff --git a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh index a8fcc39eba5..f320cfa19cd 100755 --- a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh +++ b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh @@ -30,12 +30,12 @@ mkdir -p $datadir echo "Converting `basename $database` to kaldi directory $datadir " cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \ - perl -ne '@K = split; - $utteranceID = @K[0]; - $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; - $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; - $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; - $utteranceID .= sprintf ("_%06i", (100*@K[2])); + perl -ne '@K = split; + $utteranceID = @K[0]; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*@K[2])); printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments if [ ! -z $filelist ] ; then @@ -66,12 +66,12 @@ perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";} # 4. Create the wav.scp file: sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` if [ $? -ne 0 ] ; then - echo "Could not find sph2pipe binary. Add it to PATH" + echo "Could not find sph2pipe binary. 
Add it to PATH" exit 1; fi sox=`which sox` if [ $? -ne 0 ] ; then - echo "Could not find sox binary. Add it to PATH" + echo "Could not find sox binary. Add it to PATH" exit 1; fi @@ -84,19 +84,19 @@ echo "Creating the $datadir/wav.scp file" elif [ -f $audiopath/audio/$file.wav ] ; then echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" else - echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 exit 1 fi - done | sort -u > $datadir/wav.scp - if [ $? -ne 0 ] ; then - echo "Error producing the wav.scp file" + done | sort -u > $datadir/wav.scp + if [ $? -ne 0 ] ; then + echo "Error producing the wav.scp file" exit 1 fi -) || exit 1 +) || exit 1 l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` echo "wav.scp contains $l1 files" -if [ ! -z $filelist ] ; then +if [ ! -z $filelist ] ; then l2=`wc -l $filelist | cut -f 1 -d ' '` echo "filelist `basename $filelist` contains $l2 files" diff --git a/egs/babel/s5c/local/create_shadow_dataset.sh b/egs/babel/s5c/local/create_shadow_dataset.sh index 6783ee49770..49467ed28c1 100755 --- a/egs/babel/s5c/local/create_shadow_dataset.sh +++ b/egs/babel/s5c/local/create_shadow_dataset.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University +# Copyright 2012 Johns Hopkins University # Apache 2.0. stage=0 @@ -29,8 +29,8 @@ if [ $stage -le 1 ] ; then #zkombinovat ecf echo "Combining ECF files..." perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -87,8 +87,8 @@ if [ $stage -le 2 ] ; then #zkombinovat kwlist echo "Combining the KWLIST files" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -107,7 +107,7 @@ if [ $stage -le 2 ] ; then if ( $src1->{language} ne $src2->{language} ) { die "KWLIST languages differ in the source kwlist.xml files"; } - + $tgt->{ecf_filename} = ""; $tgt->{language}=$src1->{language}; $tgt->{compareNormalize}=$src1->{compareNormalize}; @@ -143,8 +143,8 @@ fi if [ $stage -le 3 ] ; then echo "Making KWLIST maps" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; diff --git a/egs/babel/s5c/local/cstr_ndx2flist.pl b/egs/babel/s5c/local/cstr_ndx2flist.pl index d19db421a9f..79daa1a99db 100755 --- a/egs/babel/s5c/local/cstr_ndx2flist.pl +++ b/egs/babel/s5c/local/cstr_ndx2flist.pl @@ -16,7 +16,7 @@ # limitations under the License. # This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. +# for the way the WSJ data is structured on the Edinburgh systems. # - Arnab Ghoshal, 12/1/12 # This program takes as its standard input an .ndx file from the WSJ corpus that looks @@ -25,7 +25,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. 
-#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 diff --git a/egs/babel/s5c/local/ctm2segments.pl b/egs/babel/s5c/local/ctm2segments.pl index 26a786c88b9..55a8bd84fc8 100755 --- a/egs/babel/s5c/local/ctm2segments.pl +++ b/egs/babel/s5c/local/ctm2segments.pl @@ -45,21 +45,21 @@ chop $line; my @entries = split(/ /, $line); die "Cannot parse line \"$line\"" if scalar @entries != 6; - + ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries; - - $total_seconds += $end * 1.0; - + + $total_seconds += $end * 1.0; + if ($conf >= $cf_needed ) { if ( $words ne "" ) { #print "Extend segment\n"; $words .= " $word"; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; } else { #start a new segment #print "Start segment\n"; $seg_start = $beg; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; $words = $word; } } else { @@ -75,14 +75,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 @@ -111,14 +111,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 diff --git a/egs/babel/s5c/local/datasets/basic_kws.sh b/egs/babel/s5c/local/datasets/basic_kws.sh index 35d6e379658..ed6995b3080 100644 --- a/egs/babel/s5c/local/datasets/basic_kws.sh +++ b/egs/babel/s5c/local/datasets/basic_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -23,6 +23,6 @@ if [ ! 
-f ${dataset_dir}/kws/.done ] ; then fi local/kws_setup.sh --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $my_kwlist_file data/lang ${dataset_dir} || exit 1 - touch ${dataset_dir}/kws/.done + $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1 + touch ${dataset_dir}/kws/.done fi diff --git a/egs/babel/s5c/local/datasets/extra_kws.sh b/egs/babel/s5c/local/datasets/extra_kws.sh index cb90968a1dc..32031270b36 100644 --- a/egs/babel/s5c/local/datasets/extra_kws.sh +++ b/egs/babel/s5c/local/datasets/extra_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -17,7 +17,7 @@ function register_extraid { local dataset_dir=$1 local extraid=$2 echo "Registering $extraid" - echo $extraid >> $dataset_dir/extra_kws_tasks; + echo $extraid >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks } @@ -31,7 +31,7 @@ function setup_oov_search { local data_dir=$1 local source_dir=$2 local extraid=$3 - + local kwsdatadir=$data_dir/${extraid}_kws mkdir -p $kwsdatadir @@ -50,7 +50,7 @@ function setup_oov_search { paste \ <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \ <(cat $kwlist | grep -o -P "(?<=).*(?=)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \ - >$kwsdatadir/keywords.txt + >$kwsdatadir/keywords.txt cut -f 2 $kwsdatadir/keywords.txt | \ sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt @@ -61,7 +61,7 @@ function setup_oov_search { if [ ! -f exp/conf_matrix/.done ] ; then local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \ exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1 - touch exp/conf_matrix/.done + touch exp/conf_matrix/.done fi confusion=exp/conf_matrix/confusions.txt @@ -75,10 +75,13 @@ function setup_oov_search { fi local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \ --var-counts $g2p_nbest --var-mass $g2p_mass \ - $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p + $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1 L2_lex=$kwsdatadir/g2p/lexicon.lex - L1_lex=data/local/lexiconp.txt + if [ -z "$L1_lex" ] ; then + L1_lex=data/local/lexiconp.txt + fi + local/kws_data_prep_proxy.sh \ --cmd "$decode_cmd" --nj $my_nj \ --case-insensitive true \ @@ -86,14 +89,14 @@ function setup_oov_search { --phone-cutoff $phone_cutoff \ --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \ --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \ - data/lang $data_dir $L1_lex $L2_lex $kwsdatadir + $lang $data_dir $L1_lex $L2_lex $kwsdatadir } kws_flags=( --use-icu true ) if [ "${dataset_kind}" == "supervised" ] ; then - #The presence of the file had been already verified, so just + #The presence of the file had been already verified, so just #add the correct switches kws_flags+=(--rttm-file $my_rttm_file ) fi @@ -107,20 +110,20 @@ if [ ! 
-f $dataset_dir/.done.kws.oov ] ; then touch $dataset_dir/.done.kws.oov fi if [ ${#my_more_kwlists[@]} -ne 0 ] ; then - + touch $dataset_dir/extra_kws_tasks - + for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.$extraid ] && continue; kwlist=${my_more_kwlists[$extraid]} local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $kwlist data/lang ${dataset_dir} || exit 1 - + $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1 + #Register the dataset for default running... #We can do it without any problem here -- the kws_stt_tasks will not #run it, unless called with --run-extra-tasks true switch @@ -129,7 +132,7 @@ if [ ${#my_more_kwlists[@]} -ne 0 ] ; then done for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov diff --git a/egs/babel/s5c/local/datasets/supervised_pem.sh b/egs/babel/s5c/local/datasets/supervised_pem.sh index c32d73e0718..e131fae40fa 100644 --- a/egs/babel/s5c/local/datasets/supervised_pem.sh +++ b/egs/babel/s5c/local/datasets/supervised_pem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. diff --git a/egs/babel/s5c/local/datasets/supervised_seg.sh b/egs/babel/s5c/local/datasets/supervised_seg.sh index a681688f480..a5ccd36211b 100644 --- a/egs/babel/s5c/local/datasets/supervised_seg.sh +++ b/egs/babel/s5c/local/datasets/supervised_seg.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. @@ -57,7 +57,7 @@ echo "Creating the $unseg_dir/reco2file_and_channel file" cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt - + make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ diff --git a/egs/babel/s5c/local/datasets/supervised_uem.sh b/egs/babel/s5c/local/datasets/supervised_uem.sh index 318518ad86e..5ac1e003d5d 100644 --- a/egs/babel/s5c/local/datasets/supervised_uem.sh +++ b/egs/babel/s5c/local/datasets/supervised_uem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. 
@@ -6,7 +6,7 @@ eval my_data_cmudb=\$${dataset_type}_data_cmudb if [ "${dataset_kind}" != "supervised" ] ; then - mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" optional_variables="" else mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" diff --git a/egs/babel/s5c/local/datasets/vocab_kws.sh b/egs/babel/s5c/local/datasets/vocab_kws.sh index 812122bd024..40c1d8e841d 100644 --- a/egs/babel/s5c/local/datasets/vocab_kws.sh +++ b/egs/babel/s5c/local/datasets/vocab_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -15,7 +15,7 @@ check_variables_are_set if [ "$dataset_kind" == "shadow" ]; then true #we do not support multiple kw lists for shadow set system - + elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then #a This will work for both supervised and unsupervised dataset kinds kws_flags=() @@ -25,25 +25,25 @@ elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then if $my_subset_ecf ; then kws_flags+=(--subset-ecf $my_data_list) fi - + #We just could come with some bogus naming scheme, #but as long as the audio files can tell the iarpa lang id, we will use that langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` - #NB: we assume the default KWS search is already done and will "borrow" + #NB: we assume the default KWS search is already done and will "borrow" #the rttm and ecf files. #We could easily generate the ecf file, but the RTTM assumes the decoding - #had been already done. That could be done + #had been already done. That could be done #Ideally, these files should be generated here! 
local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \ --extraid fullvocab $my_ecf_file \ - <(cat data/lang/words.txt | \ - grep -v -F "<" | grep -v -F "#" | \ + <(cat $lang/words.txt | \ + grep -v "^<" | grep -v "^#" | \ awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \ - data/lang ${dataset_dir} || exit 1 + $lang ${dataset_dir} || exit 1 - echo fullvocab >> $dataset_dir/extra_kws_tasks; + echo fullvocab >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks touch $dataset_dir/.done.kws.fullvocab fi diff --git a/egs/babel/s5c/local/decode_helper.sh b/egs/babel/s5c/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5c/local/decode_helper.sh +++ b/egs/babel/s5c/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/babel/s5c/local/extend_lexicon.sh b/egs/babel/s5c/local/extend_lexicon.sh index fd0b27a4172..48553dd6279 100755 --- a/egs/babel/s5c/local/extend_lexicon.sh +++ b/egs/babel/s5c/local/extend_lexicon.sh @@ -10,7 +10,7 @@ # two files: lexiconp.txt (this is the lexicon format that has pronunciation # probabilities; the words in the original lexicon have probability one), and # oov2prob, which says how the OOV mass is distributed among the new OOV words -# in the lexicon. +# in the lexicon. # It assumes that the syllables in pronunciations in the input lexicon.txt are # separated by tabs, as is normal for the BABEL setup; the syllable boundaries @@ -39,7 +39,7 @@ # because we felt that this would make the mapping harder for g2p to learn. # Instead we mapped the phones to unique letters; this is what the "phone_map" # file is about. Furthermore, in BABEL we have the concept of tags on the -# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. +# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. # As far as Kaldi is concerned, ay_3 is a single phone. To avoid the number of # letters blowing up too much, we make these tags separate letters when generating # phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to @@ -79,7 +79,7 @@ # equal to 0.33 times the probability listed in oov2prob. However, that script # will not allow the unigram probability of any OOV word to be more probable than # the least probable word which was originally in the ARPA file (not counting <s>, -# which generally has probability -99); this is applied as a ceiling on the +# which generally has probability -99); this is applied as a ceiling on the # unknown-word probabilities. Note: the --unk-fraction should probably be # similar to the OOV rate in that language. 
Calculating the OOV rate on some # dev data is one reasonable way to set this; see the commands at the very @@ -149,7 +149,7 @@ cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of wh loc=`which ngram-count`; if [ -z $loc ]; then if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 else sdir=`pwd`/../../../tools/srilm/bin/i686 fi @@ -256,21 +256,21 @@ if [ $stage -le -1 ]; then rm $dir/probs.* 2>/dev/null echo '#!/usr/bin/perl -while(1) { +while(1) { $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); } - $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; + $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl chmod +x $dir/temp.pl $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \ $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1; - if $cleanup; then - rm $dir/sents.*; + if $cleanup; then + rm $dir/sents.*; fi sort -m -gr $dir/probs.* | uniq | head -n $num_prons > $dir/probs - if $cleanup; then - rm $dir/probs.*; + if $cleanup; then + rm $dir/probs.*; fi mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}') @@ -296,7 +296,7 @@ fi # We may lose a little information by doing this, though, because the segmentation # into phonemes may be ambiguous. So we create a mapping from the original phonemes # and tags to letters of the alphabet. Note: tags are things like s_3 for a phone: here -# s is the phone and _3 is the tag. +# s is the phone and _3 is the tag. if [ $stage -le 0 ]; then @@ -375,10 +375,10 @@ if [ $stage -le $[$g2p_iters+1] ]; then awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \ sort | uniq > $dir/pron2spelling - # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt + # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt # (this also contains the excluded words like ). cat $dir/pron2spelling | \ - perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while(<F>) { @A=split; $seen_word{$A[0]}=1; } + perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while(<F>) { @A=split; $seen_word{$A[0]}=1; } while(<STDIN>) { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded # $dir/pron2spelling.excluded contains lines like #ab syllable1 syllable2 ... # e.g. # Kuku 0.000002642 k>&u k>&u - + cat $dir/probs | \ perl -e ' while(<>){ @A = split; $prob = shift @A; $pron=join("", @A); $pron =~ tr/,//d; print "$pron $_"; } '> $dir/probs.with_pron @@ -402,7 +402,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then # This is so we can get the pronunciation in the same form that we put it in, for # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded - perl -e ' ($p2s, $probs_with_pron) = @ARGV; + perl -e ' ($p2s, $probs_with_pron) = @ARGV; open(P2S, "<$p2s" || die); open(PROBS, "<$probs_with_pron")||die; while () { @A = split; @@ -487,7 +487,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then print L "$word\t$pronprob\t$pron"; } close(L); close(W); # wait for sort to finish. 
' \ $dir/lexiconp_oov.txt $dir/oov2prob - + # lexiconp_oov.txt contains lines like: #leyanga 0.96471840417664 l 3 j_" a_" N a #leyanga 1 l 3 j_" a_" N g a @@ -497,7 +497,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then #Adlule 9.62418179264897e-08 #Afuna 2.23048402109824e-06 fi - + if [ $stage -le $[$g2p_iters+2] ]; then # put it to the output directory $localdir e.g. data/local/ cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \ @@ -526,7 +526,7 @@ if [ ! -z $dev_text ]; then $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \ $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates - + # Original type OOV rate cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} @@ -549,7 +549,7 @@ exit 0; ###BELOW HERE IS JUST COMMENTS ########### #cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \ -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} @@ -559,7 +559,7 @@ done #Seen 13675 out of 60613 tokens; OOV rate is 77.44 #Seen 26936 out of 60613 tokens; OOV rate is 55.56 -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} diff --git a/egs/babel/s5c/local/extract_oov_words.pl b/egs/babel/s5c/local/extract_oov_words.pl index fbb6e95286d..08f8f5d1436 100755 --- a/egs/babel/s5c/local/extract_oov_words.pl +++ b/egs/babel/s5c/local/extract_oov_words.pl @@ -5,15 +5,15 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; $ignore_oov = 0; $ignore_first_field = 0; for($x = 0; $x < 2; $x++) { if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -43,7 +43,7 @@ while(<F>) { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } @@ -62,7 +62,7 @@ $i = $sym2int{$a}; if (!defined ($i)) { print $a . "\n"; - } + } } } } diff --git a/egs/babel/s5c/local/filter_kwslist.pl b/egs/babel/s5c/local/filter_kwslist.pl index c84a5f6d3c9..7c57b62517a 100755 --- a/egs/babel/s5c/local/filter_kwslist.pl +++ b/egs/babel/s5c/local/filter_kwslist.pl @@ -24,19 +24,19 @@ if(ref($kwentry->{kw}) eq 'ARRAY'){ my @arr = @{$kwentry->{kw}}; my @newarray = (); - + push @newarray, $arr[0]; #$arr[0]->{tbeg} . 
"\n"; for (my $i = 1; $i < scalar(@arr); $i +=1) { - + my $found = 0; foreach my $kw (@newarray) { - if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && + if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && ( $arr[$i]->{channel} == $kw->{channel}) && ( $arr[$i]->{file} eq $kw->{file}) ) { $found = 1; - + #print $arr[$i]->{tbeg} . "\n"; } } diff --git a/egs/babel/s5c/local/find_transcripts.pl b/egs/babel/s5c/local/find_transcripts.pl index 6429411b864..d34b075e7ea 100755 --- a/egs/babel/s5c/local/find_transcripts.pl +++ b/egs/babel/s5c/local/find_transcripts.pl @@ -21,7 +21,7 @@ # It takes as # Extracts from the dot files the transcripts for a given # dataset (represented by a file list). -# +# @ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; $dot_flist = shift @ARGV; @@ -36,7 +36,7 @@ -while(){ +while(){ chop; $uttid = $_; $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; diff --git a/egs/babel/s5c/local/fix_kwslist.pl b/egs/babel/s5c/local/fix_kwslist.pl index 29afc73e473..33c6dc30e82 100755 --- a/egs/babel/s5c/local/fix_kwslist.pl +++ b/egs/babel/s5c/local/fix_kwslist.pl @@ -81,7 +81,7 @@ sub mysort { print $xml; } else { if (!open(O, ">$fixed_kwslist_out")) { - print "Fail to open output file: $fixed_kwslist_out\n"; + print "Fail to open output file: $fixed_kwslist_out\n"; exit 1; } print O $xml; diff --git a/egs/babel/s5c/local/generate_confusion_matrix.sh b/egs/babel/s5c/local/generate_confusion_matrix.sh index 4bcbacb5ae9..e6b221f7cc0 100755 --- a/egs/babel/s5c/local/generate_confusion_matrix.sh +++ b/egs/babel/s5c/local/generate_confusion_matrix.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. nj=4 cmd=run.pl acwt=0.1 @@ -86,7 +86,7 @@ cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \ perl -ane ' die unless scalar @F == 3; print "$F[1] $F[2] $F[0]\n"; - ' > $wdir/confusions.txt + ' > $wdir/confusions.txt exit 0 #-echo "Converting alignments to phone sequences..." diff --git a/egs/babel/s5c/local/generate_example_kws.sh b/egs/babel/s5c/local/generate_example_kws.sh index 2c849438192..e90752926b3 100755 --- a/egs/babel/s5c/local/generate_example_kws.sh +++ b/egs/babel/s5c/local/generate_example_kws.sh @@ -71,7 +71,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 20; $current = 0; $min_count = 4; @@ -88,7 +88,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 10; $current = 0; $min_count = 3; diff --git a/egs/babel/s5c/local/generate_proxy_keywords.sh b/egs/babel/s5c/local/generate_proxy_keywords.sh index 8562953efa4..584f7d7902e 100755 --- a/egs/babel/s5c/local/generate_proxy_keywords.sh +++ b/egs/babel/s5c/local/generate_proxy_keywords.sh @@ -3,7 +3,7 @@ # Copyright 2012-2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. 
nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -46,7 +46,7 @@ if [ $# -ne 1 ]; then exit 1; fi -set -e +set -e set -o pipefail kwsdatadir=$1 @@ -68,8 +68,34 @@ if $pron_probs; then pron_probs_param="--pron-probs"; fi +cat $kwsdatadir/L1.lex | \ + perl -e ' + while ( $line = <STDIN> ) { + chomp $line; + ($word, $pron) = split " ", $line, 2; + $pron = join(" ", split(" ", $pron)); + push @{$LEX{$pron}}, $word; + } + + open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n"; + open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n"; + foreach $pron (keys %LEX) { + $head = $LEX{$pron}->[0]; + print L1 "$head $pron\n"; + foreach $alt (@{$LEX{$pron}}) { + print MAP "0 0 $alt $head\n"; + } + } + print MAP "0\n"; + close(L1); + close(MAP); +' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt + +fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \ + fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst + ndisambig=`utils/add_lex_disambig.pl \ - $pron_probs_param $kwsdatadir/L1.lex $kwsdatadir/L1_disambig.lex` + $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex` ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST. ( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt @@ -86,11 +112,12 @@ cat $kwsdatadir/L2.lex |\ --osymbols=$kwsdatadir/words.txt - |\ fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst +echo $kwsdatadir/phones.txt phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'` word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'` -phone_disambig_symbols=`grep \# $kwsdatadir/phones.txt |\ +phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\ awk '{print $2}' | tr "\n" " "` -word_disambig_symbols=`grep \# $kwsdatadir/words.txt |\ +word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\ awk '{print $2}' | tr "\n" " "` cat $kwsdatadir/L1_disambig.lex |\ utils/make_lexicon_fst.pl $pron_probs_param - |\ @@ -139,10 +166,11 @@ $cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \ generate-proxy-keywords --verbose=1 \ --proxy-beam=$beam --proxy-nbest=$nbest \ --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ - $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark:$kwsdatadir/split/proxy.JOB.fsts + $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts proxy_fsts="" for j in `seq 1 $nj`; do proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts" done -cat $proxy_fsts > $kwsdatadir/keywords.fsts +cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \ + fsts-project ark:- ark:$kwsdatadir/keywords.fsts diff --git a/egs/babel/s5c/local/get_syllable_text.sh b/egs/babel/s5c/local/get_syllable_text.sh deleted file mode 100755 index 97d2af7ed65..00000000000 --- a/egs/babel/s5c/local/get_syllable_text.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -# Copyright Johns Hopkins University 2013 (author: Daniel Povey) -# Apache 2.0. - -if [ $# -ne 7 ]; then - echo "Usage: get_syllable_text.sh " - echo "e.g.: get_syllable_text.sh data/train data/lang ../s5-vietnamese-limited-syllables/data/lang_nopos \\" - echo " ../s5-vietnamese-limited-syllables/data/local/syllables/word2syllable_lexicon_unweighted.fst" - echo " exp/tri5h_ali exp/tri5_align_syllables ../s5-vietnamese-limited-syllables/data/train" - echo "This script copies the data-directory to but converts the text into syllable-level text." 
- echo "The inputs are as follows (those that are not self-explanatory):" - echo " is the syllable-level lang/ directory that has been built without" - echo " word-position dependency (we'll strip the suffixes from phones and expect them to be compatible with this)" - echo " is a kind of lexicon FST that describes words as syllable sequences." - echo " contains a word-level alignment of the data in " - echo " will be used to put temporary files and logs (make it somewhere in exp/)" - echo " is a data directory to put the syllable-level data; transcripts go to /text" - exit 1; -fi - -[ -f path.sh ] && . ./path.sh - -data=$1 -lang=$2 -lang_nopos=$3 -word2syllable_fst=$4 -alidir=$5 -dir=$6 -tgtdata=$7 - -for f in $data/text $lang/L.fst $lang_nopos/L.fst $word2syllable_fst $alidir/ali.1.gz \ - $alidir/final.mdl $alidir/num_jobs; do - if [ ! -f $f ]; then - echo "Expected file $f to exist" - exit 1; - fi -done - -mkdir -p $dir/log -nj=`cat $alidir/num_jobs` || exit 1; -sil=`cat data/lang/phones/optional_silence.txt` || exit 1 - -! ( ( for n in `seq $nj`; do gunzip -c $alidir/ali.$n.gz; done ) | \ - ali-to-phones $alidir/final.mdl ark:- ark,t:- | \ - utils/int2sym.pl -f 2- $lang/phones.txt - | \ - sed -E 's/_I( |$)/ /g' | sed -E 's/_E( |$)/ /g' | sed -E 's/_B( |$)/ /g' | sed -E 's/_S( |$)/ /g' | \ - utils/sym2int.pl -f 2- $lang_nopos/phones.txt | \ - gzip -c > $dir/phones.ark.gz ) 2>&1 | tee $dir/log/align.log \ - && echo "Error getting phone-level (non-word-position-dependent) alignments" && exit 1; - -# Get an archive of syllable-level acceptors corresponding to the training data. -# transcripts. We don't have an fstproject program for archives so we use a line of awk. - -! ( cat $data/text | utils/sym2int.pl --map-oov `cat $lang/oov.int` -f 2- $lang/words.txt | \ - transcripts-to-fsts ark:- ark:- | \ - fsttablecompose $word2syllable_fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' | \ - gzip -c > $dir/syllables.ark.gz ) 2>&1 | tee $dir/log/get_syllable_fsts.log && \ - echo "Error getting syllable FSTs" && exit 1; - -cp -rT $data $tgtdata || exit 1; -rm -rf $tgtdata/split* - -# From the phone-level transcripts and the syllable-level acceptors, work out -# the syllable sequence for each . Remove consecutive silences. -! ( fsttablecompose $lang_nopos/L.fst "ark:gunzip -c $dir/syllables.ark.gz|" ark:- | \ - fsttablecompose "ark:gunzip -c $dir/phones.ark.gz | transcripts-to-fsts ark:- ark:- |" \ - ark,s,cs:- ark,t:- | fsts-to-transcripts ark:- ark,t:- | int2sym.pl -f 2- $lang_nopos/words.txt | \ - sed "s/$sil $sil/$sil/g" > $tgtdata/text ) && echo "Error getting text data" && exit 1; - -! utils/fix_data_dir.sh $tgtdata/ && echo "Error fixing data dir" && exit 1; - -exit 0; - - - diff --git a/egs/babel/s5c/local/gridsearch.pl b/egs/babel/s5c/local/gridsearch.pl index 7b2ad530fa4..937273286fe 100755 --- a/egs/babel/s5c/local/gridsearch.pl +++ b/egs/babel/s5c/local/gridsearch.pl @@ -78,7 +78,7 @@ sub substitute { sub escape { my @cmd_in = @{$_[0]}; my @cmd = (); - foreach my $x (@cmd_in) { + foreach my $x (@cmd_in) { if ($x =~ m/^\S+$/) { push @cmd, $x } # If string contains no spaces, take # as-is. 
@@ -100,11 +100,11 @@ sub escape { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif ($ARGV[$i] eq "-train") { if ( $cmdid ) { if ( $cmdid eq "-eval" ) { @@ -113,7 +113,7 @@ sub escape { @traincmd = @cmd; } } - + $cmdid = $ARGV[$i]; @cmd = (); @@ -167,12 +167,12 @@ sub escape { @out = substitute(\@traincmd, \%params); print "Running train:\n" . join(" ", @out) . "\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); print "Running eval:\n" . join(" ", @out) . "\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/gridsearch2.pl b/egs/babel/s5c/local/gridsearch2.pl index 6645743c114..d09d8b28f0a 100755 --- a/egs/babel/s5c/local/gridsearch2.pl +++ b/egs/babel/s5c/local/gridsearch2.pl @@ -91,17 +91,17 @@ sub substitute { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif (grep {$_ eq $ARGV[$i]} @known_switches) { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; pp(%found_switches); } @@ -120,7 +120,7 @@ sub substitute { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; } pp(%VARIABLES); @@ -136,11 +136,11 @@ sub substitute { my @out; @out = substitute(\@traincmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/kwords2indices.pl b/egs/babel/s5c/local/kwords2indices.pl index 47cc3dc2741..776f66c5951 100755 --- a/egs/babel/s5c/local/kwords2indices.pl +++ b/egs/babel/s5c/local/kwords2indices.pl @@ -5,8 +5,8 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; sub permute { @@ -16,10 +16,10 @@ sub permute { return map([$_], @$last); } - return map { - my $left = $_; + return map { + my $left = $_; map([@$left, $_], @$last) - } + } permute(@_); } @@ -32,8 +32,8 @@ sub permute { shift @ARGV; $map_oov = shift @ARGV; } if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -46,7 +46,7 @@ sub permute { } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -61,7 +61,7 @@ sub permute { while() { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } diff --git a/egs/babel/s5c/local/kws_combine.sh b/egs/babel/s5c/local/kws_combine.sh index 33446915eac..f795c63aad9 100755 --- a/egs/babel/s5c/local/kws_combine.sh +++ b/egs/babel/s5c/local/kws_combine.sh @@ -17,9 +17,9 @@ # Script for system combination using minimum Bayes risk decoding. -# This calls lattice-combine to create a union of lattices that have been +# This calls lattice-combine to create a union of lattices that have been # normalized by removing the total forward cost from them. The resulting lattice -# is used as input to lattice-mbr-decode. 
This should not be put in steps/ or +# utils/ since the scores on the combined lattice must not be scaled. # begin configuration section. @@ -71,7 +71,7 @@ for i in `seq 0 $[num_sys-1]`; do offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. decode_dir=`echo $decode_dir | cut -d: -f1` [ -z "$offset" ] && offset=1 - + weight=$(perl -e "print ($offset/$total_sum);") if [ -f $decode_dir ] ; then systems+="$weight $decode_dir " diff --git a/egs/babel/s5c/local/kws_data_prep.sh b/egs/babel/s5c/local/kws_data_prep.sh index 909e9b2596c..3882c99ce6d 100755 --- a/egs/babel/s5c/local/kws_data_prep.sh +++ b/egs/babel/s5c/local/kws_data_prep.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. case_insensitive=true use_icu=true icu_transform="Any-Lower" @@ -21,11 +21,11 @@ help_message=" Note: most important output is keywords.fsts allowed switches: --case-sensitive # Shall we be case-sensitive or not? - # Please not the case-sensitivness depends + # Please note that case-sensitivity depends # on the shell locale! --use-uconv # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration - + " [ -f ./path.sh ] && . ./path.sh; # source the path. @@ -39,7 +39,7 @@ if [ $# -ne 3 ]; then fi set -u -set -e +set -e set -o pipefail langdir=$1; @@ -51,8 +51,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir; cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -75,8 +75,8 @@ if $case_insensitive && ! $use_icu ; then echo "$0: Running case insensitive processing" cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " - + echo "$0: Warning, multiple words in dictionary differ only in case: " + cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int @@ -84,7 +84,7 @@ elif $case_insensitive && $use_icu ; then echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")" cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " + echo "$0: Warning, multiple words in dictionary differ only in case: " paste <(cut -f 1 $kwsdatadir/keywords.txt ) \ <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\ @@ -107,15 +107,21 @@ fi # Compile keywords into FSTs -if [ -z $silence_word ]; then - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts +if [ -s $kwsdatadir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts + else + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ + awk 
-v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ + > $kwsdatadir/keywords.fsts + fi else - silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` - [ -z $silence_int ] && \ - echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ - awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ - > $kwsdatadir/keywords.fsts + echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword" + echo "WARNING: was found in the dictionary. That might be OK -- or not." + touch $kwsdatadir/keywords.fsts fi # Create utterance id for each utterance @@ -129,7 +135,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_proxy.sh b/egs/babel/s5c/local/kws_data_prep_proxy.sh index 787cb009960..04cc59b6499 100755 --- a/egs/babel/s5c/local/kws_data_prep_proxy.sh +++ b/egs/babel/s5c/local/kws_data_prep_proxy.sh @@ -3,7 +3,7 @@ # Copyright 2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -15,6 +15,10 @@ phone_nbest=50 # Use top n best phone sequences in KxL2xE, -1 means all phone_cutoff=5 # We don't generate proxy keywords for OOV keywords that # have less phones than the specified cutoff as they may # introduce a lot false alarms +max_phone_cutoff=9990 # We don't generate proxy keywords for OOV keywords that + # have more phonemes than this. This can be used when + # we need to use different parameters for keywords of + # different lengths. confusion_matrix= # If supplied, using corresponding E transducer count_cutoff=1 # Minimal count to be considered in the confusion matrix; # will ignore phone pairs that have count less than this. @@ -38,13 +42,13 @@ if [ $# -ne 5 ]; then echo " data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/" echo "allowed options:" echo " --case-sensitive # Being case-sensitive or not" - echo " --icu-transform # Transliteration for upper/lower case" + echo " --icu-transform # Transliteration for upper/lower case" echo " # mapping" echo " --proxy-set # Keyword set for generating proxies" exit 1 fi -set -e +set -e set -o pipefail langdir=$1 @@ -62,8 +66,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir/tmp/ cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -103,7 +107,7 @@ if $case_insensitive; then else cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt - + cat $kwsdatadir/keywords_all.txt | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \ > $kwsdatadir/keywords_all.int @@ -139,11 +143,11 @@ cat $kwsdatadir/keywords_proxy.txt |\ # L1 since it is the lexicon used for the LVCSR training. 
cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' | perl -e ' ($lex1, $words) = @ARGV; open(L, "<$lex1") || die "Fail to open $lex1.\n"; @@ -230,8 +234,10 @@ cat $kwsdatadir/keywords_proxy.txt | perl -e ' print STEDRR "'$0': No pronunciation found for word: $col[$i]\n"; } } - if ($len >= '$phone_cutoff') { + if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){ print "$line\n"; + } elsif ($len > '$max_phone_cutoff'){ + print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n"; } else { print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n"; } @@ -256,7 +262,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' |\ sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_syllables.sh b/egs/babel/s5c/local/kws_data_prep_syllables.sh deleted file mode 100755 index c6245e52c9e..00000000000 --- a/egs/babel/s5c/local/kws_data_prep_syllables.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - -# Begin configuration section. -silence_word= # Optional silence word to insert (once) between words of the transcript. -# End configuration section. - -echo $0 "$@" - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - - -if [ $# -ne 4 ]; then - echo "Usage: local/kws_data_prep_syllables.sh [options] " - echo " e.g.: local/kws_data_prep_syllables.sh data/lang/ data/dev10h/ SIL data/kws/" - echo "Input is in : kwlist.xml, ecf.xml (rttm file not needed)." - echo "The lang directory is expected to be syllable-level. The syllable-lexicon " - echo "is a text file with lines of the form:" - echo "word syllable1 syllable2" - echo "This script is as kws_data_prep.sh, except that the output keywords.fsts" - echo "contains the various alternative syllable-level pronunciations of the input" - echo "words." - echo "Output is in : keywords.txt, kwlist_invocab.xml," - echo " kwlist_outvocab.xml, keywords.fsts; note that the only syllable-level" - echo " output (and the only one that really matters) is keywords.fsts" - echo "Note: most important output is keywords.fsts" - echo " Options:" - echo " --silence-word # Note, this is required. It is a word, e.g. SIL," - echo " # in the syllable lexicon, that's optional." 
- exit 1; -fi - -langdir=$1; -datadir=$2; -syllable_lexicon=$3 -kwsdatadir=$4 -keywords=$kwsdatadir/kwlist.xml - -[ -z $silence_word ] && echo "--silence-word option is required" && exit 1; - -mkdir -p $kwsdatadir; - -cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; - - use XML::Simple; - use Data::Dumper; - - my $data = XMLin(\*STDIN); - - #print Dumper($data->{kw}); - foreach $kwentry (@{$data->{kw}}) { - #print Dumper($kwentry); - print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; - } -' > $kwsdatadir/keywords.txt - -[ ! -s "$syllable_lexicon" ] && echo "No such file '$syllable_lexicon' (syllable lexicon), or empty file." && exit 1; - -# The word symbols on the first entry of $syllable_lexicon will be given a symbol-table -# file. We just use this symbol table in this script; the values will never appear -# elsewhere. - -mkdir -p $kwsdatadir/temp - -# Remove any lines with symbols we don't have in our symbol vocabulary. -temp_syllable_lexicon=$kwsdatadir/temp/syllable_lexicon.in -cat $syllable_lexicon | sym2int.pl --map-oov 123456789 -f 2- $langdir/words.txt | grep -v -w 123456789 | \ - int2sym.pl -f 2- $langdir/words.txt > $temp_syllable_lexicon - -n1=`cat $syllable_lexicon | wc -l` -n2=`cat $temp_syllable_lexicon | wc -l` -echo "After removing OOV symbols from word-to-syllable lexicon, #lines changed from $n1 to $n2" - - -if $case_insensitive; then - echo "Running case insensitive processing" - # we turn the first element of each line of $temp_syllable_lexicon into upper case. - tr '[:lower:]' '[:upper:]' < $temp_syllable_lexicon | awk '{print $1}' | \ - paste - <(awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' <$temp_syllable_lexicon) \ - > $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - - # We turn all but the first element of each line in $kwsdatadir/keywords.txt - # into upper case. 
- tr '[:lower:]' '[:upper:]' < $kwsdatadir/keywords.txt | \ - awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' | \ - paste <(awk '{print $1}' <$kwsdatadir/keywords.txt) - \ - > $kwsdatadir/temp/keywords.txt || exit 1; -else - cp $temp_syllable_lexicon $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - cp $kwsdatadir/keywords.txt $kwsdatadir/temp/ || exit 1; -fi - -cat $kwsdatadir/temp/syllable_lexicon.txt | awk '{print $1}' | sort | uniq | \ - awk 'BEGIN{print " 0";} {print $1, NR;}' > $kwsdatadir/temp/words.txt - -sym2int.pl --map-oov 0 -f 2- $kwsdatadir/temp/words.txt < $kwsdatadir/temp/keywords.txt \ - > $kwsdatadir/temp/keywords_all.int - -cat $kwsdatadir/temp/keywords_all.int | \ - grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int - -cut -f 1 -d ' ' $kwsdatadir/keywords.int | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml - -cat $kwsdatadir/temp/keywords_all.int | \ - egrep " 0 | 0$" | cut -f 1 -d ' ' | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml - -local/make_lexicon_fst_special.pl $kwsdatadir/temp/syllable_lexicon.txt $silence_word | \ - sym2int.pl -f 4 $kwsdatadir/temp/words.txt | \ - sym2int.pl -f 3 $langdir/words.txt | \ - fstcompile | \ - fstarcsort --sort_type=olabel > $kwsdatadir/temp/L.fst || exit 1; - -# Compile keywords into FSTs, compose with lexicon to get syllables -# and project on the input (keeping only syllable labels), -# before writing to keywords.fsts - -transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:- | \ - fsttablecompose $kwsdatadir/temp/L.fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' > \ - $kwsdatadir/keywords.fsts - -# Create utterance id for each utterance -cat $datadir/segments | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - $idx=1; - while(<>) { - chomp; - print "$_ $idx\n"; - $idx++; - }' > $kwsdatadir/utter_id - -# Map utterance to the names that will appear in the rttm file. You have -# to modify the commands below accoring to your rttm file -cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; - -echo "Kws data preparation succeeded" diff --git a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh index aa9e22cca96..b73112b191d 100755 --- a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh +++ b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. cmd=run.pl duptime=0.5 model=final.mdl @@ -35,8 +35,8 @@ mkdir -p $oracledir/log for filename in $lang/words.txt $decodedir/num_jobs \ $data/text $decodedir/lat.1.gz \ $decodedir/../$model ; do - if [[ ! -f $filename ]] ; then - echo "FATAL: File $filename does not exist!" + if [[ ! -f $filename ]] ; then + echo "FATAL: File $filename does not exist!" 
exit 1; fi done @@ -44,7 +44,7 @@ done nj=`cat $decodedir/num_jobs` (cd $decodedir; ln -s ../$model final.mdl ) -(cd $oracledir; echo "$nj" > num_jobs ) +(cd $oracledir; echo "$nj" > num_jobs ) $cmd LAT=1:$nj $oracledir/log/lat.LAT.log \ cat $data/text \| \ diff --git a/egs/babel/s5c/local/kws_oracle.sh b/egs/babel/s5c/local/kws_oracle.sh index 44334ba1413..c7aa661664f 100755 --- a/egs/babel/s5c/local/kws_oracle.sh +++ b/egs/babel/s5c/local/kws_oracle.sh @@ -1,23 +1,23 @@ #!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal) -# 2013 Johns Hopkins University +# 2013 Johns Hopkins University # Apache 2.0. . ./path.sh . ./cmd.sh -# Begin configuration section. +# Begin configuration section. cmd=run.pl -acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices +acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices duptime=0.6 #Max time difference in which the occurences of the same KW will be seen as duplicates text= # an alternative reference text to use. when not specified, the /text will be used -model= # acoustic model to use +model= # acoustic model to use extraid= # kws setup extra ID (kws task was setup using kws_setup.sh --extraid stage=0 # to resume the computation from different stage # End configuration section. -set -e +set -e set -o pipefail echo "$0 $@" # Print the command line for logging @@ -47,7 +47,7 @@ fi if [ -z "$model" ]; then # if --model was not specified on the command line... srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory. - model=$srcdir/final.mdl; + model=$srcdir/final.mdl; fi if [ -z $extraid ] ; then # the same logic as with kws_setup.sh @@ -59,7 +59,7 @@ fi nj=`cat $decodedir/num_jobs`; oracledir=$decodedir/kws_oracle -mkdir -p $oracledir +mkdir -p $oracledir mkdir -p $oracledir/log if [ $stage -le 0 ] ; then @@ -119,17 +119,17 @@ if [ $stage -le 4 ]; then echo "=======================================================" ( echo -n "ATWV-full " - grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13 + grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13 ) #-( #-echo -n "ATWV-invocab " - #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13 + #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13 #-) || echo "Error occured getting the invocab results" #-( #-echo -n "ATWV-outvocab " - #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13 + #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13 #-) || echo "Error occured getting the outvocab results" echo "=======================================================" diff --git a/egs/babel/s5c/local/kws_score_f4de.sh b/egs/babel/s5c/local/kws_score_f4de.sh index d761e080c1c..cd6948a8a08 100755 --- a/egs/babel/s5c/local/kws_score_f4de.sh +++ b/egs/babel/s5c/local/kws_score_f4de.sh @@ -16,11 +16,11 @@ help_message="$0: score the kwslist using the F4DE scorer from NIST Example: $0 [additional-parameters] where the most important additional parameters can be: - --extraid #for using, when a non-default kws tasks are setup + --extraid #for using, when a non-default kws tasks are setup (using the kws_setup.sh --extraid) for a kaldi-single data-dir --kwlist #allows for an alternative kwlist -- if not set, the default kwlist is taken from - --f4de-prefix #allows for scoring the same results using + --f4de-prefix #allows for scoring the same results using different kwlists and storing them in the same dir " echo $0 $@ @@ -72,8 +72,9 @@ done echo KWSEval -e $ecf -r $rttm -t 
$kwlist \ -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir -KWSEval -e $ecf -r $rttm -t $kwlist \ - -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1; +KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \ + --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1; duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"` diff --git a/egs/babel/s5c/local/kws_search.sh b/egs/babel/s5c/local/kws_search.sh index 4b275048e0e..9e998d6c3f9 100755 --- a/egs/babel/s5c/local/kws_search.sh +++ b/egs/babel/s5c/local/kws_search.sh @@ -10,7 +10,7 @@ help_message="$(basename $0): do keyword indexing and search. data-dir is assum Usage: $(basename $0) " -# Begin configuration section. +# Begin configuration section. #acwt=0.0909091 min_lmwt=7 max_lmwt=17 @@ -101,7 +101,7 @@ if [ ! -z "$model" ]; then else model_flags= fi - + if [ $stage -le 0 ] ; then if [ ! -f $indices_dir/.done.index ] ; then @@ -109,8 +109,8 @@ if [ $stage -le 0 ] ; then for lmwt in `seq $min_lmwt $max_lmwt` ; do indices=${indices_dir}_$lmwt mkdir -p $indices - - acwt=`perl -e "print (1.0/$lmwt);"` + + acwt=`perl -e "print (1.0/$lmwt);"` [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word" steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\ --skip-optimization $skip_optimization --max-states $max_states \ diff --git a/egs/babel/s5c/local/kws_setup.sh b/egs/babel/s5c/local/kws_setup.sh index f1036f100de..a6b87ef004f 100755 --- a/egs/babel/s5c/local/kws_setup.sh +++ b/egs/babel/s5c/local/kws_setup.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. cmd=run.pl case_insensitive=true subset_ecf= @@ -18,7 +18,7 @@ silence_word= # Optional silence word to insert (once) between words of the tra echo "$0 $@" # Print the command line for logging -set -e +set -e set -u set -o pipefail @@ -26,13 +26,13 @@ help_message="$0: Initialize and setup the KWS task directory Usage: $0 [rttm-file] allowed switches: - --subset-ecf /path/to/filelist # The script will subset the ecf file + --subset-ecf /path/to/filelist # The script will subset the ecf file # to contain only the files from the filelist --rttm-file /path/to/rttm # the preferred way how to specify the rttm - # the older way (as an in-line parameter is + # the older way (as an in-line parameter) is # obsolete and will be removed in near future --case-insensitive # Shall we be case-sensitive or not? - # Please not the case-sensitivness depends + # Please note that case-sensitivity depends # on the shell locale! 
--use-icu # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration @@ -85,13 +85,13 @@ fi mkdir -p $kwsdatadir if [ -z $subset_ecf ] ; then - test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml + test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1 else local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml fi -if $kwlist_wordlist ; then +if $kwlist_wordlist ; then ( echo '' awk '{ printf(" \n", $1); diff --git a/egs/babel/s5c/local/lattice_to_ctm.sh b/egs/babel/s5c/local/lattice_to_ctm.sh index 08a1b5889a7..5fbde42d237 100755 --- a/egs/babel/s5c/local/lattice_to_ctm.sh +++ b/egs/babel/s5c/local/lattice_to_ctm.sh @@ -39,8 +39,7 @@ if [ -z "$model" ] ; then fi -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do +for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; done @@ -49,17 +48,31 @@ name=`basename $data`; # e.g. eval2000 mkdir -p $dir/scoring/log if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - set -e -o pipefail \; \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; + if [ ! -f $lang/phones/word_boundary.int ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + fi fi if [ $stage -le 1 ]; then @@ -76,12 +89,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh b/egs/babel/s5c/local/lattice_to_ctm_syllable.sh deleted file mode 100755 index 7165a7a04e5..00000000000 --- a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -beam=4 # Use a fairly narrow beam because lattice-align-words is slow-ish. -word_ins_penalty=0.5 -min_lmwt=7 -max_lmwt=17 -cleanup=true -model= - -#end configuration section. - -#debugging stuff -echo $0 $@ - -[ -f ./path.sh ] && . ./path.sh -[ -f ./cmd.sh ] && . ./cmd.sh -. parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: $0 [options] " && exit; - echo "This is as lattice_to_ctm.sh, but for syllable-based systems where we want to" - echo "obtain word-level ctms. Here, is a directory like data/local/w2s," - echo "as created by run-6-syllables.sh. It contains:" - echo " G.fst, Ldet.fst, words.txt, word_align_lexicon.int" - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1) # (createCTM | filterCTM )." - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -w2sdir=$3 -dir=$4 - -if [ -z "$model" ] ; then - model=`dirname $dir`/final.mdl # Relative path does not work in some cases - #model=$dir/../final.mdl # assume model one level up from decoding dir. - #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl -fi - -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz \ - $w2sdir/{G.fst,Ldet.fst,words.txt,word_align_lexicon.int}; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -# we are counting the LM twice since we have both the original, syllable-level LM -# and the new, word-level one, so we scale by 0.5 to get a reasonably scaled -# LM cost. - -if [ $stage -le 0 ]; then - nj=`cat $dir/num_jobs` || exit 1; - $cmd JOB=1:$nj $dir/scoring/log/get_word_lats.JOB.log \ - lattice-compose "ark:gunzip -c $dir/lat.JOB.gz|" $w2sdir/Ldet.fst ark:- \| \ - lattice-determinize ark:- ark:- \| \ - lattice-compose ark:- $w2sdir/G.fst ark:- \| \ - lattice-scale --lm-scale=0.5 ark:- "ark:|gzip -c >$dir/wlat.JOB.gz" || exit 1; -fi - -if [ $stage -le 1 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/wlat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-push ark:- ark:- \| \ - lattice-align-words-lexicon --max-expand=10 --output-if-empty=true $w2sdir/word_align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $w2sdir/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 2 ]; then - # Remove some stuff we don't want to score, from the ctm. 
- for x in $dir/score_*/$name.ctm; do - cp $x $x.bkup1; - cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION|\(\(\)\)' | \ - grep -v -E '' | \ - grep -v -E '' | \ - grep -v -E '' | \ - grep -v -E '' | \ - grep -v -E '' | \ - perl -e '@list = (); %list = (); - while(<>) { - chomp; - @col = split(" ", $_); - push(@list, $_); - $key = "$col[0]" . " $col[1]"; - $list{$key} = 1; - } - foreach(sort keys %list) { - $key = $_; - foreach(grep(/$key/, @list)) { - print "$_\n"; - } - }' > $x; - done -fi - -$cleanup && rm $dir/wlat.*.gz - -echo "Lattice2CTM finished on " `date` -exit 0 diff --git a/egs/babel/s5c/local/make_L_align.sh b/egs/babel/s5c/local/make_L_align.sh index 03d1ad517fe..50e46a00493 100755 --- a/egs/babel/s5c/local/make_L_align.sh +++ b/egs/babel/s5c/local/make_L_align.sh @@ -20,7 +20,7 @@ set -e if [ $# -ne 3 ]; then echo "This is a simple script that will generate the L_align.fst" - echo "The FST L_align.fst is used for getting the force-aligned " + echo "The FST L_align.fst is used for getting the force-aligned " echo "utterances" echo "The script automaticky recognizes the probabilistic lexicon" echo "is used and will use the correct file" @@ -39,7 +39,7 @@ silphone=`cat $dir/phones/optional_silence.txt` || exit 1; # Create lexicon with alignment info if [ -f $tmpdir/lexicon.txt ] ; then cat $tmpdir/lexicon.txt | \ - awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' + awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' elif [ -f $tmpdir/lexiconp.txt ] ; then cat $tmpdir/lexiconp.txt | \ awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' diff --git a/egs/babel/s5c/local/make_ecf_subset.sh b/egs/babel/s5c/local/make_ecf_subset.sh index 53bddcbc839..9bdd95c3e27 100755 --- a/egs/babel/s5c/local/make_ecf_subset.sh +++ b/egs/babel/s5c/local/make_ecf_subset.sh @@ -8,7 +8,7 @@ echo "$0 $@" 1>&2 # Print the command line for logging [ -f ./path.sh ] && . ./path.sh; # source the path. . parse_options.sh || exit 1; -help_message="$0: generates an subset ecf file for spoken term detection evaluation. +help_message="$0: generates an subset ecf file for spoken term detection evaluation. The first parameter specifies the descriptor of the subset, the second parameter specifies the original ecf file. The file will be generated in the kws subdirectory of the directory @@ -47,6 +47,6 @@ duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\) # Output is produced here: ( grep "" ) diff --git a/egs/babel/s5c/local/make_lexicon_fst_special.pl b/egs/babel/s5c/local/make_lexicon_fst_special.pl index 976c28c029c..3df6e7a9527 100755 --- a/egs/babel/s5c/local/make_lexicon_fst_special.pl +++ b/egs/babel/s5c/local/make_lexicon_fst_special.pl @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (author: Daniel Povey) # makes lexicon FST -- special version only for use in keyword search -# for allowing optional silences between words. This version has +# for allowing optional silences between words. 
This version has # no pron-probs involved, and # does support an optional silence, but this silence is only allowed # between words (where it may occur an arbitrary number of times), diff --git a/egs/babel/s5c/local/make_lexicon_subset.sh b/egs/babel/s5c/local/make_lexicon_subset.sh index c2bf0e21623..1e77fcaa2b9 100755 --- a/egs/babel/s5c/local/make_lexicon_subset.sh +++ b/egs/babel/s5c/local/make_lexicon_subset.sh @@ -10,9 +10,9 @@ input_lexicon_file=$2 output_lexicon_file=$3 ( - #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' + #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' -) | sort -u | awk ' +) | sort -u | awk ' BEGIN { while(( getline line< ARGV[2] ) > 0 ) { split(line, e, "\t") @@ -20,7 +20,7 @@ output_lexicon_file=$3 } FILENAME="-" i=0 - + while(( getline word< ARGV[1] ) > 0 ) { if (word in LEXICON) print LEXICON[word] diff --git a/egs/babel/s5c/local/make_pitch.sh b/egs/babel/s5c/local/make_pitch.sh deleted file mode 100755 index 107016d78a9..00000000000 --- a/egs/babel/s5c/local/make_pitch.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/bin/bash - -# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Bagher BabaAli -# Apache 2.0 -# To be run from .. (one directory up from here) -# This makes two-dimension p(voicing) and pitch features for some data/ directory. - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=0 -pitch_config= -interpolate_pitch_opts= -process_pitch_opts= -cleanup=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - -if [ $# != 3 ]; then - echo "Usage: make_pitch.sh [options] "; - echo "Makes two dimensional [p(voicing), pitch] features, based on SAcC pitch" - echo "extractor followed by some normalization and smoothing" - echo "E.g.: make_pitch.sh data/train_pitch exp/make_pitch_train plp/" - echo "Options: " - echo " --pitch-config # config passed to compute-pitch-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -expdir=$2 -pitchdir=$3 - -# make $pitchdir an absolute pathname. -pitchdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $pitchdir ${PWD}` -# make $expdir an absolute pathname. -expdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $expdir ${PWD}` - -# use "name" as part of name of the archive. -name=`basename $data` - -mkdir -p $pitchdir || exit 1; -mkdir -p $expdir/log || exit 1; - -scp=$data/wav.scp - -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. - -( # this is for back compatiblity: - cd $KALDI_ROOT/tools - if [ -d sacc ] && [ ! -d pitch_trackers/sacc ]; then - echo "Linking sacc directory to new location." - mkdir -p pitch_trackers - cd pitch_trackers - ln -s ../sacc .. - fi -) - -sacc_dir=$KALDI_ROOT/tools/pitch_trackers/sacc/SAcC_GLNXA64/ -# make $sacc_dir an absolute pathname. -sacc_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sacc_dir ${PWD}` - -sacc_script=$sacc_dir/run_SAcC.sh -sacc_config=$sacc_dir/conf/Babelnet_sr8k_bpo6_sb24_k10.config - -if [ ! 
-f $sacc_script ]; then - echo "*Expecting the script $sacc_script to exist" - echo "*cd to $KALDI_ROOT/tools/, and run extras/install_sacc.sh" - echo "*Re-run this script when it is installed." - exit 1; -fi - -required="$scp $pitch_config $sacc_config" - -for f in $required; do - if [ ! -f $f ]; then - echo "make_pitch.sh: no such file $f" - exit 1; - fi -done - -# note: in general, the double-parenthesis construct in bash "((" is "C-style -# syntax" where we can get rid of the $ for variable names, and omit spaces. -# The "for" loop in this style is a special construct. - -basename=`basename $data` -wavdir=$pitchdir/temp_wav_$basename -mkdir -p $wavdir - -if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then - wav_scp=$expdir/wav.scp - cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ - > $wav_scp || exit 1; - - if [ -f $data/segments ]; then - echo "$0 [info]: segments file exists: creating temporary wav files in $wavdir" - segments=$data/segments - else - # create a fake segments file that takes the whole file; this is an easy way - # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments - segments=$expdir/fake_segments - fi - if [ $stage -le 0 ]; then - echo "Extracting wav-file segments (or just converting to wav format)" - $cmd $expdir/log/extract-segments.log \ - extract-segments scp:$data/wav.scp $segments scp:$wav_scp || exit 1; - fi -else - echo "No segments file exists, and wav scp is plain: using wav files as input." - wav_scp=$data/wav.scp -fi - -wav_checked_scp=$expdir/wav_checked.scp -cat $wav_scp | \ - perl -ane '@A=split; if (-f $A[1]) { print; }' >$wav_checked_scp -nl_orig=`cat $wav_scp | wc -l` -nl_new=`cat $wav_checked_scp | wc -l` - -echo "After removing non-existent files, number of utterances decreased from $nl_orig to $nl_new"; -[ $nl_new -eq 0 ] && exit 1; - -# now $wav_scp is an scp file for the per-utterance wav files. - -# Split up the wav files into multiple lists. -split_wavs="" -for ((n=1; n<=nj; n++)); do - split_wavs="$split_wavs $expdir/split_wavs.$n.scp" -done -utils/split_scp.pl $wav_checked_scp $split_wavs || exit 1; - -# For each wav file, create corresponding temporary pitch file, in the -# format the SAcC outputs: [ 0 frame pitch p(voicing) ] -temp_pitchdir=$pitchdir/temp_pitch_$basename -mkdir -p $temp_pitchdir - -for ((n=1; n<=nj; n++)); do - mkdir -p $temp_pitchdir/$n - cat $expdir/split_wavs.$n.scp | awk -v pdir=$temp_pitchdir -v n=$n \ - '{key=$1; wavfile=$2; printf("%s,%s/%s/%s.pitch\n", wavfile, pdir, n, key);}' \ - > $expdir/sacc_flist.$n || exit 1 -done - -if [ $stage -le 1 ]; then - # Need to do this in director $sacc_dir as some of the things in its config - # are relative pathnames. - $cmd JOB=1:$nj $d/$expdir/log/sacc.JOB.log \ - cd $sacc_dir '&&' $sacc_script $expdir/sacc_flist.JOB $sacc_config || exit 1; -fi - -# I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any -# interpretation in the here-doc. 
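# A minimal illustration of that here-doc quoting rule, using a hypothetical demo.sh:
#   cat <<'EOF' > demo.sh   # quoted delimiter: the body is written out verbatim,
#   echo "converting $f"    # so $f is expanded only when demo.sh itself runs
#   EOF
# With an unquoted delimiter (cat <<EOF), $f would instead be substituted by the
# enclosing shell at the time the file is generated.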
-cat <<'EOF' > $expdir/convert.sh -#!/bin/bash -sacc_flist=$1 -scpfile=$2 -[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; - -for f in `cat $sacc_flist | cut -d, -f2`; do - g=`echo $f | sed s:.pitch$:.mat:` - if [ -f $f ]; then - cat $f | awk 'BEGIN{printf("[ "); } {print $4, $3;} END{ print "]"; }' > $g - rm $f - fi -done -cat $sacc_flist | cut -d, -f2 | \ - perl -ane 'm:/([^/]+)\.pitch$: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile -EOF -chmod +x $expdir/convert.sh - -if [ $stage -le 2 ]; then - echo "Converting format from .pitch to .mat (kaldi-readable format)" - $cmd JOB=1:$nj $expdir/log/convert.JOB.log \ - $expdir/convert.sh $expdir/sacc_flist.JOB $expdir/mat.scp.JOB || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "Doing final processing (interpolation, smoothing, etc.) on pitch features" - $cmd JOB=1:$nj $expdir/log/process.JOB.log \ - interpolate-pitch $interpolate_pitch_opts scp:$expdir/mat.scp.JOB ark:- \| \ - process-pitch-feats $process_pitch_opts ark:- \ - ark,scp:$pitchdir/${basename}_pitch.JOB.ark,$pitchdir/${basename}_pitch.JOB.scp || exit 1; -fi - -echo "Creating $data/feats.scp" -for ((n=1; n<=nj; n++)); do cat $pitchdir/${basename}_pitch.$n.scp; done > $data/feats.scp - -if $cleanup; then - echo "Removing temporary files" - rm -r $wavdir $temp_pitchdir -fi - -echo "Finished extracting pitch features for $basename" - -debug=~/temp2.m -echo "A = [" > $debug -copy-feats scp:$data/feats.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 \ - >> $debug - -cat <<'EOF' >>$debug -]; -pov = A(:, 1); -pitch = A(:, 2); -subplot(2, 2, 1); -hist(pov, 30); -legend('pov') -subplot(2, 2, 2); -hist(pitch, 30); -legend('pitch') - -len=size(pov, 1); -povD = pov(1:len-1) - pov(2:len); -subplot(2, 2, 3); -hist(povD, 30); -legend('delta-pov') - -pitchD = pitch(1:len-1) - pitch(2:len); -pitchD = max(pitchD, -0.05); -pitchD = min(pitchD, 0.05); -subplot(2, 2, 4); -hist(pitchD, 50); -legend('delta-pitch'); - -print -deps 'C.eps' -EOF - -exit 0; - - -# Here's - -#copy-feats scp:plp/train_pitch_pitch.10.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 > ~/temp2.m - -# -### data goes here. -#]; - - - -# rm $expdir/.error 2>/dev/null - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config ark:- \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # `pwd`/$expdir/JOB || exit 1; - -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# local/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; - -# else -# echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
-# split_scps="" -# for ((n=1; n<=nj; n++)); do -# split_scps="$split_scps $expdir/wav.$n.scp" -# done - -# utils/split_scp.pl $scp $split_scps || exit 1; - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config scp:$expdir/wav.JOB.scp \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # $expdir/JOB || exit 1; - -# pushd $sacc_dir -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# cd $sacclocal/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; -# fi - - -# if [ -f $expdir/.error.$name ]; then -# echo "Error producing pitch features for $name:" -# tail $expdir/make_pitch.*.log -# exit 1; -# fi - -# # concatenate the .scp files together. -# for ((n=1; n<=nj; n++)); do -# cat $pitchdir/raw_pitch_$name.$n.scp >> $data/pitchs.scp || exit 1; -# done > $data/pitchs.scp - -# rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null - -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` -# if [ $nf -ne $nu ]; then -# echo "It seems not all of the feature files were successfully ($nf != $nu);" -# echo "consider using utils/fix_data_dir.sh $data" -# fi - -# echo "Succeeded creating PITCH features for $name" diff --git a/egs/babel/s5c/local/make_syllable_lexicon.sh b/egs/babel/s5c/local/make_syllable_lexicon.sh deleted file mode 100755 index 118845982b9..00000000000 --- a/egs/babel/s5c/local/make_syllable_lexicon.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - - -help="Usage: $(basename $0) - E.g. $(basename $0) data/local/lexicon.txt word2syllable_lexicon.txt data/local/syllables/lexicon.txt - Here, is the text-form lexicon but with tabs separating the syllables, e.g. - WORD w o rr d - has entries of the form - WORD w/o rr/d - has entries of the form - w/o w o" - -# config vars: -pron_probs=false # If you set --pron-probs true, will expect pron-prob on input lexicon and produce - # pron-probs on word2syllable lexicon. -# end configs. -. utils/parse_options.sh - -if [ $# != 3 ]; then - echo $help 2>&1; - exit 1; -fi - -lex_in=$1 -w2s_lex_out=$2 -s2p_lex_out=$3 - -[ ! -f $lex_in ] && echo "No such file $lex_in" && exit 1; -mkdir -p `dirname $w2s_lex_out` -mkdir -p `dirname $s2p_lex_out` - -cat $lex_in | perl -e ' - ($w2s, $pron_probs) = @ARGV; - open(W2S, ">$w2s") || die "opening word to syllable lexicon"; - $saw_tabs = 0; - while() { - chop; - if ($pron_probs eq "true") { - m:(\S+)\s+(\S+)\s+(.+): || die "Bad line $_ (note: have pron probs)."; - $word = $1; - $prob = $2; - $pron = $3; - ($prob > 0.0 && $prob <= 1.0) || die "Bad pron-prob $prob in line $_"; - print W2S "$word $prob"; - } else { - m:(\S+)\s+(.+): || die "Bad line $_ (note: do not have pron probs)."; - $word = $1; - $pron = $2; - print W2S "$word"; - } - @A = split("\t", $pron); - @A >= 1 || die "Bad lexicon line $_\n"; - if (@A > 1) { $saw_tabs = 1; } - foreach $s (@A) { - $s =~ s/^\s+//; # Remove leading space. - $s =~ s/\s+$//; # Remove trailing space. - if ($s ne "") { - $s =~ m:/: && die "slash (/) present in syllable $s (not allowed)\n"; - $t = join("/", split(" ", $s)); # replace spaces with / - print W2S " $t"; - print "$t $s\n"; - } - } - print W2S "\n"; - } - if (! $saw_tabs) { - die "You seem to be using as input to this script, a lexicon that does not have " . 
- "syllables separated by tabs."; - } - ' $w2s_lex_out $pron_probs | sort | uniq > $s2p_lex_out || exit 1; - -exit 0; diff --git a/egs/babel/s5c/local/naive_comb.pl b/egs/babel/s5c/local/naive_comb.pl index e49ac972169..74ad20d84e3 100755 --- a/egs/babel/s5c/local/naive_comb.pl +++ b/egs/babel/s5c/local/naive_comb.pl @@ -102,7 +102,7 @@ sub KwslistTimeCompare { } } else { $a->[0] cmp $b->[0]; - } + } } sub KwslistTimeSort { @@ -124,7 +124,7 @@ sub KwslistTimeSort { my $method = 1; my $power = 0.5; -GetOptions('tolerance=f' => \$tolerance, +GetOptions('tolerance=f' => \$tolerance, 'method=i' => \$method, 'power=f' => \$power, 'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;}); diff --git a/egs/babel/s5c/local/ndx2flist.pl b/egs/babel/s5c/local/ndx2flist.pl index 48fc3dec101..c5f676affcd 100755 --- a/egs/babel/s5c/local/ndx2flist.pl +++ b/egs/babel/s5c/local/ndx2flist.pl @@ -21,7 +21,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 @@ -37,7 +37,7 @@ foreach $fn (@ARGV) { $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n"; - $disk_id=$1; + $disk_id=$1; $disk_id =~ tr/-\./__/; # replace - and . with - so 11-10.1 becomes 11_10_1 $fn =~ s:/$::; # Remove final slash, just in case it is present. $disk2fn{$disk_id} = $fn; diff --git a/egs/babel/s5c/local/nist_eval/create_compound_set.sh b/egs/babel/s5c/local/nist_eval/create_compound_set.sh index 63de46f6106..1e745d1ebba 100755 --- a/egs/babel/s5c/local/nist_eval/create_compound_set.sh +++ b/egs/babel/s5c/local/nist_eval/create_compound_set.sh @@ -3,7 +3,7 @@ #Simple script to create compound set info that will allow for more automatized #work with the shadow set. # -#The notion of shadow data set came from the need to be able to verify +#The notion of shadow data set came from the need to be able to verify #the output of the recognizer during decoding the evaluation data. #The idea is simple -- instead of decoding just the eval data, decode both #eval data plus the dev data (or at least some portion of it) interleved diff --git a/egs/babel/s5c/local/nist_eval/export_systems.sh b/egs/babel/s5c/local/nist_eval/export_systems.sh index 7e514bcc077..d0af608416c 100755 --- a/egs/babel/s5c/local/nist_eval/export_systems.sh +++ b/egs/babel/s5c/local/nist_eval/export_systems.sh @@ -2,11 +2,11 @@ set -e set -o pipefail -. ./cmd.sh; . ./path.sh; +. ./cmd.sh; . 
./path.sh; #( -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* #) & #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem* @@ -14,9 +14,9 @@ set -o pipefail ( bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* ) & -bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem +bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem wait wait diff --git a/egs/babel/s5c/local/nist_eval/filter_data.sh b/egs/babel/s5c/local/nist_eval/filter_data.sh index f36903035b6..8576b93fef8 100755 --- a/egs/babel/s5c/local/nist_eval/filter_data.sh +++ b/egs/babel/s5c/local/nist_eval/filter_data.sh @@ -38,7 +38,7 @@ outputname=$name while (( "$#" )); do resultdir=$1;shift - echo "Processing data directory $resultdir" + echo "Processing data directory $resultdir" [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1; diff --git a/egs/babel/s5c/local/nist_eval/get_training_times.sh b/egs/babel/s5c/local/nist_eval/get_training_times.sh index 2b92dcefcdc..f5b0012c2f2 100755 --- a/egs/babel/s5c/local/nist_eval/get_training_times.sh +++ b/egs/babel/s5c/local/nist_eval/get_training_times.sh @@ -24,8 +24,8 @@ function process { replace+="\t" done - ( - eval `grep "group=all"` + ( + eval `grep "group=all"` echo -n "threads=$total_threads" echo -n " cpu_time=$total_cpu_time wall_time=$clock_time" echo -n " human_cpu_time="`convertsecs $total_cpu_time` @@ -43,17 +43,17 @@ local/summarize_logs.pl $dir/exp/make_*/*train*/ | process if [ -d $dir/data/local/extend ] ; then legend "Extending the lexicon" - local/summarize_logs.pl $dir/data/local/extend/tmp/log | process + local/summarize_logs.pl $dir/data/local/extend/tmp/log | process fi legend "Training upto stage tri5" -local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process +local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process legend "SGMM2 stage training" -local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process +local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process legend "SGMM2+bMMI stage training" -local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process +local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process nnet=tri6_nnet [ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet diff --git a/egs/babel/s5c/local/nist_eval/make_release.sh b/egs/babel/s5c/local/nist_eval/make_release.sh index ce784431a5c..aff89f92846 100755 --- a/egs/babel/s5c/local/nist_eval/make_release.sh +++ b/egs/babel/s5c/local/nist_eval/make_release.sh @@ -57,7 +57,7 @@ function export_file { else echo "$source_file -> $target_file" fi - + else echo "The file is already there, not doing anything. 
Either change the version (using --version), or delete that file manually)" exit 1 @@ -72,7 +72,7 @@ function export_kws_file { fixed_xml=$2 kwlist=$3 export_xml=$4 - + echo "Exporting KWS $source_xml as `basename $export_xml`" if [ -f $source_xml ] ; then cp $source_xml $fixed_xml.bak @@ -110,7 +110,7 @@ function find_best_stt_result { local dir=$1 local mask=$2 local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1` - + echo $record >&2 local file=`echo $record | awk -F ":" '{print $1}'` #echo $file >&2 @@ -200,7 +200,7 @@ function figure_out_scase { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "BaDev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -211,7 +211,7 @@ function figure_out_scase { echo "BaDev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 echo "BaDev" @@ -225,7 +225,7 @@ function figure_out_partition { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "conv-dev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -235,7 +235,7 @@ function figure_out_partition { echo "conv-dev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "conv-dev" return 1 @@ -264,7 +264,7 @@ fi #data=data/shadow.uem dirid=`basename $data` kws_tasks="kws " -[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` [ -d $data/compounds ] && compounds=`ls $data/compounds` if [ -z "$compounds" ] ; then @@ -295,7 +295,7 @@ else submit_to_google $best_one $ATWV $MTWV ) || echo "Submission failed!" - + for compound in $compounds ; do compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"` echo "From ($kws) $best_one going to $compound_best_one" diff --git a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh index 760d7ee80d5..3b12222e13a 100755 --- a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh +++ b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh @@ -7,7 +7,7 @@ # This script, which will generally be called from other neural-net training # scripts, extracts the training examples used to train the neural net (and also # the validation examples used for diagnostics), and puts them in separate archives. -# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts +# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts # frames from unsupervsied data. Decode directory for unsupervised data which # has the best path done along with posteriors (can be done using local/combine_posteriors.sh) @@ -25,15 +25,15 @@ samples_per_iter=400000 # each iteration of training, see this many samples # per job. This is just a guideline; it will pick a number # that divides the number of samples in the entire data. 
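# As an illustration of that guideline (hypothetical numbers): with
# samples_per_iter=400000 and roughly 3.2 million frames of training data,
# the script would settle on 8 archives of about 400k samples each, since
# 8 divides the data evenly.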
transform_dir_sup= # If supplied, overrides alidir -transform_dir_unsup= +transform_dir_unsup= num_jobs_nnet=16 # Number of neural net jobs to run in parallel stage=-10 -io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. splice_width=4 # meaning +- 4 frames on each side for second LDA spk_vecs_dir_sup= spk_vecs_dir_unsup= random_copy=false -weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data +weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data # frame for it to not be ignored supervised_copies=3 # Make x copies of supervised data. use_frame_selection=true @@ -70,7 +70,7 @@ if [ $# != 6 ]; then echo " --supervised-copies <#copies|3> # Make copies of supervised data" echo " --transform-dir-sup # Directory with transforms for supervised training data" echo " --transform-dir-unsup # Directory with transforms for unsupervised training data" - + exit 1; fi @@ -109,7 +109,7 @@ cp $alidir/tree $dir awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately if [ -f $data_sup/utt2uniq ]; then echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to" echo "include all perturbed versions of the same 'real' utterances." @@ -121,7 +121,7 @@ if [ -f $data_sup/utt2uniq ]; then rm $dir/uniq2utt $dir/valid_uttlist.tmp fi -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ head -$num_utts_subset > $dir/train_subset_uttlist @@ -137,7 +137,7 @@ if [ "$norm_vars" != "$norm_vars_unsup" ]; then fi cp $alidir/norm_vars $dir 2>/dev/null -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -150,7 +150,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null` #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then @@ -159,14 +159,14 @@ case $feat_type in # exit 1 #fi cp $alidir/splice_opts $dir/splice_opts 2>/dev/null - + #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then # echo "ERROR: Features mismatch for supervised and unsupervised data!" 
# echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match" # exit 1 #fi - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -309,18 +309,18 @@ if [ $stage -le 3 ]; then for (( i=0; i \$fragMarkers, - "oov=s" => \$OOV_symbol, + "oov=s" => \$OOV_symbol, "vocab=s" => \$vocabFile, "icu-transform=s" => \$icu_transform, "get-whole-transcripts=s" => \$get_whole_transcripts @@ -112,7 +112,7 @@ print STDERR ("\tLimiting transcriptions to words in $vocabFile\n"); print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n"); print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers); - } + } print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:); } else { print STDERR ("Usage: $0 [--options] InputDir OutputDir\n"); @@ -295,7 +295,7 @@ } else { print STDERR ("$0 ERROR: No .txt files found $TranscriptionDir\n"); exit(1); - } + } } else { print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n"); exit(1); @@ -322,8 +322,8 @@ $SampleRate = 8000; #default while ($#Info>=0) { $line = shift @Info; - $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); - $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); + $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); + $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); } if ($SampleCount<0) { # Unable to extract a valid duration from the sphere header @@ -342,7 +342,7 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n"); - } + } @AudioFiles = `ls ${AudioDir}/*.wav`; if ($#AudioFiles >= 0) { @@ -378,8 +378,8 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n"); - } - + } + if ( $#waveformName == 0 ) { print STDERR ("$0 ERROR: No audio files found!"); } diff --git a/egs/babel/s5c/local/prepare_lexicon.pl b/egs/babel/s5c/local/prepare_lexicon.pl index 721e56a0dcf..ff128f07637 100755 --- a/egs/babel/s5c/local/prepare_lexicon.pl +++ b/egs/babel/s5c/local/prepare_lexicon.pl @@ -27,10 +27,10 @@ # 㓤 k_1 i:_1 t_1 # 兄妹 h_1 i:_1 N_1 m_2 u:j_2 # 兄妹 h_1 i:_1 N_1 m_6 u:j_6 -# +# # # Write only one pronunciation per line -# Transfer any tags, prefixed by underscores, to phones in the syllable +# Transfer any tags, prefixed by underscores, to phones in the syllable # Remove the syllable boundary markers, given by periods or pound signs # # NOTE: The Romainzation is present only for some languages. See -r option. 
@@ -46,7 +46,7 @@ $icu_transform = ""; $phonemap=""; # -# - nonsilence_phones.txt: tagged phones from the new lexicon +# - nonsilence_phones.txt: tagged phones from the new lexicon # # - optional_silence.txt: phones used to model silence in acoustic training # @@ -61,12 +61,12 @@ # ############################################################################### -GetOptions("add=s" => \$nsWordsFile, - "oov=s" => \$OOV_symbol, - "romanized!" => \$romanized, - "sil=s" => \$sil, +GetOptions("add=s" => \$nsWordsFile, + "oov=s" => \$OOV_symbol, + "romanized!" => \$romanized, + "sil=s" => \$sil, "icu-transform=s" => \$icu_transform, - "phonemap=s" => \$phonemap + "phonemap=s" => \$phonemap ); if ($#ARGV == 1) { @@ -165,7 +165,7 @@ $syllable =~ s:\s+: :g; @original_phones = split(" ", $syllable); @substituted_original_phones=(); - + foreach $phone (@original_phones) { if (defined $phonemap_hash{$phone} ) { #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; @@ -205,7 +205,7 @@ # It is a phone if ( $substituted_phones{phone} ) { die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; - } + } $is_original_phone{$phone} = "$phone"; $new_phones .= " $phone"; } @@ -277,7 +277,7 @@ && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n"); ############################################################################### -# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line +# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line ############################################################################### foreach $phone (sort keys %is_new_phone) { diff --git a/egs/babel/s5c/local/prepare_stm.pl b/egs/babel/s5c/local/prepare_stm.pl index edf1b43676d..b4daec585e3 100755 --- a/egs/babel/s5c/local/prepare_stm.pl +++ b/egs/babel/s5c/local/prepare_stm.pl @@ -92,7 +92,7 @@ @tokens = split(/\s+/, $line); unless ($#tokens == 3) { $num_failed_parses+=1; - print STDERR "$0: Couldn't parse line $. in $segmentsFile\n" + print STDERR "$0: Couldn't parse line $. 
in $segmentsFile\n" if ($num_failed_parses == 1); print STDERR ("\tLine: $line") if ($num_failed_parses le $num_failed_parses_max); @@ -174,7 +174,7 @@ $waveform{$recordingID} =~ s:.+/::; # remove path prefix $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension $channel{$recordingID} = 1 # Default - unless (exists $channel{$recordingID}); + unless (exists $channel{$recordingID}); ++$numRecordings; } close(SCP); @@ -321,7 +321,7 @@ $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars print CHARSTM ("$w\n"); } -close(CHARSTM); +close(CHARSTM); close(STM); print STDERR ("$0: Wrote char.stm file $charStmFile\n"); diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl index 06a762d7762..9d865cca8c9 100755 --- a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl +++ b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl @@ -1,6 +1,6 @@ #!/usr/bin/env perl -# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar # Apache 2.0 ################################################################################ diff --git a/egs/babel/s5c/local/resegment/generate_segments.sh b/egs/babel/s5c/local/resegment/generate_segments.sh index 01917c3d4e9..95e88deb87d 100755 --- a/egs/babel/s5c/local/resegment/generate_segments.sh +++ b/egs/babel/s5c/local/resegment/generate_segments.sh @@ -37,14 +37,14 @@ if [ $# -ne 5 ]; then echo " --segmentation-opts '--opt1 opt1val --opt2 opt2val' # options for segmentation.py" echo " --reference-rttm # Reference RTTM file that will be used for analysis of the segmentation" echo " --get-text (true|false) # Convert text from base data directory to correspond to the new segments" - echo + echo echo "e.g.:" echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h" exit 1 fi datadir=$1 # The base data directory that contains at least the files wav.scp and reco2file_and_channel -lang=$2 +lang=$2 model_dir=$3 # Segmentation model directory created using local/resegment/run_segmentation_train.sh temp_dir=$4 # Temporary directory to store some intermediate files during segmentation output_dir=$5 # The target directory @@ -73,18 +73,18 @@ if [ $stage -le 1 ]; then ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \ utils/int2sym.pl -f 2- $lang/phones.txt \| \ gzip -c '>' $temp_dir/pred.JOB.gz || exit 1 - + mkdir -p $temp_dir/pred gunzip -c $temp_dir/pred.*.gz | \ - perl -ne '($file, $phones)=split / /, $_, 2; - open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; - print {$fh} "$file $phones"; + perl -ne '($file, $phones)=split / /, $_, 2; + open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; + print {$fh} "$file $phones"; close($fh);' || exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "SI decoding done in $((t2-t1)) seconds" +echo "SI decoding done in $((t2-t1)) seconds" ############################################################################### @@ -99,8 +99,8 @@ if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then exit 1; fi -silphone=`cat $lang/phones/optional_silence.txt` -# silphone will typically be "sil" or "SIL". +silphone=`cat $lang/phones/optional_silence.txt` +# silphone will typically be "sil" or "SIL". # 3 sets of phones: 0 is silence, 1 is noise, 2 is speech., ( @@ -127,15 +127,15 @@ local/resegment/segmentation.py --verbose 2 $segmentation_opts \ if [ ! 
-s $output_dir/segments ] ; then echo "Zero segments created during segmentation process." - echo "That means something failed. Try the cause and re-run!" + echo "That means something failed. Try the cause and re-run!" exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "Resegment data done in $((t2-t1)) seconds" +echo "Resegment data done in $((t2-t1)) seconds" -for file in reco2file_and_channel wav.scp ; do +for file in reco2file_and_channel wav.scp ; do [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1 cp $datadir/$file $output_dir/$file done diff --git a/egs/babel/s5c/local/rttm_to_text.pl b/egs/babel/s5c/local/rttm_to_text.pl index 7312acdb886..d33c71e2f17 100755 --- a/egs/babel/s5c/local/rttm_to_text.pl +++ b/egs/babel/s5c/local/rttm_to_text.pl @@ -64,7 +64,7 @@ sub float_gt { @times = (); $filename = $_filename; } - + #I don't really know what is the distinction between all #of these. Let's throw away the SPEAKER, as it does not #really contain information that is to be found in the transcript @@ -91,12 +91,12 @@ sub float_gt { my $B = $times[-1][0]; my $Aend = $times[-2][1]; my $Bend = $times[-1][1]; - + #print "WARNING: Elements in the RTTM file are not sorted for FILENAME $filename!\n"; #print $times[-2][0] . " " . $times[-2][1] - $times[-2][0]. " " . $times[-2][2] . "\n"; #print $times[-1][0] . " " . $times[-1][1] - $times[-1][0]. " " . $times[-1][2] . "\n"; #print "\n"; - + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); #print Dumper(\@sorted); $times[-1][0] = $sorted[0]; @@ -129,7 +129,7 @@ sub float_gt { #if ($segmentname ne "10470_A_20111118_172644_000000" ) { # next; #} - + #print $filename . "\n"; #print Dumper(\@times); diff --git a/egs/babel/s5c/local/run_kws_stt_task.sh b/egs/babel/s5c/local/run_kws_stt_task.sh index 50c96e41035..d622aac9442 100755 --- a/egs/babel/s5c/local/run_kws_stt_task.sh +++ b/egs/babel/s5c/local/run_kws_stt_task.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -39,7 +39,7 @@ if [ $(basename $0) == score.sh ]; then fi echo $0 "$@" -. utils/parse_options.sh +. utils/parse_options.sh if [ $# -ne 3 ]; then echo "Usage: $0 [options] " @@ -47,27 +47,29 @@ if [ $# -ne 3 ]; then exit 1; fi -data_dir=$1; +data_dir=$1; lang_dir=$2; -decode_dir=$3; +decode_dir=$3; ##NB: The first ".done" files are used for backward compatibility only ##NB: should be removed in a near future... -if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then - local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ - $data_dir $lang_dir $decode_dir - - if ! $skip_scoring ; then - local/score_stm.sh --cmd "$cmd" --cer $cer \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ +if ! $skip_stt ; then + if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then + local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ $data_dir $lang_dir $decode_dir + + if ! $skip_scoring ; then + local/score_stm.sh --cmd "$cmd" --cer $cer \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ + $data_dir $lang_dir $decode_dir + fi + touch $decode_dir/.done.score fi - touch $decode_dir/.done.score fi if ! $skip_kws ; then - if [ ! -f $decode_dir/.kws.done ] && [ ! -f $decode_dir/.done.kws ]; then + if [ ! -f $decode_dir/.kws.done ] && [ ! 
-f $decode_dir/.done.kws ]; then local/kws_search.sh --cmd "$cmd" --max-states ${max_states} \ --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} --skip-scoring $skip_scoring\ --indices-dir $decode_dir/kws_indices $lang_dir $data_dir $decode_dir diff --git a/egs/babel/s5c/local/score_combine.sh b/egs/babel/s5c/local/score_combine.sh index f425b5afc68..7e8af85b2d8 100755 --- a/egs/babel/s5c/local/score_combine.sh +++ b/egs/babel/s5c/local/score_combine.sh @@ -18,9 +18,9 @@ # Script for system combination using minimum Bayes risk decoding. -# This calls lattice-combine to create a union of lattices that have been +# This calls lattice-combine to create a union of lattices that have been # normalized by removing the total forward cost from them. The resulting lattice -# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# is used as input to lattice-mbr-decode. This should not be put in steps/ or # utils/ since the scores on the combined lattice must not be scaled. # begin configuration section. @@ -43,7 +43,7 @@ help_message="Usage: "$(basename $0)" [options] or: "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine Options: --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. - --min-lmwt INT # minumum LM-weight for lattice rescoring + --min-lmwt INT # minimum LM-weight for lattice rescoring --max-lmwt INT # maximum LM-weight for lattice rescoring --lat-weights STR # colon-separated string of lattice weights --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. @@ -70,7 +70,7 @@ decode_dirs=( $@ ) # read the remaining arguments into an array unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir num_sys=${#decode_dirs[@]} # number of systems to combine -#Let the user to set the CTM file name +#Let the user set the CTM file name #use the data-dir name in case the user doesn't care if [ -z ${ctm_name} ] ; then ctm_name=`basename $data` @@ -94,7 +94,7 @@ for i in `seq 0 $[num_sys-1]`; do offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. decode_dir=`echo $decode_dir | cut -d: -f1` [ -z "$offset" ] && offset=0 - + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir for f in $model $decode_dir/lat.1.gz ; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; @@ -103,7 +103,7 @@ for i in `seq 0 $[num_sys-1]`; do nj=`cat $decode_dir/num_jobs` || exit 1; else if [ $nj != `cat $decode_dir/num_jobs` ]; then - echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" exit 1; fi fi @@ -128,7 +128,7 @@ if [ -z "$lat_weights" ]; then for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done fi -if [ $stage -le 0 ]; then +if [ $stage -le 0 ]; then $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \ mkdir -p $dir/score_LMWT/ '&&' \ lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \ @@ -155,12 +155,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/score_mbr.sh b/egs/babel/s5c/local/score_mbr.sh index 1c39830b4c7..a86dd5c3f71 100755 --- a/egs/babel/s5c/local/score_mbr.sh +++ b/egs/babel/s5c/local/score_mbr.sh @@ -48,7 +48,7 @@ for inv_acwt in `seq $min_lmwt $max_lmwt`; do done wait; [ -f $dir/.error ] && echo "score_mbr.sh: errror getting MBR outout."; - + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ cat $dir/scoring/LMWT.tra \| \ diff --git a/egs/babel/s5c/local/score_sctk_prune.sh b/egs/babel/s5c/local/score_sctk_prune.sh index a6eca9fd071..09662af57c8 100755 --- a/egs/babel/s5c/local/score_sctk_prune.sh +++ b/egs/babel/s5c/local/score_sctk_prune.sh @@ -73,12 +73,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . " $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { @@ -103,8 +103,8 @@ if [ $stage -le 1 ]; then foreach (@char) { $char = encode("UTF8", $_); $start += $dur; - # printf "$col[0] $col[1] $start $dur $char\n"; - printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; + # printf "$col[0] $col[1] $start $dur $char\n"; + printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; } } }' > $y.char.ctm @@ -122,7 +122,7 @@ if [ $stage -le 2 ]; then cp $data/char.stm $dir/score_LMWT/'&&'\ $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl; fi - + # for x in $dir/score_*/*.ctm; do # mv $x.filt $x; # rm -f $x.filt*; diff --git a/egs/babel/s5c/local/score_stm.sh b/egs/babel/s5c/local/score_stm.sh index 2406af4e726..56835109722 100755 --- a/egs/babel/s5c/local/score_stm.sh +++ b/egs/babel/s5c/local/score_stm.sh @@ -48,7 +48,7 @@ data=$1 lang=$2 # This parameter is not used -- kept only for backwards compatibility dir=$3 -set -e +set -e set -o pipefail set -u @@ -82,8 +82,9 @@ if [ $stage -le 0 ] ; then \> $dir/score_LMWT/stm '&&' \ paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \ \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \ - \> $dir/score_LMWT/${name}.ctm '&&' \ - utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm '&&' \ + \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \ $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \ -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1 fi diff --git a/egs/babel/s5c/local/shadow_set_kws_search.sh b/egs/babel/s5c/local/shadow_set_kws_search.sh index 76521fda9b6..a67a3a57f6a 100755 --- a/egs/babel/s5c/local/shadow_set_kws_search.sh +++ b/egs/babel/s5c/local/shadow_set_kws_search.sh @@ -13,7 +13,7 @@ help_message="$0: create subset of the input directory (specified as the first d Example: $0 [data-dir2 [data-dir3 [ ...] ]" -# Begin configuration section. +# Begin configuration section. 
#acwt=0.0909091 min_lmwt=7 max_lmwt=17 @@ -101,8 +101,8 @@ if [ $stage -le 0 ] ; then for lmwt in `seq $min_lmwt $max_lmwt` ; do kwsoutdir=$decodedir/kws_$lmwt mkdir -p $kwsoutdir - - acwt=`perl -e "print (1.0/$lmwt);"` + + acwt=`perl -e "print (1.0/$lmwt);"` steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\ --acwt $acwt $model_flags --skip-optimization $skip_optimization \ --word_ins_penalty $word_ins_penalty \ @@ -128,14 +128,14 @@ if [ $stage -le 1 ] ; then dirB=$decodedir/`basename $datasetB`/kws_$lmwt mkdir -p $dirA mkdir -p $dirB - + steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1 [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-A " | \ - sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -152,7 +152,7 @@ if [ $stage -le 1 ] ; then cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-B " | \ - sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -192,7 +192,7 @@ if [ $stage -le 3 ] ; then utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ --segments=$datadir/segments --normalize=false --remove-dup=true\ --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1 - + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \ set -e';' set -o pipefail';' \ cat $rootdirAB/kws_LMWT/results \| \ @@ -204,15 +204,15 @@ fi echo "Scoring $datasetA" if [ $stage -le 4 ] ; then if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then - echo "Not scoring, because the file local/kws_score.sh is not present" + echo "Not scoring, because the file local/kws_score.sh is not present" exit 1 elif [ ! -f $datasetA/kws/rttm ] ; then echo "Not scoring, because the file $datasetA/kws/rttm is not present" else $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \ - local/kws_score.sh $datasetA $rootdirA/kws_LMWT + local/kws_score.sh $datasetA $rootdirA/kws_LMWT $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \ - local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT + local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT fi fi diff --git a/egs/babel/s5c/local/split_ctms.sh b/egs/babel/s5c/local/split_ctms.sh index efba126a5dd..b24a1380111 100755 --- a/egs/babel/s5c/local/split_ctms.sh +++ b/egs/babel/s5c/local/split_ctms.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,8 +32,8 @@ echo "$0 $@" set -e set -o pipefail -data=$1; -q=$2; +data=$1; +q=$2; shift; shift; if [ -z $ctm_name ] ; then diff --git a/egs/babel/s5c/local/stm2text.pl b/egs/babel/s5c/local/stm2text.pl index 3ec3806238a..3b069c63554 100755 --- a/egs/babel/s5c/local/stm2text.pl +++ b/egs/babel/s5c/local/stm2text.pl @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0. 
-#This script takes the source STM file and generates the *.txt files which +#This script takes the source STM file and generates the *.txt files which #are usually part of the BABEL delivery #The *.txt files are not the part of the delivery for the evalpart1 subset #The program works as a filter and the only parameter it expects is @@ -12,7 +12,7 @@ #example of usage: # cat data/evalpart1/stm local/stm2text.pl data/raw_evalpart1_data/transcriptions -use strict; +use strict; use warnings; use utf8; @@ -30,7 +30,7 @@ next if ( $filename =~ /;;.*/ ); #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ ); $text = "" if not $text; - + if ( $prev_filename ne $filename ) { #close($OUTPUT) if ( tell(FH) != -1 ); print "$output_dir/$filename.txt\n"; diff --git a/egs/babel/s5c/local/subset_atwv.pl b/egs/babel/s5c/local/subset_atwv.pl index 910703db996..ce6b7043116 100755 --- a/egs/babel/s5c/local/subset_atwv.pl +++ b/egs/babel/s5c/local/subset_atwv.pl @@ -13,7 +13,7 @@ e.g.: subset_atwv.pl keywords.list bsum.txt This script will compute the ATWV for a subset of the original keywords in bsum.txt. -Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list +Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list is a list of the keywords that you want to compute the ATWV for. For example: KW101-0001 KW101-0002 @@ -27,7 +27,7 @@ my $subset_name = ""; my $width = 5; GetOptions('subset-name=s' => \$subset_name, - 'width=i' => \$width); + 'width=i' => \$width); @ARGV == 2 || die $Usage; @@ -72,7 +72,7 @@ if (/^Keyword/) {$flag = 1;} my @col; if ($flag == 1) { - # Figure out keywords that don't have occurrences in the search collection + # Figure out keywords that don't have occurrences in the search collection @col = split(/\|/, $_); $col[2] =~ s/^\s+//; $col[2] =~ s/\s+$//; diff --git a/egs/babel/s5c/local/subset_kwslist.pl b/egs/babel/s5c/local/subset_kwslist.pl index 96c2c7a7fdd..361291179ef 100755 --- a/egs/babel/s5c/local/subset_kwslist.pl +++ b/egs/babel/s5c/local/subset_kwslist.pl @@ -29,5 +29,5 @@ } $data->{kw} = \@filtered_kws; my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>''); -print $xml; +print $xml; exit 0 diff --git a/egs/babel/s5c/local/summarize_logs.pl b/egs/babel/s5c/local/summarize_logs.pl index 4f7fc058f96..e816d57d68f 100755 --- a/egs/babel/s5c/local/summarize_logs.pl +++ b/egs/babel/s5c/local/summarize_logs.pl @@ -23,7 +23,7 @@ sub parse_accounting_entry { $entry= shift @_; @elems = split " ", $entry; - + $time=undef; $threads=undef; foreach $elem (@elems) { @@ -96,7 +96,7 @@ sub parse_accounting_entry { $total_threads=0.0; foreach $fgroup (split_hundreds($fmap{$c})) { $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`; - + #print $lines ."\n"; @entries = split "\n", $lines; diff --git a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh new file mode 100755 index 00000000000..8f0cb88771a --- /dev/null +++ b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh + +if [ -f ./path.sh ]; then . 
./path.sh; fi
+
+if [ $# != 4 ]; then
+  echo "This script takes an ali directory and a syllable lang dir and generates"
+  echo "the syllabic transcription of the alignment"
+  echo ""
+  echo "Usage: $0 <data-dir> <syll-lang-dir> <ali-dir> <out-dir>"
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)"
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given ali directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '\<unk\>' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5c/local/syllab/create_syllables.pl b/egs/babel/s5c/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+
+my $with_probs;
+my $position_independent_phones;
+
+GetOptions("with-probs" => \$with_probs,
+           "position-independent-phones" => \$position_independent_phones
+);
+
+my %SYLLS;
+my %LEXICON;
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my $word; my $prob; my $pron;
+  if ($with_probs) {
+    ($word, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($word, $pron) = split(" ", $line, 2);
+  }
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my $pronlen= scalar @syllabs;
+  my @extended_syllabs;
+  if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) {
+    $SYLLS{$pron} +=1;
+    push @extended_syllabs, $pron;
+  } elsif ($pronlen == 1) {
+    my $syl;
+    my @phones=split " ", $syllabs[0];
+
+    if ($position_independent_phones) {
+      $syl = join(" ", @phones);
+    } else {
+      my @phones2 = map { $_ . "_I" } @phones;
+
+      if (scalar(@phones) == 1 ) {
+        $syl = "$phones[0]_S";
+      } else {
+        $phones2[0] = $phones[0] . "_B" unless $position_independent_phones;
+        $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones;
+        $syl = join(" ", @phones2);
+      }
+    }
+    $SYLLS{$syl} += 1;
+    push @extended_syllabs, $syl;
+  } else {
+    for (my $i = 0; $i < $pronlen; $i+=1) {
+      my $syl;
+      my @phones=split " ", $syllabs[$i];
+      my $first_index = 0;
+      my $last_index = scalar(@phones)-1;
+
+      if ($position_independent_phones) {
+        $syl = join(" ", @phones);
+      } else {
+        my @phones2 = map { $_ . "_I" } @phones;
+
+        if ($i == 0) {
+          $phones2[$first_index] = $phones[$first_index] . "_B";
+        } elsif ( $i == ($pronlen - 1)) {
+          $phones2[$last_index] = $phones[$last_index] . "_E";
+        }
+        $syl = join(" ", @phones2);
+      }
+
+      push @extended_syllabs, $syl;
+      $SYLLS{$syl} += 1;
+    }
+  }
+  push @{$LEXICON{$word}}, \@extended_syllabs;
+}
+
+
+my %VOCAB;
+my %COUNTS;
+my %REV_VOCAB;
+foreach my $syl (keys %SYLLS) {
+  my $seq=1;
+  my $word=$syl;
+  $word =~ s/_[^\s]*//g;
+  $word =~ s/ //g;
+  $word =~ s/[^a-zA-Z0-9<>\-|\/]//g;
+
+  my $wordx=$word;
+  $wordx .= "#$seq";
+  while (exists $COUNTS{$wordx}) {
+    $seq += 1;
+    $wordx = "$word#$seq";
+  }
+
+  $COUNTS{$wordx} += $SYLLS{$syl};
+  push @{$VOCAB{$wordx}}, $syl;
+  $REV_VOCAB{$syl} = $wordx;
+}
+
+open(my $lex_f, "|sort -u > $ARGV[0]") or
+  die "Cannot open the file \"$ARGV[0]\" for writing";
+
+foreach my $word (keys %VOCAB) {
+  print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n";
+}
+
+close($lex_f);
+
+open(my $word2syll_f, "|sort -u > $ARGV[1]") or
+  die "Cannot open the file \"$ARGV[1]\" for writing";
+
+foreach my $word (keys %LEXICON) {
+  foreach my $pron (@{$LEXICON{$word}}) {
+    my @pron_in_syllabs;
+    foreach my $syl (@{$pron}) {
+      die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl};
+      push @pron_in_syllabs, $REV_VOCAB{$syl};
+    }
+    print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n";
+  }
+}
+
+close($word2syll_f);
+
+open(my $word2ali_f, "|sort -u > $ARGV[2]") or
+  die "Cannot open the file \"$ARGV[2]\" for writing";
+
+foreach my $word (keys %LEXICON) {
+  foreach my $pron (@{$LEXICON{$word}}) {
+    print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . "\n";
+  }
+}
+
+close($word2ali_f);
+
diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
new file mode 100755
index 00000000000..2d1fcb2259e
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+. ./path.sh
+
+
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+data=$1
+llang=$2
+lang=$3
+out=$4
+lout=$5
+
+test -d $lout && rm -rf $lout
+mkdir -p $lout
+test -d $out && rm -rf $out
+cp -R $lang $out
+rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt
+rm -rf $out/phones/word_boundary.{int,txt}
+
+echo "Generating lexicons..."
+if [ -f $lang/phones/word_boundary.int ] ; then
+  echo "Position dependent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+else
+  echo "Position independent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+  awk '{if (NF == 2) { print $2;}
+        else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+# (Editorial sketch of the two text lexicons being compiled here, using a
+# made-up word "hello" whose pronunciation splits into the syllables "hh ah"
+# and "l ow"; the real entries come from create_syllables.pl above:
+#    lex.words2syllabs.txt :  hello   hhah#1 low#1
+#    lex.syllabs2phones.txt:  hhah#1  hh_B ah_I
+#                             low#1   l_I ow_E
+# The #N suffixes keep apart position-tagged syllables that would otherwise
+# collapse to the same stripped name.)
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |" | \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions("with-probs" => \$probs);
+
+my $syllab_lexicon=$ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, "<", $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($syll, $pron) = split(" ", $line, 2);
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5c/local/train_g2p.sh b/egs/babel/s5c/local/train_g2p.sh
index d608d084ac2..08be0014656 100755
--- a/egs/babel/s5c/local/train_g2p.sh
+++ b/egs/babel/s5c/local/train_g2p.sh
@@ -2,7 +2,7 @@
 # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0
 
-# Begin configuration section. 
+# Begin configuration section.
 iters=5
 stage=0
 encoding='utf-8'
@@ -74,7 +74,7 @@ if [ $stage -le 0 ]; then
 fi
 
 for i in `seq 0 $(($iters-2))`; do
-  
+
   echo "Training the G2P model (iter $[$i + 1] )"
   if [ $stage -le $i ]; then
diff --git a/egs/babel/s5c/local/train_lms_srilm.sh b/egs/babel/s5c/local/train_lms_srilm.sh
index 5bb1bfaa760..be2b0247aeb 100755
--- a/egs/babel/s5c/local/train_lms_srilm.sh
+++ b/egs/babel/s5c/local/train_lms_srilm.sh
@@ -4,22 +4,41 @@ export LC_ALL=C
 words_file=
 train_text=
 dev_text=
+oov_symbol="<unk>"
 
-. ./utils/parse_options.sh
+echo "$0 $@"
+
+[ -f path.sh ] && . ./path.sh
+. ./utils/parse_options.sh || exit 1
 
 echo "-------------------------------------"
 echo "Building an SRILM language model     "
 echo "-------------------------------------"
 
+if [ $# -ne 2 ] ; then
+  echo "Incorrect number of parameters."
+  echo "Script has to be called like this:"
+  echo "  $0 [switches] <datadir> <tgtdir>"
+  echo "For example: "
+  echo "  $0 data data/srilm"
+  echo "The allowed switches are: "
+  echo "    words_file=<file>    word list file -- data/lang/words.txt by default"
+  echo "    train_text=<file>    data/train/text is used in case when not specified"
+  echo "    dev_text=<file>      last 10 % of the train text is used by default"
+  echo "    oov_symbol=<symbol>  symbol to use for oov modeling -- <unk> by default"
+  exit 1
+fi
+
 datadir=$1
 tgtdir=$2
 outlm=lm.gz
+
 ##End of configuration
 
 loc=`which ngram-count`;
 if [ -z $loc ]; then
   if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
-    sdir=`pwd`/../../../tools/srilm/bin/i686-m64 
+    sdir=`pwd`/../../../tools/srilm/bin/i686-m64
   else
    sdir=`pwd`/../../../tools/srilm/bin/i686
  fi
@@ -34,23 +53,39 @@ if [ -z $loc ]; then
   fi
 fi
 
-[ -z $words_file ] && words_file=$datadir/lang/words.txt
-[ -z $train_text ] && train_text=$datadir/train/text
-[ -z $dev_text ] && dev_text=$datadir/dev2h/text
-
-echo "Using words file: $words_file"
-echo "Using train text: $train_text"
-echo "Using dev text : $dev_text"
+# Prepare the destination directory
+mkdir -p $tgtdir
 
 for f in $words_file $train_text $dev_text; do [ ! -s $f ] && echo "No such file $f" && exit 1; done
 
-# Prepare the destination directory
-mkdir -p $tgtdir
+[ -z $words_file ] && words_file=$datadir/lang/words.txt
+if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then
+  nr=`cat $train_text | wc -l`
+  nr_dev=$(($nr / 10 ))
+  nr_train=$(( $nr - $nr_dev ))
+  orig_train_text=$train_text
+  head -n $nr_train $train_text > $tgtdir/train_text
+  tail -n $nr_dev $train_text > $tgtdir/dev_text
+
+  train_text=$tgtdir/train_text
+  dev_text=$tgtdir/dev_text
+  echo "Using words file: $words_file"
+  echo "Using train text: 9/10 of $orig_train_text"
+  echo "Using dev text : 1/10 of $orig_train_text"
+else
+  [ -z "$train_text" ] && train_text=$datadir/train/text
+  [ -z "$dev_text" ] && dev_text=$datadir/dev2h/text
+  echo "Using words file: $words_file"
+  echo "Using train text: $train_text"
+  echo "Using dev text : $dev_text"
+fi
+
+
 # Extract the word list from the training dictionary; exclude special symbols
-sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '<eps>' > $tgtdir/vocab
+sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '<eps>' | grep -v -F "$oov_symbol" > $tgtdir/vocab
 if (($?)); then
   echo "Failed to create vocab from $words_file"
   exit 1
@@ -67,8 +102,8 @@ if (($?)); then
 else
   echo "Removed first word (uid) from every line of $train_text"
   # wc text.train train.txt # doesn't work due to some encoding issues
-  echo $train_text contains `cat $train_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'`
-  echo train.txt contains `cat $tgtdir/train.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'`
 fi
 
 # Kaldi transcript files contain Utterance_ID as the first word; remove it
@@ -79,56 +114,76 @@ if (($?)); then
 else
   echo "Removed first word (uid) from every line of $dev_text"
   # wc text.train train.txt # doesn't work due to some encoding issues
-  echo $train_text contains `cat $dev_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'`
-  echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'`
 fi
- 
 echo "-------------------"
 echo "Good-Turing 3grams"
 echo "-------------------"
-ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort
-ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort
-ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort
-ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort
+ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2
-order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 3grams" echo "-------------------" -ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Good-Turing 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab 
-unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + 
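+# (Editorial note, a sketch of the naming convention above rather than anything
+# SRILM enforces: NgramM.gtXYZ / NgramM.knXYZ encode the per-order -gtNmin
+# cutoffs, so e.g. 3gram.kn012 is the Kneser-Ney trigram LM trained with
+# -gt1min 0 -gt2min 1 -gt3min 2; the maxent models built below get the
+# suffix .me instead.)
+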
+if [ ! -z ${LIBLBFGS} ]; then
+  set -x
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
 echo "--------------------"
 echo "Computing perplexity"
 echo "--------------------"
 (
-  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
-  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
-) | sort -r -n -k 13 | column -t | tee $tgtdir/perplexities.txt
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
 
 echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
 
@@ -141,9 +196,9 @@ nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
 if [[ $nof_trigram_lm -eq 0 ]] ; then
   lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
 elif [[ $nof_trigram_lm -eq 2 ]] ; then
-  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '` 
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
 else #exactly one 3gram LM
-  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '` 
+  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
 fi
 (cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
diff --git a/egs/babel/s5c/local/train_mmi_sgmm2.sh b/egs/babel/s5c/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..cdf9e28b1bf 100755
--- a/egs/babel/s5c/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5c/local/train_mmi_sgmm2.sh
@@ -30,7 +30,7 @@ if [ $# -ne 5 ]; then
   echo "  --cancel (true|false)  # cancel stats (true by default)"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs."
   echo "  --config <config-file>  # config containing options"
-  echo "  --stage <stage>  # stage to do partial re-run from." 
+  echo "  --stage <stage>  # stage to do partial re-run from."
   echo "  --transform-dir <transform-dir>  # directory to find fMLLR transforms."
exit 1; fi @@ -68,7 +68,7 @@ echo "$0: feature type is $feat_type" case $feat_type in delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir ;; *) echo "Invalid feature type $feat_type" && exit 1; esac @@ -152,7 +152,7 @@ while [ $x -lt $num_iters ]; do $cmd $dir/log/num_acc_sum.$x.log \ sgmm2-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; rm $dir/num_acc.$x.*.acc - + $cmd $dir/log/update.$x.log \ sgmm2-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; fi diff --git a/egs/babel/s5c/local/txt_to_rttm.pl b/egs/babel/s5c/local/txt_to_rttm.pl index 659d3c593d7..0e128520880 100755 --- a/egs/babel/s5c/local/txt_to_rttm.pl +++ b/egs/babel/s5c/local/txt_to_rttm.pl @@ -18,7 +18,7 @@ my $flen = 0.01; GetOptions('symtab=s' => \$symtab, 'segment=s' => \$segment, - 'flen=f' => \$flen); + 'flen=f' => \$flen); if ($symtab) { if (!open(S, "<$symtab")) {print "Fail to open symbol table: $symtab\n"; exit 1;} @@ -82,7 +82,7 @@ my $uid = shift @col; my $words = join(" ", @col); @col = split(/;/, $words); - + my $utt = $uid; my $sta = 0; if ($segment) { diff --git a/egs/babel/s5c/local/uem_ctm2segments.pl b/egs/babel/s5c/local/uem_ctm2segments.pl index ab560639c06..658690172c8 100755 --- a/egs/babel/s5c/local/uem_ctm2segments.pl +++ b/egs/babel/s5c/local/uem_ctm2segments.pl @@ -40,10 +40,10 @@ $defaultSegLen = 10; # seconds ################################################################################ -GetOptions("ctmTimeStep=f" => \$ctmTimeStep, - "minSilence=f" => \$minSilence, - "silence=s" => \$silence, - "maxSegLen=f" => \$maxSegLen, +GetOptions("ctmTimeStep=f" => \$ctmTimeStep, + "minSilence=f" => \$minSilence, + "silence=s" => \$silence, + "maxSegLen=f" => \$maxSegLen, "defaultSegLen=f" => \$defaultSegLen); if ($#ARGV == 1) { diff --git a/egs/babel/s5c/path.sh b/egs/babel/s5c/path.sh index c8fdbad6ff7..2d7dba09015 100755 --- a/egs/babel/s5c/path.sh +++ b/egs/babel/s5c/path.sh @@ -1,5 +1,4 @@ export KALDI_ROOT=`pwd`/../../.. -. /export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. 
/export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/babel/s5c/results/RESULTS.105-turkish.flp b/egs/babel/s5c/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 
+exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5c/results/RESULTS.106-tagalog.flp b/egs/babel/s5c/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5077, THRESHOLD = 0.648 
+exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5c/results/RESULTS.107-vietnamese.flp b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys + +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 
+exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105 + +############################################################################################################################ + diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh index e01910ffac0..dc5ed134a04 100755 --- a/egs/babel/s5c/run-1-main.sh +++ b/egs/babel/s5c/run-1-main.sh @@ -119,7 +119,7 @@ if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then echo --------------------------------------------------------------------- echo "Training SRILM language models on" `date` echo --------------------------------------------------------------------- - local/train_lms_srilm.sh --dev-text data/dev2h/text \ + local/train_lms_srilm.sh --oov-symbol $oovSymbol --dev-text data/dev2h/text \ --train-text data/train/text data data/srilm fi @@ -249,7 +249,6 @@ if [ ! -f exp/tri5/.done ]; then touch exp/tri5/.done fi - ################################################################################ # Ready to start SGMM training ################################################################################ diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh index 68b87ea1e27..472acbfe80e 100755 --- a/egs/babel/s5c/run-4-anydecode.sh +++ b/egs/babel/s5c/run-4-anydecode.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash set -e set -o pipefail @@ -10,13 +10,12 @@ dir=dev10h.pem kind= data_only=false fast_path=true -skip_kws=false +skip_kws=true skip_stt=false skip_scoring=false -max_states=150000 extra_kws=true vocab_kws=false -tri5_only=false +tri5_only=true wip=0.5 echo "run-4-test.sh $@" @@ -32,7 +31,7 @@ fi #set of scripts will exit when sourcing several of them together #Otherwise, the CTRL-C just terminates the deepest sourced script ? # Let shell functions inherit ERR trap. Same as `set -E'. -set -o errtrace +set -o errtrace trap "echo Exited!; exit;" SIGINT SIGTERM # Set proxy search parameters for the extended lexicon case. 
@@ -82,8 +81,8 @@ if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then fi eval my_stm_file=\$${dataset_type}_stm_file -eval my_ecf_file=\$${dataset_type}_ecf_file -eval my_kwlist_file=\$${dataset_type}_kwlist_file +eval my_ecf_file=\$${dataset_type}_ecf_file +eval my_kwlist_file=\$${dataset_type}_kwlist_file eval my_rttm_file=\$${dataset_type}_rttm_file eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate @@ -196,16 +195,15 @@ if [ ! -f $dataset_dir/.done ] ; then else echo "Unknown type of the dataset: \"$dataset_segments\"!"; echo "Valid dataset types are: seg, uem, pem"; - exit 1 fi elif [ "$dataset_kind" == "unsupervised" ] ; then if [ "$dataset_segments" == "seg" ] ; then - . ./local/datasets/unsupervised_seg.sh + . ./local/datasets/unsupervised_seg.sh elif [ "$dataset_segments" == "uem" ] ; then . ./local/datasets/unsupervised_uem.sh elif [ "$dataset_segments" == "pem" ] ; then ##This combination does not really makes sense, - ##Because the PEM is that we get the segmentation + ##Because the PEM is that we get the segmentation ##and because of the format of the segment files ##the transcript as well echo "ERROR: $dataset_segments combined with $dataset_type" @@ -215,12 +213,10 @@ if [ ! -f $dataset_dir/.done ] ; then else echo "Unknown type of the dataset: \"$dataset_segments\"!"; echo "Valid dataset types are: seg, uem, pem"; - exit 1 fi else echo "Unknown kind of the dataset: \"$dataset_kind\"!"; echo "Valid dataset kinds are: supervised, unsupervised, shadow"; - exit 1 fi if [ ! -f ${dataset_dir}/.plp.done ]; then @@ -230,7 +226,7 @@ if [ ! -f $dataset_dir/.done ] ; then make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp touch ${dataset_dir}/.plp.done fi - touch $dataset_dir/.done + touch $dataset_dir/.done fi ##################################################################### # @@ -240,12 +236,15 @@ fi echo --------------------------------------------------------------------- echo "Preparing kws data files in ${dataset_dir} on" `date` echo --------------------------------------------------------------------- +lang=data/lang +set -x if ! $skip_kws ; then . ./local/datasets/basic_kws.sh || exit 1 - if $extra_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt . ./local/datasets/extra_kws.sh || exit 1 fi - if $vocab_kws ; then + if $vocab_kws ; then . ./local/datasets/vocab_kws.sh || exit 1 fi fi @@ -257,7 +256,7 @@ fi #################################################################### ## -## FMLLR decoding +## FMLLR decoding ## #################################################################### decode=exp/tri5/decode_${dataset_id} @@ -284,11 +283,11 @@ if ! 
$fast_path ; then
         "${lmwt_plp_extra_opts[@]}" \
         ${dataset_dir} data/lang ${decode}
 
-  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
-    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
-    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
-    "${lmwt_plp_extra_opts[@]}" \
-    ${dataset_dir} data/lang ${decode}.si
+  #local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+  #  --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+  #  --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+  #  "${lmwt_plp_extra_opts[@]}" \
+  #  ${dataset_dir} data/lang ${decode}.si
 fi
 
 if $tri5_only; then
@@ -297,7 +296,7 @@ if $tri5_only; then
 fi
 
 ####################################################################
-## SGMM2 decoding 
+## SGMM2 decoding
 ## We Include the SGMM_MMI inside this, as we might only have the DNN systems
 ## trained and not PLP system. The DNN systems build only on the top of tri5 stage
 ####################################################################
@@ -493,5 +492,5 @@ for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \
     ${dataset_dir} data/lang $decode
   fi
 done
-echo "Everything looking good...." 
+echo "Everything looking good...."
 exit 0
diff --git a/egs/babel/s5c/run-4b-anydecode-bnf.sh b/egs/babel/s5c/run-4b-anydecode-bnf.sh
index 27c68bacfd8..205f37b46d9 100755
--- a/egs/babel/s5c/run-4b-anydecode-bnf.sh
+++ b/egs/babel/s5c/run-4b-anydecode-bnf.sh
@@ -45,7 +45,7 @@ if [ -z "$unsup_string" ] ; then
   fi
 fi
 
-if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.uem,.seg} | grep -w "$type" >/dev/null; then
+if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then
   # note: echo dev10.uem | grep -w dev10h will produce a match, but this
   # doesn't matter because dev10h is also a valid value.
   echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
@@ -247,11 +247,13 @@ if [ -f $exp_dir/tri7_nnet/.done ] &&
     touch $decode/.done
   fi
 
-  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
-    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
-    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
-    ${datadir} data/lang $decode
 fi
 
-echo "$0: Everything looking good...."
+decode=$exp_dir/tri7_nnet/decode_${dirid}
+local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+  --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+  "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+  ${datadir} data/lang $decode
+
+echo "$0: Everything looking good...."
 exit 0
diff --git a/egs/babel/s5d/EXAMPLE.vietnamese b/egs/babel/s5d/EXAMPLE.vietnamese
new file mode 100644
index 00000000000..f5dde82c364
--- /dev/null
+++ b/egs/babel/s5d/EXAMPLE.vietnamese
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+#This is an example sequence of commands for running the default Kaldi Babel OP1 system
+#It is not assumed that you will run it as a script, even though you can try :)
+
+./run-1-main.sh
+./run-2a-nnet-ensemble-gpu.sh
+./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
+./run-3b-bnf-sgmm.sh --semisupervised false
+./run-3b-bnf-nnet.sh --semisupervised false
+
+##Training of the automatic segmenter
+./run-2-segmentation.sh
+
+##Decoding the automatic segmentation of dev2h subset. dev2h.pem would mean decoding
+##the dev2h subset using the officially provided segmentation.
+##Also possible to run dev10h.pem, dev10h.uem, dev10h.seg and so on...
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+##Decoding of the unsupervised data
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
+##Get the one-best path and the weights for frame-weighting of posteriors
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+  exp/tri6b_nnet/decode_unsup.seg/ \
+  exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+  exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf/tri7_nnet/decode_unsup.seg \
+  exp_bnf_semisup/best_path_weights/unsup.seg
+
+##Semisupervised bottleneck system training (initial setup)
+./run-2b-bnf.sh --semisupervised true --ali-model exp/tri6b_nnet/ \
+  --weights-dir exp/best_path_weights/unsup.seg/decode_unsup.seg/
+
+##Semisup training, SGMM+bMMI on the top of the BN features
+./run-3b-bnf-sgmm.sh --semisupervised true
+##Semisup training, pNorm DNN on the top of the BN features
+./run-3b-bnf-nnet.sh --semisupervised true
+
+##And decoding again. We decode the unsup.seg again to do the second run of the
+##semisupervised training
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised true
+
+##One-best output and frame weights for the second run of the semisup training
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+  exp_bnf_semisup/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf_semisup/tri7_nnet/decode_unsup.seg \
+  exp/tri6b_nnet/decode_unsup.seg/ \
+  exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+  exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf/tri7_nnet/decode_unsup.seg \
+  exp_bnf_semisup2/best_path_weights/unsup.seg
+
+##Second run of the semisup training
+./run-2b-bnf.sh --unsup-string "_semisup2" --semisupervised true --ali-model exp/tri6b_nnet/ \
+  --weights-dir exp_bnf_semisup2/best_path_weights/unsup.seg/decode_fmllr_unsup.seg_it1/
+
+./run-3b-bnf-sgmm.sh --semisupervised true --unsup_string "_semisup2"
+./run-3b-bnf-nnet.sh --semisupervised true --unsup_string "_semisup2"
+
+##Decode again to see if we got an improvement
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --unsup_string "_semisup2" --extra-kws true
+
+
+##Decoding of the dev10h (all systems, all stages)
+./run-4-anydecode.sh --dir dev10h.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+##Decoding of the shadow.seg (combination of dev10h.seg and eval.seg)
+##We did this for the eval run as a kind of "sanity check" -- we check the shadow.seg/dev10h.seg subset
+##performance vs the standalone dev10h.seg performance to catch (hopefully) possible problems
+./run-4-anydecode.sh --dir shadow.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+
+
+#This prepares for separation/split of the shadow dataset into the devset, which we can evaluate,
+# and the eval set, which we will submit
+#Note: we do this only once, for ./data, as we do not really need anything else
+#just the file lists...
+#NB: there was an oversight in one of the scripts that was causing the ctm files to contain
+#NB: incorrect channel info (A instead of 1)
+#NB: To fix that, you can run something like this:
+#NB: find exp/ -name "shadow.seg.ctm" | xargs -t -n 1 sed -i'.bakx' 's/ A / 1 /g'
+./local/nist_eval/create_compound_set.sh --evlset eval.seg --devset dev10h.seg --tgtdir data/shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/tri6b_nnet/decode_shadow.seg
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/tri6b_nnet/decode_shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+#The following commands will actually do two things
+#a) looking at the performance on the --master dataset, they will figure out the correct LMW
+#b) symlink the appropriate evaluation result file under the correct EXPID into the ./release directory
+#Warning: it's a lot of files so it's easy to get confused!
+./local/nist_eval/make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf_semisup/sgmm7_mmi_b0.1 --extrasys SEMISUPX --data data/shadow.seg --master dev10h.seg lang.conf ./release
+
+#Combine results (what we call 4way-combo)
+
diff --git a/egs/babel/s5d/README.txt b/egs/babel/s5d/README.txt
new file mode 100644
index 00000000000..6bc3ddacba7
--- /dev/null
+++ b/egs/babel/s5d/README.txt
@@ -0,0 +1,82 @@
+How to set up the BABEL database training environment
+=====================================================
+a) Preparation: you need to make sure the BABEL data and the F4DE scoring software
+   are set up as they are in JHU, or change this setup accordingly. This will probably
+   be hard and will involve some trial and error. Some relevant pathnames can be
+   found in conf/lang/* and ./path.sh
+
+   Link one of the config files in conf/languages to ./lang.conf. E.g.:
+   ln -s conf/languages/105-turkish-limitedLP.official.conf lang.conf
+
+
+b) If you plan to work on one or more languages, the following approach is advised.
+   aa) create an empty directory somewhere, according to your choice
+       (
+       mkdir 206-zulu-llp; cd 206-zulu-llp
+       )
+
+   ab) copy cmd.sh and path.sh (you will probably need to make some changes in these);
+       especially pay attention to KALDI_ROOT in path.sh and possibly switch to using
+       run.pl in cmd.sh
+       (
+       cp /path/to/kaldi/egs/babel/s5b/{cmd.sh,path.sh} .
+       )
+
+   ac) symlink all the directories here to that directory
+       (
+       ln -s /path/to/kaldi/egs/babel/s5b/{conf,steps,utils,local} .
+       )
+   ad) link the necessary scripts (see below)
+       {
+       ln -s /path/to/kaldi/egs/babel/s5b/run-1-main.sh .
+       }
+   ae) link the appropriate language-specific config file to lang.conf in
+       each directory.
+       (
+       206-zulu-llp$ ln -s conf/lang/206-zulu-limitedLP.official.conf lang.conf
+       )
+
+
+Running the training scripts
+===================================================
+
+You run the scripts in order, i.e.
+  run-1-main.sh
+  run-2a-nnet.sh and run-2-bnf.sh may be run in parallel, but run-2-bnf.sh should be
+  run on a machine that has a GPU.
+  run-3-bnf-system.sh trains an SGMM system on top of bottleneck features from run-2-bnf.sh
+  run-4-test.sh decodes with the provided segmentation (we get this from CMU)
+  run-5-anydecode.sh seems to decode with the segmentation provided
+
+
+
+Official NIST submission preparation
+==================================================
+The make_release.sh script might come in handy.
+The script evaluates the performance of the sgmm2_mmi_b.0.1 system on
+the eval.uem dataset and chooses the same set of parameters to
+determine the path inside the test.uem dataset.
+
+./make_release.sh --relname defaultJHU --lp FullLP --lr BaseLR --ar NTAR \
+  conf/languages/106-tagalog-fullLP.official.conf /export/babel/data/releases
+
+
+
+
+
+./run-1-main.sh
+./run-2a-nnet-ensemble-gpu.sh
+./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
+./run-3b-bnf-sgmm.sh --semisupervised false
+./run-3b-bnf-nnet.sh --semisupervised false
+
+./run-2-segmentation.sh
+
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+
+
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
diff --git a/egs/babel/s5d/RESULTS.txt b/egs/babel/s5d/RESULTS.txt
new file mode 100644
index 00000000000..c87bf7f2b8b
--- /dev/null
+++ b/egs/babel/s5d/RESULTS.txt
@@ -0,0 +1,8 @@
+The results are by default to be found in /decode_*, where the individual /decode_* directories correspond to the language model weights.
+
+An aesthetically pleasing table with the results can be obtained, for example, like this (YMMV, as may your aesthetic feelings):
+find exp/sgmm5_mmi_b0.1 -name "*.ctm.sys" -not -name "*char.ctm.sys" -ipath "*fmllr_eval.pem*" | xargs grep 'Sum/Avg' | sed 's/:* *| */ /g' | sed 's/ */ /g' | sort -n -k 9 | column -t
+
+Similarly, for the kws outputs, the same kind of table can be obtained as
+find exp/sgmm5_mmi_b0.1 -name "sum.txt" -ipath "*fmllr_eval.pem*" | xargs grep "| Occurrence" | cut -f 1,13 -d '|'| sed 's/:|//g' | column -t | sort -k 2 -n -r
+
diff --git a/egs/babel/s5d/RUN_UNICODE_SYSTEM b/egs/babel/s5d/RUN_UNICODE_SYSTEM
new file mode 100644
index 00000000000..79168d4c3bc
--- /dev/null
+++ b/egs/babel/s5d/RUN_UNICODE_SYSTEM
@@ -0,0 +1,9 @@
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true --tri5-only true
+
+# For tri5
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true (for tri5 only)
+
+# For lstm
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true --data-only true
+./local/nnet3/run_lstm.sh
+./run-4-anydecode.sh --nnet3-model nnet3/lstm_sp --is-rnn true --dir dev10h.pem --skip-kws true
diff --git a/egs/babel/s5d/UNICODE_README b/egs/babel/s5d/UNICODE_README
new file mode 100644
index 00000000000..b8b2358436f
--- /dev/null
+++ b/egs/babel/s5d/UNICODE_README
@@ -0,0 +1,119 @@
+Graphemic Lexicon from Unicode
+================================================================================
+
+General Description
+----------------------------------------
+Given some form of word list in an unknown language, we must find pronunciations
+for each word. When the language is written alphabetically, the letters
+themselves can be used as word pronunciations. In English for instance there
+would be 26 phones, and possibly a few extra for the rarely occurring letters,
+
+  "ö","é","è","â", ...
+
+which occur primarily in foreign loan words.
+
+Some languages use syllabic systems or partially alphabetic scripts, for which
+nothing close to a 1-1 mapping from graphemes to phonemes exists. Examples of
+such systems are abugidas and abjads.
+
+The premise of this system is that for most languages, there exists a unicode
+description of the graphemes from which the phonetics may be recovered.
+
+While non-alphabetic scripts present an obvious challenge, we find that even
+for languages such as English and French, the issue of whether or not to treat
+each accented character as a separate phone presents a problem. After all,
+pâté, pâte, and pate are all English words with different pronunciations.
+Resume and résumé are also examples. And this for a language that is generally
+considered unaccented. In French, which is known to have many diacritics
+affecting pronunciation, we nonetheless find words such as forêt and bosquet,
+with essentially the same meaning, whose "e" sounds have very much the same
+pronunciation. In some scripts, such diacritics are vowel markers, indicators
+of tone, or stress, and probably many other linguistic phenomena we have not
+yet encountered.
+
+Fortunately, the unicode representation of such graphemes has an alternate
+normalization, "NFD", which decomposes a grapheme into its constituent parts.
+In this implementation we treat such marks as modifying the preceding grapheme.
+When the grapheme occurs frequently enough, the accented grapheme is
+automatically considered a separate phoneme. For infrequent accented graphemes
+we treat the accent as a tag and use the tag as an extra question in the tree
+building step.
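As an aside, the core of this NFD treatment can be sketched with Python's standard unicodedata module. This is only an illustration of the idea, not the actual make_unicode_lexicon.py logic; the min_count cutoff, the tag naming, and the toy words are made up.

```python
import unicodedata
from collections import Counter

def decompose(word):
    # NFD splits an accented grapheme into a base character plus combining
    # marks; we treat each mark as modifying the preceding base grapheme.
    units = []
    for ch in unicodedata.normalize("NFD", word):
        if unicodedata.combining(ch) and units:
            units[-1] = units[-1] + (unicodedata.name(ch).split()[1],)
        else:
            units.append((ch,))
    return units

def graphemes_to_phones(words, min_count=5):
    # Frequent accented graphemes become phones of their own; rare accents
    # are reduced to the base grapheme plus a generic tag that can later be
    # used as an extra question in tree building.
    counts = Counter(u for w in words for u in decompose(w))
    lex = {}
    for w in words:
        phones = []
        for u in decompose(w):
            if len(u) == 1 or counts[u] >= min_count:
                phones.append("_".join(u))       # e.g. 'a_CIRCUMFLEX'
            else:
                phones.append(u[0] + "_TAG")     # rare accent kept only as tag
        lex[w] = phones
    return lex

print(graphemes_to_phones(["pate", "pâte", "pâté"], min_count=2))
```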
+
+The issue of syllable boundaries in words is mostly important for keyword search.
+Syllables can be created by training a morphological analyser on the
+conversational transcripts, and then segmenting each word into its learned
+morphemes.
+
+Usage
+----------------------------------------
+All the scripts for creating the graphemic lexicon are located in local/lexicon,
+except for prepare_unicode_lexicon.py. Run ...
+
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true
+
+for a full system run using a unicode lexicon and morfessor.
+
+The general structure is:
+
+1. Generate a list of unique words in the training data. Just use the word
+   entries of the filtered_lexicon if available. Do not include words present
+   in conversation transcriptions such as , etc.
+
+local/lexicon/phone2morph_lexicon.py
+
+2. Use morfessor to create somewhat logical syllabic units. Train the system
+   on the conversational transcriptions for instance, though any body of text
+   in the language should do. The conversational transcriptions were used in
+   this script however.
+
+3. Segment each word in the word list into its morphemes. Represent this as
+   a lexicon of sorts.
+
+local/lexicon/make_unicode_lexicon.py
+
+4. Use the morphemic lexicon created in step 3 as input.
+
+5. Get the unicode representation for each grapheme in each word.
+
+local/lexicon/methods/blind_tags_counts.py
+
+6. Convert the unicode representation of each word into actual units with
+   which we derive an entry in the lexicon. This function is actually imported
+   into make_unicode_lexicon.py. It's written this way to allow for more
+   flexibility in processing the unicode descriptions of graphemes.
+
+local/prepare_unicode_lexicon.py
+7. This creates the rest of the data/local directory. It also adds the extra
+   questions derived from the unicode-derived tags to extra_questions.txt.
+
+
+Script Descriptions
+------------------------------------------------------------------------------
+In local/lexicon,
+make_unicode_lexicon.py :
+
+  This script takes as arguments: a lexicon, word-list, or file with distinct
+  space separated words; a path to an output lexicon that will be generated; a
+  directory containing all possible methods of processing the unicode
+  character descriptions; and the name of the method in the directory to use.
+  Options exist for specifying the type of input file, whether to treat the
+  input lexicon entries as morphemes, etc.
+
+In local/lexicon/methods
+blind_tags_counts.py
+
+  Each method in the methods directory is supposed to follow a strict format:
+  1. Must have a fmt global specifying the output lexicon format
+     (normally kaldi).
+  2. Must have an encode function which maps a certain structure in which
+     unicode character descriptions were stored to lexicon entries in the
+     new lexicon we are creating.
+  3. Certain input arguments, especially a table argument for the table
+     containing the mapping between unicode graphemes and lexical entries.
+
+
+In local/lexicon/methods
+phone2morph_lexicon.py
+
+  This script takes an input word list, and outputs a morphemic dictionary.
+
diff --git a/egs/babel/s5d/babel.html b/egs/babel/s5d/babel.html
new file mode 100644
index 00000000000..9848e6566f8
--- /dev/null
+++ b/egs/babel/s5d/babel.html
@@ -0,0 +1,788 @@
+
+
+
+

Description of Kaldi subsystems

+
+  This is a description of the complete Kaldi sub-system, covering all of its
+  components. It will be referred to from the system descriptions of the various
+  Kaldi sub-systems, and from the top-level system description of the RADICAL team.
+

1. Abstract

+ +

+
+  The Kaldi keyword search system is based mostly on a conventional LVCSR pipeline.
+  We have three main sub-systems, which separately decode the data;
+  we then use conventional system combination techniques. The three systems are:
+

    +
  • SGMM+BMMI. This is a Subspace Gaussian Mixture Model (SGMM) of the type described in [2], + discriminatively trained with Boosted MMI [3]. +
  • DNN. This is a Deep Neural Network with p-norm activations as described in [8]. + For LimitedLP systems we improve performance with an ensemble method which we will + describe below. +
  • Bottleneck SGMM+BMMI system. In this system we train a DNN with a bottleneck layer + of dimension 42, and use it to extract features which we train an SGMM+BMMI system on. +
+
+  For LimitedLP we add a fourth system, which is a version of the bottleneck system where
+  the DNN used to extract the bottleneck features is trained on automatically transcribed data as
+  well as the LimitedLP data. For FullLP we add a different fourth system, which is
+  a "sequence-trained" version of the DNN, trained with the State-level Minimum Bayes
+  Risk criterion (a variant of MPE).
+
+  We also include a fifth, less conventional sub-system, based on the "Point Process Model" (PPM)
+  that uses phone-level posteriors from a DNN trained for one of the systems above.
+  This will be described in Section 4.16. Its outputs are combined with our systems above
+  for keyword spotting but not for transcription.

+
+  Our keyword search pipeline is based on lattice-indexing as described in [5]; the lattices
+  are generated using the "exact" lattice generation method described in [6].
+  To handle out-of-vocabulary (OOV) keywords, we use the method of [4], which constructs, for
+  an OOV keyword sequence, proxy keyword sequences consisting of word sequences which are phonetically
+  similar. This year we added a "lexicon expansion" method, in which we generate plausible
+  new words using a syllable-level language model and add them to the lexicon and language model
+  when decoding (see Section 4.4). (This even slightly improves the WER.) We actually add
+  the original and expanded-lexicon versions of each system to the final system combination,
+  but including non-expanded decodings in the system combination is not really necessary.

+ The code and scripts used for the main Kaldi system are available as part of Kaldi; + see svn://svn.code.sf.net/p/kaldi/code/trunk/. The scripts we used this year are + located in the directory egs/babel/s5b. + + +

2. Notable features

+
+  A new feature of our system that is shared by all the sub-systems is our
+  pitch features. We describe these in more detail in [7]. This is a
+  pitch extraction algorithm based on the old "getf0" method, but which naturally
+  ensures continuity of the pitch contours even in unvoiced regions. We also
+  derive a continuous-valued voicing feature from the algorithm. Finally we get
+  a three-dimensional feature consisting of pitch, delta-pitch, and a feature
+  derived from probability of voicing (POV). These are appended to the PLP
+  features, giving us consistent gains across languages compared with our
+  previous pitch features (other teams have also reported gains using our
+  features).

+
+  Something else that is new is the p-norm neural networks [8]. This
+  is a new nonlinearity type that is related to maxout (in that it is a
+  dimension-reducing nonlinearity). This gave us around 1% absolute improvement
+  compared with our old, tanh-based networks. On top of this, for LimitedLP
+  we introduce an ensemble training method. Imagine training four
+  networks from different random seeds. We can average the scores from all
+  of them to get an improvement (around 2% absolute). But we don't like to have
+  to use multiple networks in test time. Our ensemble method introduces a term in
+  the objective function to train the networks' outputs towards each other, to make
+  them more similar, so that in test time we can pick just one of the networks to test with.
+  This gives us three quarters of the improvement from the simple method of averaging the scores,
+  but does not slow us down in test time. We only do this for LimitedLP because it
+  slows down training too much to be practical for FullLP.

+
+  Our bottleneck feature system is heavily modified since last year, and
+  has improved.
+  Firstly, we implemented it all in Kaldi, as opposed to last year's system which was a
+  hybrid between Kaldi and Theano. This makes the training faster, since Kaldi
+  supports parallelized neural network training, using multiple GPUs. The basic
+  recipe is the same as last year's -- a DNN with a 42-dimensional bottleneck, appending
+  these features to the baseline fMLLR features, splicing across 3 frames and doing
+  LDA dimension reduction to 60 dimensions, then training an SGMM system on these features.
+  However, results seemed a little better with the Kaldi implementation, perhaps 0.5%
+  absolute. It's hard to say why, as there are too many differences. The thing that is
+  new is that we implemented semi-supervised training in the LimitedLP case. We
+  use the 1-best output from decoding as supervision for the untranscribed data, but only
+  train on a frame if the state-level posterior is above a threshold (we use a low threshold
+  of 0.35 for this case).
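The frame-selection rule can be sketched as follows; the array shapes and function name are ours (in the real scripts the weights come from local/best_path_weights.sh):

```python
import numpy as np

def frame_weights(post, threshold=0.35):
    """post: (T, S) per-frame state posteriors from decoding untranscribed
    data. Returns 0/1 weights: a frame contributes to training only if the
    posterior of its best state clears the threshold."""
    best = post.max(axis=1)                 # posterior of the 1-best state
    return (best >= threshold).astype(np.float32)

post = np.array([[0.90, 0.05, 0.05],       # confident frame -> kept
                 [0.34, 0.33, 0.33],       # uncertain frame -> dropped
                 [0.50, 0.30, 0.20]])      # kept
print(frame_weights(post))                 # [1. 0. 1.]
```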

+
+  Our point process model system (Section 4.16), while it gets only around half
+  the ATWV of our conventional system by itself, is giving us large improvements in
+  combination with our conventional system, of around 3 to 4% ATWV. This is an
+  unconventional "exemplar-based" approach.

+
+  Our expanded lexicon (Section 4.4) is also new. This method takes
+  as input the provided lexicon, and uses it to hypothesize likely new words
+  and their pronunciations, along with their probabilities. We generate 2 million
+  extra words, with associated probabilities, and we allocate the "unknown-word"
+  probability mass of our language model to these words. Our method is
+  "backwards", in that we first generate the phonetic sequences, and then
+  work out the spellings. The improvement this gives is extremely variable.
+  For Bengali and Assamese, it makes essentially no difference. But for Zulu
+  LimitedLP using the development keywords on the development data, it improved
+  the Kaldi-only ATWV from 0.20 to 0.28.

3. Extra resources

+ + For the submitted Kaldi systems we did not use any linguistic or other + resources outside of the language development pack. For our LimitedLP + submissions, we did use the FullLP and "untranscribed" data for unsupervised + training, without using the transcriptions. (This is allowed even in the + BaseLR condition). + +

4. System description

+ +

4.1 Low level features

+
+  Our basic features are standard 13-dimensional PLP features. To these we
+  append 3-dimensional features derived from our "Kaldi" pitch tracker, giving a
+  16-dimensional "base feature". Our pitch tracker and the configuration we used
+  are described in [7]. These features were extremely helpful on tonal languages:
+  on Cantonese and Vietnamese last year, our tests showed as much as 6% absolute
+  WER improvement compared with no pitch features. In general our new "Kaldi"
+  pitch features give us about twice as much improvement as our old features from
+  last year that were based on SAcC.

4.2 Segmentation

+ + Our segmentation is performed via decoding the whole-conversation data using a + GMM-based model. The model is trained in the normal way for an LVCSR system, + but the decoding graph is derived from a phone bigram language model (unsmoothed, + to avoid blowup due to context dependency). We do a single pass of decoding, + without adaptation; the features are processed as spliced-frames+LDA+STC. The + model used for segmentation is trained on transcripts that included certain + data we would normally exclude: segments containing only non-speech events such + as noise are included in the transcripts. +

+ The output of the decoding above is used as the input to the following algorithm. + First we map the frames of the decoder best path to one of three classes: speech, + noise or silence. The segmentation algorithm is as follows: + +

+
+  • Get initial segments: contiguous regions consisting of speech and/or noise are marked as the initial segments.
+  • Pad the initial segments: non-speech frames on either side of the initial segments are included in the segments one at a time until there are no more non-speech frames adjacent to any segments (unlikely) or until the non-speech frames make up about 5% of the total frames in the conversation.
+  • Merge segments: two segments are merged if the length of non-speech frames between the two segments is less than about 1 second and the merged segment is not longer than 10 seconds.
+  • Split long segments: initial segments that are longer than 10s are split into equal pieces, each shorter than 10s.
+  • Remove segments with only non-speech frames, i.e. containing only silence and noise.
+
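The post-processing steps above can be sketched roughly as follows, assuming per-frame speech/noise/silence labels taken from the decoder best path. The 5%, 1-second, and 10-second limits are the approximate values from the list; the function and variable names are illustrative, not taken from the actual scripts.

```python
def make_segments(frames, frame_shift=0.01, max_pad_frac=0.05,
                  max_gap_s=1.0, max_len_s=10.0):
    """frames: list of 'speech'/'noise'/'silence' labels, one per frame.
    Returns (start, end) frame ranges."""
    n = len(frames)
    # 1) initial segments: contiguous runs of speech and/or noise
    segs, start = [], None
    for i, c in enumerate(frames + ["silence"]):
        if c in ("speech", "noise"):
            start = i if start is None else start
        elif start is not None:
            segs.append([start, i]); start = None
    # 2) pad with adjacent non-speech frames, up to ~5% of all frames
    budget, grew = int(max_pad_frac * n), True
    while budget > 0 and grew:
        grew = False
        for s in segs:
            if s[0] > 0 and budget > 0:
                s[0] -= 1; budget -= 1; grew = True
            if s[1] < n and budget > 0:
                s[1] += 1; budget -= 1; grew = True
    # 3) merge segments separated by a short non-speech gap, if the
    #    merged segment stays under the length limit
    merged = []
    for s in segs:
        if merged and (s[0] - merged[-1][1]) * frame_shift < max_gap_s \
                and (s[1] - merged[-1][0]) * frame_shift <= max_len_s:
            merged[-1][1] = max(merged[-1][1], s[1])
        else:
            merged.append(s)
    # 4) split overlong segments into equal pieces shorter than the limit
    out, limit = [], int(max_len_s / frame_shift)
    for s, e in merged:
        pieces = -(-(e - s) // limit)        # ceiling division
        step = -(-(e - s) // pieces)
        out += [(i, min(i + step, e)) for i in range(s, e, step)]
    # 5) drop segments that contain no speech frames at all
    return [(s, e) for s, e in out if "speech" in frames[s:e]]

labels = (["silence"] * 20 + ["speech"] * 50 + ["silence"] * 5
          + ["noise"] * 10 + ["silence"] * 15)
print(make_segments(labels))
```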

4.3 Lexicon (non-expanded)

+
+  Here we describe our basic lexicon, before expansion. The BABEL lexicon
+  comes with syllable boundaries marked using tabs, and syllable-level tags
+  marking tone. We attach the tone tags to the phones, so that a syllable
+  k a t _1 would become the phone sequence k_1 a_1 t_1.
+  Formally, each tone version of a phone is a separate phone, but see
+  our explanation of context dependency below.
+  We noticed that in some languages, the original lexicon seemed to have been expanded
+  with some kind of script where some original phone was mapped to two alternative
+  phones. That was the case for Vietnamese last year and Zulu this year, and it
+  was helpful to reverse this mapping. Our mapping for Zulu is as follows:
+
k_> g_<
3 e
R l
o O
b_< b
t_> th
+  After generating a lexicon as described above, we perform the standard procedure
+  in Kaldi training scripts to add word-position dependency. Each phone is mapped
+  to five versions of the phone depending on whether it's at the beginning, middle
+  or end of a word, or is a singleton phone, or is a nonword phone (e.g. optional
+  silence in the lexicon). By this point the phone set is quite large, but again,
+  see our explanation of context dependency below.
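For illustration, the word-position mapping can be sketched as below, using the _B/_I/_E/_S suffix convention of the Kaldi scripts (nonword phones such as optional silence are left unsuffixed); this is a sketch, not the actual script.

```python
def add_word_position(pron):
    """Map a word's phone sequence to word-position-dependent phones."""
    if len(pron) == 1:
        return [pron[0] + "_S"]                 # singleton phone
    return ([pron[0] + "_B"]                    # word-begin
            + [p + "_I" for p in pron[1:-1]]    # word-internal
            + [pron[-1] + "_E"])                # word-end

print(add_word_position(["k_1", "a_1", "t_1"]))  # ['k_1_B', 'a_1_I', 't_1_E']
```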

+
+  We have four phones in our inventory apart from those that appear in words;
+  they are all modeled in a context-independent way, using a different topology
+  (5 states, where the middle 3 states all have transitions to each other). These are
+  for silence, noise, vocalized-noise and unknown-words. The difference between
+  vocalized noise and unknown-words is that vocalized noise models things like coughs
+  and laughs, whereas the unknown-word phone models words whose pronunciation is not
+  known (mainly so we can align them during training).

4.4 Lexicon (expanded)

+ + As mentioned above, we perform lexicon expansion to improve our ability to decode + OOV words. The lexicon expansion procedure produces pronunciations and probabilities + for the generated words, so that we know how to allocate the "unknown-word" probability + mass in the language model. The unknown words are introduced as unigrams into our + ARPA language model, with probabilities equal to the probabilities we estimated, + times the unknown-word fraction (equal to the token OOV rate). +

+
+  The lexicon expansion procedure works as follows (but note that lexicon expansion is
+  not the only thing we do to handle OOVs; see also Section 4.15). We first take all the entries
+  in our original lexicon and view them as sentences, where each syllable corresponds to
+  one symbol (we ignore the spelling). We train an ARPA language model on this with
+  SRILM; a 3-gram "modified Kneser-Ney with interpolation" seemed to work the best.
+  We then generate a large number of "sentences" from this language model: 20 million or so.
+  For each unique sentence in the generated sentences, we compute its language model
+  probability; we then exclude the sentences that correspond to words in the original
+  lexicon, take the 2 million best ones, and these will become the pronunciations of
+  our lexicon entries.
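The pipeline just described can be sketched with SRILM's command-line tools (ngram-count and ngram). The file names are placeholders, the toy pronunciations are far too small for Kneser-Ney discounting to actually work, and the real system operates on the full lexicon:

```python
import subprocess

# Toy stand-in for the real lexicon: one "sentence" of syllable symbols
# per pronunciation (spellings are ignored at this stage).
prons = [["ka", "ti"], ["ka", "ta", "ni"], ["ti", "na"], ["na", "ka"]]
with open("syllables.txt", "w") as f:
    for pron in prons:
        f.write(" ".join(pron) + "\n")

# Train a 3-gram modified Kneser-Ney (interpolated) LM over syllables.
subprocess.run(["ngram-count", "-order", "3", "-kndiscount", "-interpolate",
                "-text", "syllables.txt", "-lm", "syll.lm"], check=True)

# Sample candidate "pronunciation sentences" from the LM; the real system
# draws ~20M, deduplicates them, scores each one (e.g. with ngram -debug 1
# -ppl), drops those already in the lexicon and keeps the 2M most probable.
subprocess.run("ngram -lm syll.lm -gen 1000 > sampled.txt",
               shell=True, check=True)
```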

+ A lexicon entry needs a spelling as well as a pronunciation, and to do this we + use the g2p tool from Sequitur in reverse to produce the most likely + spellings for each pronunciation. We reverse it by taking each lexicon entry, + e.g.
+ hi h iy +and reversing it to produce something like
+ hiy h i
+ Actually we don't do it exactly this way because we want iy to appear as a single + symbol on the left, rather than as a sequence of two symbols. So we map the phones + to ASCII symbols first. When doing so we treat tags (e.g. tones) separately, so each tag + has its own ASCII symbol, and a phone with a tag would be rendered as two ASCII symbols. +

+
+  We use g2p to generate a list of the top few likely spellings for each of the generated
+  pronunciations. We take the pronunciations we generated and the probabilities of their spellings,
+  and convert them into a list of words with probabilities on the words, and a list of
+  pronunciations for each word with associated pronunciation probabilities. This is the output
+  of the lexicon expansion and it is used to create the lexicon and language model that we
+  decode with.
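A toy sketch of the "reversal" described above: the pronunciation (with each phone collapsed to one ASCII symbol) plays the role of the word, and the letters of the spelling play the role of the phones. The symbol assignment here is a placeholder; the real system also gives tags (e.g. tones) their own symbols.

```python
import string

def reversed_lexicon(lexicon):
    """Turn a word->phones lexicon into spelling-prediction training data
    for Sequitur g2p run 'in reverse'."""
    # One private ASCII symbol per phone (toy: breaks past 26 phones).
    phones = sorted({p for pron in lexicon.values() for p in pron})
    sym = {p: string.ascii_uppercase[i] for i, p in enumerate(phones)}
    return [("".join(sym[p] for p in pron), " ".join(word))
            for word, pron in lexicon.items()]

for left, right in reversed_lexicon({"hi": ["h", "iy"]}):
    print(left, right)          # 'AB h i': pron as the word, letters as phones
```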

+ We ran two versions of each system, one with and one without the lexicon + expansion, because we wanted to see how much effect it was having. Because we + had them both available, we decided to combine both versions for the final + system combination, but this combination made very little difference to the + results and we could equally well have submitted just the expanded-lexicon + systems. + + +

4.5 Phonetic context dependency

+ + Our phonetic context dependency is a fairly standard setup based on triphone context + and a phonetic decision tree with questions about the phonetic context. However, + we should mention how we handle tone and word-position-dependent phones. The number + of actual phone symbols is quite large; it consists of the number of "base phones" + times five (from word-position dependency), times the number of tones. Firstly, + the decision-tree roots are not separate for each phone symbol, but we have one per + "base phone", with all states sharing a root. The questions can be about the state + of the HMM, or about the left phone, the central phone, or the right phone. + Each question is simply a set of phone symbols. However, in constructing the questions + we make use of the structure of the phone symbols. Each question is either about + the tone (or some other tag), about the word-position, or about the "base-phone", + and the questions about the base phone consist of sets of base-phones that are derived + from a binary tree clustering of the acoustic statistics from the central HMM-states + of all the phones. + +

4.6 Language models

+
+  Our language models are created using SRILM using the training transcripts.
+  We automatically select the best one from among a range of smoothing rules and
+  count cutoffs, using perplexity on held-out data as the criterion; a typical
+  chosen language model is a Good-Turing smoothed 3-gram.

4.7 Feature processing and adaptation

+
+  Our base features, as described above, are 16-dimensional (PLP + pitch) features.
+  We process these by splicing with 3 frames of left and right context, doing
+  LDA (with the context-dependent states as the classes), and then estimating
+  an STC/MLLT transform [13] along with our models. We then use speaker adaptation
+  based on fMLLR, done also during training (i.e. our models are speaker adaptive).
+  In test time the transforms are obtained by decoding with a GMM-based model.
+  Our SGMM models use speaker vectors as an additional form of adaptation on top of
+  this.

4.8 Subspace Gaussian Mixture Models (SGMMs)

+ + Two of the branches of our systems are based on SGMMs [14], as mentioned in the + introduction. Our SGMMs are the "SGMM2" recipe of Kaldi; this uses + the "symmetric" extension of SGMMs as described in [2], and also a substate-tying + scheme that uses a two-level version of the phonetic decision tree, and is similar + in spirit to the Gaussian tying used in BBN's Byblos system. +

+ The main tunable parameters of the SGMM training are given below: + + + + +
            Num-gauss-UBM   Num-leaves   Num-substates
LimitedLP   750             5000         18000
FullLP      800             10000        80000
+ The number of "leaves per group" in the substate-tying scheme is set at its normal value, which + is 5. + + +

4.9 Deep Neural Networks

+
+  The deep neural network training setup we use in Kaldi is one of the two parallel setups that
+  we maintain: "Karel's setup" and "Dan's setup". This system uses "Dan's setup". The
+  training procedure differs in a number of ways from previously published methods, and
+  for reasons of time and space we can't document it fully here.
+  See the Kaldi documentation for more information.
+  The most salient point is that the setup allows us to train a neural network in parallel
+  on multiple GPUs, which substantially decreases the training time. For example, for Zulu, the
+  FullLP system took 11 hours to train for 25 epochs on 8 GPUs.
+  The LimitedLP system took 7 hours to train for 25 epochs on 4 GPUs, but note that we
+  were training 4 networks at the same time, which slowed down the training by roughly a factor
+  of 4.
4.9.1 p-norm nonlinearities
+
+  Our major improvement to our DNN system was the introduction of "p-norm" nonlinearities.
+  This is described in [8]. The inputs to our DNNs are 40-dimensional fMLLR features, obtained
+  via first-pass decoding with our GMM system. These are spliced across a 9-frame context window
+  (4 frames on each side), and processed with an LDA-like transform to decorrelate them.
+  The FullLP system has four hidden layers with 4000 as the input dimension to the nonlinearity
+  and 400 as the output dimension (so the group size is 10). There are 12000 output neurons
+  in the softmax layer; this is more than the number of context-dependent states (which is
+  about 5000), because of the "mixing-up" described in the Kaldi documentation.
+  For the LimitedLP system the input/output dimensions are 3000/300 and the softmax layer dimension
+  is 5000 (versus about 2000 context-dependent states).
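For concreteness, a small sketch of the p-norm nonlinearity itself (with p=2 as an example value; the dimensions match the FullLP figures quoted above):

```python
import numpy as np

def pnorm(x, group_size=10, p=2.0):
    """p-norm nonlinearity of [8]: each group of `group_size` activations
    is reduced to y = (sum_i |x_i|^p)^(1/p). With 4000 inputs and groups
    of 10 this yields the 400 outputs described above."""
    groups = x.reshape(-1, group_size)
    return (np.abs(groups) ** p).sum(axis=1) ** (1.0 / p)

x = np.random.randn(4000)
print(pnorm(x).shape)   # (400,)
```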
4.9.2 Ensemble training
+
+  For the LimitedLP system we improve our system via a novel "ensemble training" method.
+  This involves training four versions of the neural network in parallel. We initialize
+  four networks using four different random seeds. During training, we train them
+  towards each other by adding a term to the objective function which penalizes the
+  K-L divergence between their outputs. Practically speaking, this means interpolating
+  the "hard label" for each frame with a "soft label" derived from interpolating the
+  posteriors derived from the averaged output of all four neural nets. The amount of
+  the "soft label" we add to the "hard" label is determined by a constant that we vary
+  from about 3 to 5 during training, so the extent of "training towards each other" gets
+  stronger as we train.
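A sketch of how the interpolated targets might be formed. The exact parameterization is not spelled out above, so the assumption here is that the constant alpha enters as a mixing weight alpha/(1+alpha) on the averaged "soft" posteriors:

```python
import numpy as np

def ensemble_targets(net_posteriors, hard_labels, alpha=3.0):
    """net_posteriors: list of (T, S) posterior matrices, one per network;
    hard_labels: (T,) state indices; alpha: the constant varied from ~3 to
    ~5 during training (assumed form of the mix)."""
    T, S = net_posteriors[0].shape
    hard = np.zeros((T, S))
    hard[np.arange(T), hard_labels] = 1.0        # one-hot "hard" targets
    soft = np.mean(net_posteriors, axis=0)       # averaged net outputs
    w = alpha / (1.0 + alpha)
    return (1.0 - w) * hard + w * soft

posts = [np.full((2, 3), 1.0 / 3)] * 4           # four nets, toy posteriors
print(ensemble_targets(posts, np.array([0, 2]), alpha=3.0))
```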

+ During decoding, we pick just one of the systems arbitrarily. Since it has been + trained towards the other networks, it acts a little bit like an ensemble of + networks, even though it is just one network. This gives us about 1.5% WER + improvement. + +

4.9.3 Sequence training
+
+  For the FullLP system only, we do discriminative training ("sequence training")
+  on our DNN. Our discriminative training is based on a state-level variant of
+  the Minimum Phone Error (MPE) criterion, called sMBR [15]. We are mostly following
+  the recipe described in [16], although modified for our parallel-training method.
+
+  The training is based on Stochastic Gradient Descent (SGD), although modified by our
+  "preconditioning method", which will eventually be described in the Kaldi
+  documentation (till then, see the code).
+  We use a learning rate of 9E-5, but one tenth that value for the output layer.
+  Training is for four epochs.
+  Instead of frame-level randomization we use segment-level randomization, where a
+  segment is the smallest piece we could chop our lattices into while still being
+  able to accurately evaluate the objective function. The training is in parallel
+  using 4 GPUs and periodically averaging the parameters, just like for our basic training.
+  (Note that the "effective learning rate" is as a result four times lower than what
+  we mentioned above.)

4.10 Bottleneck features

+ + Our bottleneck system is based on the same code and methods as our DNN system, + except that we use tanh rather than p-norm nonlinearities, and the DNN has a bottleneck + layer. For the LimitedLP system we use four hidden layers with 1024 neurons, then + a bottleneck layer with 42 neurons, then one hidden layer with 1024 neurons, then the + output layer. For the FullLP system, replace (4, 1024) with (5, 2048). As before, + the input to the network is 40-dimensional LDA+STC+fMLLR features, spliced across 9 frames. +

+
+  For feature extraction we remove the part after the 42-dimensional bottleneck, including
+  the tanh nonlinearity, and append it with the baseline 40-dimensional features, giving
+  an 82-dimensional feature vector. This is spliced across ±1 frame and the dimension
+  is reduced with LDA to 60 dimensions. (Note: we don't commence training on these features
+  from scratch but start with alignments from our SAT-trained GMM-based system.)

+
+  From this point we train an SGMM+BMMI system. Because the feature dimension is higher,
+  the number of parameters would increase if we left the rest of the configuration of the
+  system the same, so we use the following reduced configuration values:
+
            Num-gauss-UBM   Num-leaves   Num-substates
LimitedLP   500             5000         10000
FullLP      550             10000        50000
+ Because the features are much "stronger" than normal features (i.e. more informative about the + class), and more correlated, we need to decode with a different acoustic scale than normal. + We normally decode SGMM systems with an acoustic scale of 0.1. For this system we decode with + an acoustic scale of 1/15 = 0.06666. Note: the more finely tuned acoustic scale is determined + by best WER or ATWV on the development data, after rescoring the lattices with different weights; + this value is just to get us in the right ballpark during lattice generation. + + +

4.11 Build order

+ + In order to clarify the relationship between the various systems, we document here the + order of system building. The initial stages, when the dependency graph is just a linear + sequence, are as follows: + + + + + + + + + +
Stage   Num-leaves/gauss (LimitedLP)   Num-leaves/gauss (FullLP)   Feature type
mono    n/a                            n/a                         delta+delta-delta
tri1    1000/10k                       1000/10k                    delta+delta-delta
tri2    2500/36k                       1000/20k                    delta+delta-delta
tri3    2500/36k                       6000/75k                    delta+delta-delta
tri4    2500/36k                       6000/75k                    LDA+STC
tri5    2500/36k                       6000/75k                    LDA+STC+fMLLR
+After the tri5 stage, the build graph "branches out", and the training of the SGMM system, the +DNN system and the DNN that includes the bottleneck features, all depend on the alignments and +transforms obtained from the tri5 system. We have documented the number of parameters of those +other systems separately. + +

4.12 Decoding order

+
+  After training the tri5 system, we obtain via single-pass retraining a version of the system that
+  is trained on speaker-independent features. This model is used in the first, speaker-independent pass
+  of recognition -- apart from segmentation, which we have documented separately. All decoding
+  passes are with WFST decoders that output lattices. Starting from a raw,
+  state-level lattice we use the determinization algorithm of [6] to produce
+  a word-level lattice, although this year we extended the determinization algorithm slightly to
+  enable the generation of deeper lattices, by first doing a phone-level determinization before
+  the word-level determinization. This keeps the determinization from "blowing up" when the
+  beam is too large.

+ The lattices from the speaker-independent decoding are used with the speaker-adapted "tri5" model to compute initial + fMLLR transforms, which are used with the speaker-adapted model to rescore the lattices to get + better posteriors and estimate the fMLLR transforms a second time. + Then another lattice generation pass is done with the speaker-adapted model and adapted features, + and the fMLLR transforms are estimated a third time and the lattices rescored with those features. +

+ Note: we don't include silence frames in the fMLLR computation. Since the + lattice generates soft counts, this is accomplished via per-frame weights, + not a hard cutoff. +

+ The decoding of the later models-- the SGMMs, DNNs and bottleneck feature based SGMMs-- + all depend on the "tri5" decoding because they use the fMLLR transforms generated there. +

+ Once we have these transforms, the DNN decoding is single-pass, but for the discriminatively + trained DNNs we first decode with the basic DNN and then rescore the lattices with + four different versions of the final DNN system, one for each epoch. This is so that we + can choose the best epoch to use. +

+ The SGMM decoding naturally has two passes: one using a speaker-independent version of + the SGMM system (speaker-independent because it doesn't have speaker vectors, although + we do have fMLLR features), and then another pass of decoding after estimating the + speaker vectors. However, we only generate the lattice once. In order to ensure + an accurate final lattice, we dump the state-level lattice from the first pass of + decoding and don't do the final lattice-determinization until after estimating the + speaker vectors. See [6] if the term "state-level lattice" is confusing. + +

4.13 Keyword index generation

+ + The keyword index generation uses Finite State Transducer concepts, and is based on [5]. + It relies on the fact that our lattices are determinized at the word level, which + is an essential part of our lattice generation procedure. This method constructs + an index such that for any given keyword sequence (of any length), one can do a simple + lookup in a finite state transducer and find a list of all the occurrences of that keyword + sequence in the set of lattices that were indexed. + The number of potential word sequences grows exponentially with the sequence + length, and the index does not blow up even though it allows us to look up arbitrarily long + sequences. This is accomplished through the magic of determinization, together with + some clever choices of semirings. +

+ We build a separate index for each language model scale in a predetermined range (e.g. 10, 12, 13, 14, 15), + so that we can separately run the keyword search pipeline for each scale, and pick the + scale with the best ATWV on the dev data. (Note: since there is only one dev set, all our + numbers reported on the dev set have these scales optimized on that set, and the same + applies for WER numbers). + +

4.14 Keyword search

+ + Once the index is built, keyword search is very simple and fast: we look up + the sequence in the index generated above, and it returns a list of the hit locations + (utterance-ids and start and end times) and the associated lattice posteriors. + In this document, we assume that by "keyword" we mean some given sequence of words, possibly + of length one. +

+ The most non-obvious aspect of this is the per-keyword normalization of the scores. + The Term Weighted Value (TWV) metric, after ignoring constant terms and doing + a few manipulations, may be expressed as follows: +

+
+  TWV = const + sum-over-keywords( (1/K) * ( Ntrue-hit / Ntrue - (beta/duration) * NFA ) )

+ Here, sum-over-keywords is taken over all keywords that were actually seen in + the test set being considered. The values in the equation may be defined as follows: + + + + + + + + +
Name        Definition
K           Number of keywords that appear in this test set.
Ntrue-hit   Number of occurrences of this keyword that we correctly spotted.
Ntrue       Number of times this keyword actually occurred in this test set.
NFA         Number of incorrect hits of this keyword that we produced.
beta        A constant equal to exactly 999.9 (don't ask).
duration    The total number of seconds of audio in the test set: a constant we know exactly.
+ + I believe the following analysis comes from [17]. In statistical systems, if we assume + model correctness we can generally trust marginals even of very noisy and unreliable things. + So for instance, even if our individual recognitions of a word are very inaccurate, the sum + of the posterior may be reasonably accurate if the system was well trained. At least, we can hope so. + So if we take the sum of posteriors of the hits of a keyword over our entire training set, we can form + a reasonable estimate of Ntrue. In what goes below, let Ntrue-estimate be simply + the sum of the lattice posteriors of this keyword, over all our test set. We will use Ntrue-estimate + in place of Ntrue. So for some keyword, the TWV contribution from that keyword is: +

+
+  TWV-contribution = (1/K) * ( Ntrue-hit / Ntrue-estimate - (beta/duration) * NFA )

+ Here, Ntrue-estimate and beta/duration are both known quantities. Consider one putative hit, + i.e. one location in time where we have a nonzero posterior and we might want to produce a hit. Let + the posterior of the keyword in the lattice be p. Let's assume that p is a reasonable estimate of the + probability that the keyword actually exists there, which is reasonable assuming model correctness. + As an aside, note that we scale down the acoustics in our lattices while computing the posteriors, so the probabilities + are quite well calibrated; also, we have plotted the (posterior in our lattice) versus + (probability that the word was actually there) and it's within spitting distance of a straight line. + Anyway, back to the task at hand. We can write, for this putative hit, +

+
+  expected-TWV-contribution = (1/K) * ( p / Ntrue-estimate - (beta/duration) * (1-p) )

+
+  Here, all but one of the quantities in the equation are known. K is not known, because we don't know
+  how many keywords were actually seen in the test set, but because we only care about the sign of this quantity
+  we don't actually need to know K. For a putative hit, the equation above gives us all we need to know
+  in order to know whether to say "yes" or "no": if it's positive, "yes", else "no". We want to
+  keep the hit if this is positive, i.e. if:

+  p / Ntrue-estimate - (beta/duration) * (1-p) > 0
+  p * (1/Ntrue-estimate + beta/duration) - beta/duration > 0
+  p > (beta/duration) / (1/Ntrue-estimate + beta/duration)
+  p > Ntrue-estimate / (duration/beta + Ntrue-estimate)

+ Let's call the value above the "threshold", i.e.
+threshold = Ntrue-estimate / (duration/beta + Ntrue-estimate)

+
+  (there is a different threshold for each keyword). In order to make it easier to choose
+  the cutoff point for when to stop producing hits, we would like to produce the output
+  as normalized scores that are all somehow comparable to each other. That way we can tune a global threshold.
+  We would like to normalize our scores in such a way that they are still all between zero and one.
+  We do this by converting p to a log-ratio, i.e. q = log(p / (1-p)), computing a similar log-ratio for the
+  threshold, i.e. t = log(threshold / (1-threshold)), and then subtracting t from q,
+  i.e. q' = q - t, to produce a normalized log-ratio q' (so if q' > 0, then p > threshold).
+  Then we convert back from a log-ratio to an actual
+  probability, call this p'. When we work out the equations for this, it comes out to
+ p' = (1-threshold) * p / ((1-threshold)*p + (1-p)*threshold) + + +
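Putting the normalization together, a direct transcription of the above into Python (the function and variable names are ours); after normalization, a single global cutoff of 0.5 on p' corresponds to the per-keyword threshold:

```python
def normalize_hits(hits, duration, beta=999.9):
    """hits: lattice posteriors p of all putative occurrences of one
    keyword in the test set; duration: total seconds of audio."""
    ntrue_est = sum(hits)                  # estimated Ntrue: sum of posteriors
    thr = ntrue_est / (duration / beta + ntrue_est)
    # p' = (1-thr)*p / ((1-thr)*p + (1-p)*thr); p' > 0.5 iff p > thr
    return [(1 - thr) * p / ((1 - thr) * p + (1 - p) * thr) for p in hits]

# two putative hits of a keyword in 10 hours (36000 s) of audio:
print(normalize_hits([0.9, 0.1], duration=36000.0))
```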

4.15 Out of vocabulary (OOV) keyword search

+ + In this section we describe how we perform the keyword search when the keyword is + OOV-- i.e. when at least one of the words in the sequence is not in our lexicon. + Note that this is a separate thing from the lexicon expansion described above. + If we are using the lexicon-expanded decoding graph, then this procedure is only applied + if the keyword is OOV with respect to the expanded lexicon. +

+ We have described our basic proxy search procedure in [4] so we will not repeat + it at length here. The basic idea is to use a learned phone confusion matrix + to find a list of in-vocabulary word sequences that are phonetically close to + the sequence we want, with associated penalties for being too distant. As a + special case, we don't penalize the proxy sequences for having extra phones at + their beginning and end (so, for instance, if the pronunciation of a + searched-for word appeared as part of a longer word, we would allow that + without penalty). +

+ As background, our index lookup is actually done by FST composition, where one + of the things to be composed is the "query FST" (normally with a linear structure) + and one is the huge index. In our proxy search method, we represent the set of + proxy keywords, and their associated weights, as an FST, and to the keyword + search pipeline it looks no different from a linear sequence (since the input + is just an FST). +

+ There is something new about our proxy keyword search pipeline this + year. After implementing the "expanded lexicon", we noticed that the process + of generating proxy keywords was very slow. This procedure involves various + operations of composition and determinization, where the inputs are a linear + sequence consisting of the OOV keyword (as phones), a phone-edit-distance FST, + and a lexicon. When we made the lexicon much bigger, it became slow. In order + to make it fast again, we had to rearrange the order of composition and + determinization, and implement an "on-demand" FST pruning procedure for OpenFST + (as part of the Kaldi extensions to OpenFST). + + + +

4.16. Point Process Models for Keyword Search

+ +

The point process model (PPM) for keyword search [9] is a
+whole-word, event-based acoustic modeling and phonetic search technique.
+It operates on sparse phonetic event streams extracted from the speech
+signal using a frame-level subword acoustic model. In our Babel system,
+we use our Kaldi Deep Neural Network acoustic models described above to
+generate posteriorgrams over context-dependent states. We subsequently
+sum posterior dimensions sharing the same center phone to produce
+monophone posteriorgrams for each utterance. After applying the matched
+filter smoothing of [10], local maxima of each posterior trajectory
+define phonetic event times. The set of phonetic events for the search
+collection defines the index for subsequent keyword search; this
+construction, which is performed entirely independently of the keyword
+set, is our only use of the test audio.

+The next stage is point process model construction. For +in-vocabulary words, we perform MAP estimation of the Poisson rate +parameters for each word in the lexicon [11]. This takes advantage of +any exemplars present in the training data, but falls back on +dictionary-based model priors (the simple variant, see [11] for details) +if no exemplars are available. For OOV keywords, we use Sequitur G2P +pronunciations to construct the dictionary models. Multi-word keyword +models are constructed by concatenating MAP estimated unigram PPMs, with +the overall duration distributions derived using the Monte Carlo +techniques from [12]. Search for each keyword is performed using an +optimized detection function calculation scheme that is 500,000 times +faster than realtime. We consider the PPM system performance both in +isolation and in combination (at the kwslist level) with the Kaldi LVCSR +search engine outputs. + +
+

4.17. Class-based language model

+Due to the sparsity of the Tamil data, a combination of different smoothing techniques was used to train a trigram for LimitedLP and FullLP:
+
+  1. a class-based language model, where the class is derived from the first three characters of the Tamil word
+  2. a class-based LM using the first six characters
+  3. one using the last three characters
+  4. a skip bigram
+  5. a word trigram where the absolute discounting parameter depends on the count level using a rational function
+  6. the original trigram (KN as implemented in SRILM)
+Models 1-5 were implemented in LSVLM. In order to map them to ARPA format, an artificial corpus of 30 million tokens was sampled using model 5. A trigram tree was constructed and the probabilities of models 1-5 were written to the leaves of that tree. In the end, models 1-6 were combined using linear interpolation. Model 2 made the largest contribution in all experiments.
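For illustration, the class assignment for models 1-3 can be sketched as below; the words are made-up romanized examples, not actual Tamil data:

```python
def word_class(word, mode):
    """Class of a word: its first three, first six, or last three characters."""
    if mode == "first3":
        return word[:3]
    if mode == "first6":
        return word[:6]
    if mode == "last3":
        return word[-3:]
    raise ValueError(mode)

# inflected forms share a stem-like class (first3) or ending class (last3):
for w in ["paadinaan", "paadinaal", "paadugiraan"]:
    print(w, word_class(w, "first3"), word_class(w, "last3"))
```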
+ +

4.18. Segment-level decoding

+
+ +

4.19 System combination methods

+ +
4.19.1 System combination for transcription
+ + Here we describe the system combination methods that are used in the "Kaldi-only" + submissions. For the overall RADICAL combination, which is based on ROVER, we + provide both the individual Kaldi sub-systems, and the overall combined system + which we combine as described in this section. +

+ Our systems are not cross-adapted, unless you count the fact that they all use + the fMLLR transforms from the shared "tri5" stage. For transcription purposes, + the only form of combination we use in the Kaldi sub-system is a combination + procedure based on Minimum Bayes Risk decoding, as described in [1]. We view + this as a more principled way to do confusion network combination (CNC) [18], + without the various heuristics that are used to produce confusion networks. + There is one aspect of this that we should explain, which relates to the + language-model weight. Normally when decoding, we do a linear sweep over the + language model weights over some range (e.g. 10, 11, 12, ... 18), and select + the best one. We do the same when combining systems, except that sometimes the + different systems will require substantially different language model weights + and there is no one weight that is good for all of them; it's not practical to + try all possible combinations of weights. When combining systems, we apply a + different offset to the language-model weights for each system. This offset is + determined by the beginning of the language-model-weight range that we sweep + for each system, which in turn was determined by us when setting up the + configuration files for our system. So for instance, if we start the regular + SGMM system at offset 10, and the bottleneck+SGMM system at 15, then there would + be an offset of 5 between the two systems when we do the combination. +

+ We don't bother applying weights to the different systems when combining, but + on occasion we do leave out some of the worse systems from the combination. + This is decided by a human operator, based on trying different combinations on + the dev set. The identities of the systems that were combined will be noted + in the individual submissions. + +

4.19.2 System combination for keyword search
+ + In this section we describe the Kaldi-internal method of system combination for + keyword search. For the overall RADICAL system combination, we provide the kwslists + for both the individual Kaldi subsystems, and their combination as described in this + section. +

+
+  The Kaldi-internal combination for keyword search is based on averaging across systems the
+  unnormalized putative hits (i.e. the lattice posteriors extracted from the index),
+  before normalizing the averaged posteriors using the normalization method described
+  in Section 4.14. Note that in order to do this averaging, we have
+  to have some notion of when multiple hits are "at the same time". This is pretty obvious
+  (hits are the same if they overlap in time), so we won't refer to it further. If one
+  system did not have a hit at a particular time, that is identical to it having a posterior of
+  zero.

+
+  We do not do a conventional average (i.e. a mean).
+  We wanted to implement something that was in between a mean and a geometric mean. We
+  used the notion that a geometric mean is a mean of logs, and a log is like a power of
+  x, (1/p) * x^p, as p approaches zero. So if we take the mean of x^p
+  for some power p between zero and one, and take the result to the power 1/p,
+  this is somewhere between a mean and a geometric mean. So this is what we do.
+  Suppose we have three scores: a, b and c. We choose a power p (say, p=0.5, but it's tuned
+  per language). Then we let
+  average = (a^p + b^p + c^p)^(1/p) .
+Actually we extend this to a weighted average, i.e. +
+  average = (w_a * a^p + w_b * b^p + w_c * c^p)^(1/p)
+where the weights sum to one. The weights are determined manually in small scale +experiments on one of the languages, as the result is not very sensitive to the +weights. We used weights that are fairly close to each other, but with better +systems having larger weights. +

+We apply the normalization method of Section 4.14. after taking the + weighted mean. + + +
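A direct transcription of the weighted power-mean combination (names and example values are ours):

```python
def combine_scores(scores, weights, p=0.5):
    """Weighted power mean (sum_i w_i * s_i^p)^(1/p) of per-system putative-hit
    posteriors; p=1 is a plain weighted mean and p -> 0 approaches a geometric
    mean. A system with no hit at this location contributes a score of 0."""
    assert abs(sum(weights) - 1.0) < 1e-6
    return sum(w * s ** p for w, s in zip(weights, scores)) ** (1.0 / p)

# three systems, the better ones given slightly larger weights:
print(combine_scores([0.8, 0.6, 0.0], [0.4, 0.35, 0.25], p=0.5))
```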

5. Hardware

+
+A variable number of 16-core (Intel(R) Xeon(R) CPU E5-2680) machines was used. The amount
+of per-core memory was 2 GB. The training of the LimitedLP system was done using 32 cores (2 nodes),
+the training of the FullLP system was done using 64 cores (4 nodes). Each of the nodes was
+equipped with one GPU card (Tesla K20m); however, these cards weren't used for training, with
+the exception of the neural networks (DNN and BNF systems). The detailed timing info will be
+provided in the next section. The maximum total storage capacity used was approximately 5 TB.
+The typical size of a complete system (including lattices) is around 300 GB. The lattice
+generation of the shadow dataset (combined dev10h and eval) was done on 96 cores (6 nodes).
+Indexing and search was done on 64 CPUs (4 nodes).

6. Timing

+
+DATADEF:==BaseLR{204LimitedLP}:AM{204LimitedLP},LM{204LimitedLP},PRON{204LimitedLP},AR{None}
+
+
+Ingestion Elapsed Time (hh:mm:ss) - 151:29:03
+Ingestion Total CPU Time (hh:mm:ss) - 9546:33:38
+Ingestion Total GPU Time (hh:mm:ss) - 92:23:16
+
+Ingestion Maximum CPU Memory (gbytes) - 192
+Ingestion Maximum GPU Memory (gbytes) - 16
+
+Search Elapsed Time (hh:mm:ss) - 12:39:08
+Search Total CPU Time (hh:mm:ss) - 427:17:22
+Search Total GPU Time (hh:mm:ss) - 0:00:00
+
+Search Maximum CPU Memory (gbytes) - 32
+Search Maximum GPU Memory (gbytes) - 16
+
+ + +

7. References

+ + +
+
+  • [1] "Minimum Bayes Risk decoding and system combination based on a recursion for edit distance", Haihua Xu, Daniel Povey, Lidia Mangu and Jie Zhu, Computer Speech and Language, 2011.
+  • [2] "A Symmetrization of the Subspace Gaussian Mixture Model", Daniel Povey, Martin Karafiat, Arnab Ghoshal and Petr Schwarz, ICASSP 2011.
+  • [3] "Boosted MMI for Model and Feature Space Discriminative Training", Daniel Povey, Dimitri Kanevsky, Brian Kingsbury, Bhuvana Ramabhadran, George Saon and Karthik Visweswariah, ICASSP 2008.
+  • [4] "Using Proxies for OOV Keywords in the Keyword Search Task", Guoguo Chen, Oguz Yilmaz, Jan Trmal, Daniel Povey and Sanjeev Khudanpur, ASRU 2013.
+  • [5] "Lattice Indexing for Spoken Term Detection", Dogan Can and Murat Saraclar, IEEE Transactions on Audio, Speech and Language Processing.
+  • [6] "Generating Exact Lattices in the WFST Framework", D. Povey, M. Hannemann et al., ICASSP 2012.
+  • [7] "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition", Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014.
+  • [8] "Improving Deep Neural Network Acoustic Models using Generalized Maxout Networks", Xiaohui Zhang, Jan Trmal, Daniel Povey and Sanjeev Khudanpur, ICASSP 2014.
+  • [9] "Point Process Models for Spotting Keywords in Continuous Speech", A. Jansen and P. Niyogi, IEEE Transactions on Audio, Speech and Language Processing, 17(8), pp. 1457-1470, 2009.
+  • [10] "Event Selection from Phone Posteriorgrams Using Matched Filters", K. Kintzley, A. Jansen and H. Hermansky, Proc. INTERSPEECH, 2011.
+  • [11] "MAP Estimation of Whole-Word Acoustic Models with Dictionary Priors", K. Kintzley, A. Jansen and H. Hermansky, Proc. INTERSPEECH, 2012.
+  • [12] "Featherweight Phonetic Keyword Search for Conversational Speech", K. Kintzley, A. Jansen and H. Hermansky, Proc. ICASSP, 2014.
+  • [13] "Semi-Tied Covariance Matrices for Hidden Markov Models", Mark Gales, IEEE Transactions on Speech and Audio Processing, 1999.
+  • [14] "The Subspace Gaussian Mixture Model -- a Structured Model for Speech Recognition", Daniel Povey, Lukas Burget et al., Computer Speech and Language, 2011.
+  • [15] "Minimum Bayes Risk Acoustic Model Estimation and Adaptation", Matthew Gibson, Dissertation, University of Sheffield, 2008.
+  • [16] "Sequence-discriminative Training of Deep Neural Networks", K. Vesely, A. Ghoshal, L. Burget and D. Povey, Proc. Interspeech 2013.
+  • [17] "Score Normalization and System Combination for Improved Keyword Spotting", Damianos Karakos et al., ASRU 2013.
+  • [18] "Posterior Probability Decoding, Confidence Estimation and System Combination", Gunnar Evermann and P. C. Woodland, Proc. Speech Transcription Workshop, Vol. 27, 2000.
+ + diff --git a/egs/babel/s5d/cmd.sh b/egs/babel/s5d/cmd.sh new file mode 100644 index 00000000000..a4a11bef039 --- /dev/null +++ b/egs/babel/s5d/cmd.sh @@ -0,0 +1,29 @@ +# "queue.pl" uses qsub. The options to it are +# options to qsub. If you have GridEngine installed, +# change this to a queue you have access to. +# Otherwise, use "run.pl", which will run jobs locally +# (make sure your --num-jobs options are no more than +# the number of cpus on your machine. + +#a) JHU cluster options +export train_cmd="queue.pl -l arch=*64" +export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" +export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" + +#export cuda_cmd="..." + + +#b) BUT cluster options +#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M" +#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M" +#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G" + +#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1" +#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu" +#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G" + +#c) run it locally... +#export train_cmd=run.pl +#export decode_cmd=run.pl +#export cuda_cmd=run.pl +#export mkgraph_cmd=run.pl diff --git a/egs/babel/s5d/conf/bnf/config_full.py b/egs/babel/s5d/conf/bnf/config_full.py new file mode 100755 index 00000000000..5ea3ddbb1d9 --- /dev/null +++ b/egs/babel/s5d/conf/bnf/config_full.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +################################################# +## PTDNN - Python Toolkit for Deep Neural Network +## Author: Yajie Miao +################################################# + +import os +import sys + +from utils.learn_rates import LearningRateExpDecay + + +class BnfExpConfig(object): + + def __init__(self): + + # working directory; by default, the pfiles should be here + self.wdir = "WORK/" + self.pretrain_data = self.wdir + 'train.pfile.gz' # pretraining data + self.pretrain_output = self.wdir + "rbm.ptr" # pretraining output + + # finetuning data + self.finetune_train_data = self.wdir + 'train.pfile.gz' # finetune training data + self.finetune_valid_data = self.wdir + 'valid.pfile.gz' # finetune validation data + self.finetune_output = self.wdir + "final.nnet.raw" # finetune output + self.nnet_kaldi_fmt = self.wdir + "final.nnet" + + # global config for nnet topo + self.n_ins=250 # size of input data + self.n_outs=N_OUTS # number of output targets.. we'll replace this with + # the correct number when we move this to the right place. 
+ self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes + self.bnf_layer_index = 6 # the index of the Bottleneck layer + self.pretrain_layer_num = 5 # number of hidden layers to be pretrained + + # global config for data + self.shuffle = True + self.chunk_size = '200m' + + # pretraining batch size + self.pretrain_batch_size = 128 # batch-size in pretraining + + # pretraining schedule + self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM + self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM + self.initial_momentum = 0.5 # initial momentum + self.final_momentum = 0.9 # final momentum + self.initial_momentum_epoch = 2 # for how many epochs do we use initial_momentum + self.pretraining_epochs = 4 # total epochs + + # finetuning batch size + self.finetune_batch_size = 256 # batch-size for finetuning + + # finetuning schedule + self.finetune_momentum = 0.5 # momentum for finetuning + self.lrate = LearningRateExpDecay(start_rate=0.04, # starting learning rate + scale_by = 0.5, # decaying factor in ramping + max_epochs = 1000, # 'dump' epoch limit, never can be reached + min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping + min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping + init_error = 100) diff --git a/egs/babel/s5d/conf/bnf/config_limited.py b/egs/babel/s5d/conf/bnf/config_limited.py new file mode 100755 index 00000000000..f63c3640d68 --- /dev/null +++ b/egs/babel/s5d/conf/bnf/config_limited.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +################################################# +## PTDNN - Python Toolkit for Deep Neural Network +## Author: Yajie Miao +################################################# + +import os +import sys + +from utils.learn_rates import LearningRateExpDecay + + +class BnfExpConfig(object): + + def __init__(self): + + # working directory; by default, the pfiles should be here + self.wdir = "WORK/" # Note: we'll replace CWD with the current directory + # when we move this to the right place. + self.pretrain_data = self.wdir + 'train.pfile.gz' # pretraining data + self.pretrain_output = self.wdir + "rbm.ptr" # pretraining output + + # finetuning data + self.finetune_train_data = self.wdir + 'train.pfile.gz' # finetune training data + self.finetune_valid_data = self.wdir + 'valid.pfile.gz' # finetune validation data + self.finetune_output = self.wdir + "final.nnet.raw" # finetune output + self.nnet_kaldi_fmt = self.wdir + "final.nnet" + + # global config for nnet topo + self.n_ins=250 # size of input data + self.n_outs=N_OUTS # number of output targets.. we'll replace this with + # the correct number when we move this to the right place. 
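        # Note: a hedged reading, inferred purely from the parameter comments
        # below and not from PTDNN's code, of the LearningRateExpDecay
        # schedule constructed at the end of this file (and of config_full.py
        # above): the rate stays at start_rate until the per-epoch drop in
        # validation error falls below min_derror_ramp_start; it is then
        # multiplied by scale_by each epoch ("ramping"), and finetuning stops
        # once the drop also falls below min_derror_stop. init_error seeds
        # the first comparison, and max_epochs is a hard cap that the
        # comments say is never expected to be reached.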
+ self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes + self.bnf_layer_index = 5 # the index of the Bottleneck layer + self.pretrain_layer_num = 4 # number of hidden layers to be pretrained + + # global config for data + self.shuffle = True + self.chunk_size = '200m' + + # pretraining batch size + self.pretrain_batch_size = 128 # batch-size in pretraining + + # pretraining schedule + self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM + self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM + self.initial_momentum = 0.5 # initial momentum + self.final_momentum = 0.9 # final momentum + self.initial_momentum_epoch = 5 # for how many epochs do we use initial_momentum + self.pretraining_epochs=10 # total epochs + + # finetuning batch size + self.finetune_batch_size = 256 # batch-size for finetuning + + # finetuning schedule + self.finetune_momentum = 0.5 # momentum for finetuning + self.lrate = LearningRateExpDecay(start_rate=0.08, # starting learning rate + scale_by = 0.5, # decaying factor in ramping + max_epochs = 1000, # 'dump' epoch limit, never can be reached + min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping + min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping + init_error = 100) diff --git a/egs/babel/s5d/conf/common.fullLP b/egs/babel/s5d/conf/common.fullLP new file mode 100644 index 00000000000..05dea74beb0 --- /dev/null +++ b/egs/babel/s5d/conf/common.fullLP @@ -0,0 +1,124 @@ +# BNF training parameters +bnf_num_hidden_layers=6 +bottleneck_dim=42 +bnf_hidden_layer_dim=2048 +bnf_minibatch_size=512 +bnf_init_learning_rate=0.008 +bnf_final_learning_rate=0.0008 +bnf_max_change=40 +bnf_num_jobs=4 +bnf_num_threads=1 +bnf_mixup=10000 +bnf_mpe_learning_rate=0.00009 +bnf_mpe_last_layer_factor=0.1 +bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the + # non-bottleneck system (which has 800) +bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the + # non-bottleneck system (which has 80000). +bnf_decode_acwt=0.066666 + + +# DNN hybrid system training parameters +dnn_num_hidden_layers=4 +dnn_input_dim=4000 +dnn_output_dim=400 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=12000 + +dnn_mpe_learning_rate=0.00008 +dnn_mpe_last_layer_factor=0.1 +dnn_mpe_retroactive=true + +bnf_every_nth_frame=2 # take every 2nd frame. 
+babel_type=full + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 9 --max-lmwt 13 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) +lmwt_chain_extra_opts=( --min-lmwt 9 --max-lmwt 13 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( ) + sgmm_group_extra_opts=( --num_iters 25 ) + sgmm_denlats_extra_opts=( --num-threads 2 ) + sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2") + dnn_denlats_extra_opts=( --num-threads 2 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_parallel_opts="-l gpu=1" +else + decode_extra_opts=(--num-threads 6 --parallel-opts "--num-threads 6 --mem 4G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "--num-threads 7 --mem 6G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4" ) + sgmm_mmi_extra_opts=() + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "--num-threads 16") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1" ) + dnn_parallel_opts="--gpu 1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1") +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.limitedLP b/egs/babel/s5d/conf/common.limitedLP new file mode 100644 index 00000000000..a73080a5b65 --- /dev/null +++ b/egs/babel/s5d/conf/common.limitedLP @@ -0,0 +1,128 @@ +# BNF training parameters +bnf_num_hidden_layers=5 +bottleneck_dim=42 +bnf_hidden_layer_dim=1024 +bnf_minibatch_size=512 +bnf_init_learning_rate=0.008 +bnf_final_learning_rate=0.0008 +bnf_max_change=40 +bnf_num_jobs=4 +bnf_num_threads=1 +bnf_mixup=5000 +bnf_mpe_learning_rate=0.00009 +bnf_mpe_last_layer_factor=0.1 +bnf_num_gauss_ubm=500 # use fewer UBM Gaussians than the + # non-bottleneck system (which has 750) +bnf_num_gauss_sgmm=10000 # use fewer SGMM sub-states than the + # non-bottleneck system (which has 18000). 
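# Note: a hedged usage sketch, not part of this patch. The *_extra_opts and
# *_parallel_opts variables in these files are bash arrays, so the intended
# expansion in a consumer script is the quoted form, e.g.
#   steps/decode.sh "${decode_extra_opts[@]}" ...
# which keeps multi-word elements such as --parallel-opts "-pe smp 4"
# together as single arguments.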
+bnf_decode_acwt=0.066666 + + +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 + +dnn_mpe_learning_rate=0.00009 +dnn_mpe_last_layer_factor=0.1 +dnn_mpe_retroactive=true + +bnf_every_nth_frame=1 # take all frames. +babel_type=limited + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +# Semi-supervised examples options +dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --io-opts "-tc 5" ) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=( ) + sgmm_denlats_extra_opts=( --num-threads 1 ) + dnn_denlats_extra_opts=( --num-threads 1 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 + --parallel-opts "-pe smp 16" ) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1) + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +else + decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7.0G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G") + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_parallel_opts="-l gpu=1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.semisupervised.limitedLP 
b/egs/babel/s5d/conf/common.semisupervised.limitedLP new file mode 100644 index 00000000000..63118af268c --- /dev/null +++ b/egs/babel/s5d/conf/common.semisupervised.limitedLP @@ -0,0 +1,27 @@ +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 +num_epochs=15 +num_epochs_extra=5 +num_iters_final=20 + +babel_type=limited + +# Supervised tuning options +# To update only the last layer using only the supervised data after +# semi-supervised training is done +do_supervised_tuning=true +dnn_update_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +dnn_update_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + +# Semi-supervised examples options +egs_gpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_cpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 8 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_io_opts="-tc 5" +weight_threshold=0.7 diff --git a/egs/babel/s5d/conf/common_vars.sh b/egs/babel/s5d/conf/common_vars.sh new file mode 100644 index 00000000000..3d81a3fcc6c --- /dev/null +++ b/egs/babel/s5d/conf/common_vars.sh @@ -0,0 +1,29 @@ +#keyword search default +glmFile=conf/glm +duptime=0.5 +case_insensitive=false +use_pitch=true +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="-oov " +boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not + # having the option)... should test. +cer=0 + +#Declaring here to make the definition inside the language conf files more +# transparent and nice +declare -A train_kwlists +declare -A dev10h_kwlists +declare -A dev2h_kwlists +declare -A evalpart1_kwlists +declare -A eval_kwlists +declare -A shadow_kwlists + +# just for back-compatibility +declare -A dev10h_more_kwlists +declare -A dev2h_more_kwlists +declare -A evalpart1_more_kwlists +declare -A eval_more_kwlists +declare -A shadow_more_kwlists +[ -f ./path.sh ] && . ./path.sh; # source the path. +[ -f ./cmd.sh ] && . ./cmd.sh; # source train and decode cmds. diff --git a/egs/babel/s5d/conf/glm b/egs/babel/s5d/conf/glm new file mode 100644 index 00000000000..cdf9c42feaa --- /dev/null +++ b/egs/babel/s5d/conf/glm @@ -0,0 +1,13 @@ +;; +;; File: ma970904.glm +;; Desc: This file contains the transcript filtering rules for the ARPA +;; Mandarin Hub5-NE Evaluation. +;; +;; Date: 970904 +;; - initial creation +;; +;; Hesitation mappings + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + diff --git a/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf new file mode 100644 index 00000000000..7d2da3715fb --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf @@ -0,0 +1,104 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml +eval_nj=64 + +evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/101-cantonese/release-current/conversational/dev + /export/babel/data/101-cantonese/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Cantonese_Babel101/dev.list + 
/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt +cer=1 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf new file mode 100644 index 00000000000..66347522065 --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period 
evaluation data files +eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml +eval_nj=64 + +evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/101-cantonese/release-current/conversational/dev + /export/babel/data/101-cantonese/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Cantonese_Babel101/dev.list + /export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt +cer=1 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf new file mode 100644 index 00000000000..f00afb53454 --- /dev/null +++ b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf @@ -0,0 +1,105 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Assamese_Babel102/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=24 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ +eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/102-assamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml + ) 
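# Note: a hedged sketch, not part of this patch. The *_kwlists variables are
# bash associative arrays (declared with "declare -A" in
# conf/common_vars.sh), mapping a keyword-set name to its kwlist XML; a
# consumer script could walk one of them like this:
#   for kwset in "${!evalpart1_more_kwlists[@]}"; do
#     kwlist_xml=${evalpart1_more_kwlists[$kwset]}
#     echo "keyword set '$kwset' -> $kwlist_xml"
#   done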
+evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/102-assamese/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Assamese_Babel102/dev.list + /export/babel/data/splits/Assamese_Babel102/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf new file mode 100644 index 00000000000..937166caf7d --- /dev/null +++ b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Assamese_Babel102/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=24 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ +eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/102-assamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/102-assamese/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Assamese_Babel102/dev.list + /export/babel/data/splits/Assamese_Babel102/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/102-assamese//release-current/conversational/training/ + /export/babel/data/102-assamese//release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Assamese_Babel102/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Assamese_Babel102/train.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.sub-train.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf 
b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf new file mode 100644 index 00000000000..d283be30d16 --- /dev/null +++ b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf @@ -0,0 +1,105 @@ +# include common settings for fullLP systems. +. conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/103-bengali/release-current/conversational/training +train_data_list=/export/babel/data/splits/Bengali_Babel103/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=12 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval +eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/103-bengali/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml + 
[llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/103-bengali/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Bengali_Babel103/dev.list + /export/babel/data/splits/Bengali_Babel103/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf new file mode 100644 index 00000000000..3799653db68 --- /dev/null +++ b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/103-bengali//release-current/conversational/training +train_data_list=/export/babel/data/splits/Bengali_Babel103/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=12 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval +eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/103-bengali/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml + ) 
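# Note: a hedged sketch, not part of this patch. unsup_data_dir and
# unsup_data_list further down in this file are parallel plain arrays, with
# element i of one corresponding to element i of the other, so a consumer
# script would pair them by index:
#   for i in "${!unsup_data_dir[@]}"; do
#     dir=${unsup_data_dir[$i]}
#     list=${unsup_data_list[$i]}
#     echo "untranscribed audio under $dir, utterance list $list"
#   done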
+evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/103-bengali/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Bengali_Babel103/dev.list + /export/babel/data/splits/Bengali_Babel103/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/103-bengali/release-current/conversational/training/ + /export/babel/data/103-bengali/release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Bengali_Babel103/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Bengali_Babel103/train.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.sub-train.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf new file mode 100644 index 00000000000..9fbaf629935 --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf @@ -0,0 +1,114 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=./conf/lists/104-pashto/train.40HrFLP.list +train_nj=32 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=./conf/lists/104-pashto/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm 
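# Note: a brief gloss, not stated in this patch and based only on standard
# NIST keyword-search conventions: the .stm file carries reference
# transcripts for scoring, the .ecf.xml file defines which audio files and
# time regions are in scope, the .rttm file carries time-marked reference
# annotations, and the .kwlist*.xml files carry the keyword sets to search.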
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev + /export/babel/data/104-pashto/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Pashto_Babel104/dev.list + /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf new file mode 100644 index 00000000000..af1bbb132f7 --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=./conf/lists/104-pashto/training.list +train_nj=32 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=./conf/lists/104-pashto/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm 
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev + /export/babel/data/104-pashto/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Pashto_Babel104/dev.list + /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf new file mode 100644 index 00000000000..41bc3ba85ef --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf @@ -0,0 +1,110 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=/export/babel/data/splits/Pashto_Babel104/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev +
/export/babel/data/104-pashto/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Pashto_Babel104/dev.list + /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--romanized --oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversational/reference_materials/lexicon.sub-train.txt + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf new file mode 100644 index 00000000000..d6ae1007ac9 --- /dev/null +++ b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf @@ -0,0 +1,118 @@ +# include common settings for fullLP systems. +.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training +train_data_list=/export/babel/data/splits/Turkish_Babel105/train.fullLP.list +#train_nj=32 +train_ecf_file=./data/train/ecf.train.xml +train_rttm_file=./exp/tri5/rttm +train_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +train_nj=64 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +evalpart1_data_list=conf/lists/105-turkish/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml 
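+# NOTE (editorial): per NIST KWS/STT conventions, the .ecf.xml file lists the time regions to score, the .rttm file is the time-marked reference transcript, the .stm file is the reference for WER scoring, and the .kwlist*.xml files hold the keyword queries; a minimal existence check (sketch) could be: +#   for f in "$evalpart1_ecf_file" "$evalpart1_rttm_file" "$evalpart1_stm_file"; do +#     [ -f "$f" ] || echo "missing scoring file: $f" >&2 +#   done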
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/105-turkish/release-current-b/conversational/dev + /export/babel/data/105-turkish/release-current-b/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Turkish_Babel105/dev.list + /export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=./conf/glm +lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt +#http://demo.icu-project.org/icu-bin/translit +icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' ) +#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" ) +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf new file mode 100644 index 00000000000..f7ca60c6f25 --- /dev/null +++ b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf @@ -0,0 +1,111 @@ +# include common settings for limitedLP systems. +.
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training +train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +evalpart1_data_list=conf/lists/105-turkish/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/105-turkish/release-current-b/conversational/dev + /export/babel/data/105-turkish/release-current-b/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Turkish_Babel105/dev.list + 
/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/105-turkish/release-current-b/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Turkish_Babel105/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=600 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/105-turkish/release-babel105b-v0.4-rc1/conversational/reference_materials/lexicon.sub-train.txt +#http://demo.icu-project.org/icu-bin/translit +icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' ) +#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" ) +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf new file mode 100644 index 00000000000..fa1afe4717e --- /dev/null +++ b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf @@ -0,0 +1,108 @@ +# include common settings for fullLP systems. +.
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=23 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/dev + /export/babel/data/106-tagalog/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Tagalog_Babel106/dev.list + 
/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf new file mode 100644 index 00000000000..86148300e0c --- /dev/null +++ b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf @@ -0,0 +1,108 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=23 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/dev + /export/babel/data/106-tagalog/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Tagalog_Babel106/dev.list + /export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/106-tagalog/release-babel106b-v0.2g-sub-train/conversational/reference_materials/lexicon.sub-train.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf new file mode 100644 index 00000000000..e09ef9df4fd --- /dev/null +++ b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for fullLP systems. +.
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=81 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list + /export/babel/data/splits/Vietnamese_Babel107/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml + + ) +shadow_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.txt + +phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U; + a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a; + OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:" +# + + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf new file mode 100644 index 00000000000..a659c44ecc4 --- /dev/null +++ b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf @@ -0,0 +1,115 @@ +# include common settings for limitedLP systems. +.
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/107-vietnamese/release-current/conversational/eval/ + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list + /export/babel/data/splits/Vietnamese_Babel107/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="<unk>" +lexiconFlags="--oov <unk>" + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.sub-train.txt + +phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U; + a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a; + OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:" +# + + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf new file mode 100644 index 00000000000..b92a52b7bb6 --- /dev/null +++ b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf @@ -0,0 +1,80 @@ +# include common settings for fullLP systems. +.
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval +eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/201-haitian/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32
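+# NOTE (editorial sketch): the shadow set below decodes dev and eval audio in one pass; shadow_data_dir and shadow_data_list are parallel indexed arrays that are assumed to stay index-aligned, which can be checked with: +#   for i in "${!shadow_data_dir[@]}"; do +#     echo "dir=${shadow_data_dir[$i]} list=${shadow_data_list[$i]}" +#   done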
+ +#Shadow data files +shadow_data_dir=( + /export/babel/data/201-haitian/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Haitian_Babel201/dev.list + /export/babel/data/splits/Haitian_Babel201/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt + diff --git a/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf new file mode 100644 index 00000000000..d1320fd0245 --- /dev/null +++ b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf @@ -0,0 +1,89 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval +eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml 
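+# NOTE (editorial): the *_nj values set how many parallel jobs process the corresponding data set; as a rule of thumb nj should not exceed the number of recordings in the set, or some splits come up empty. A minimal guard (sketch; assumes data/eval/wav.scp already exists): +#   nrec=$(wc -l < data/eval/wav.scp) +#   [ "$eval_nj" -le "$nrec" ] || eval_nj=$nrec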
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/201-haitian/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/201-haitian/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Haitian_Babel201/dev.list + /export/babel/data/splits/Haitian_Babel201/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/201-haitian/release-current/conversational/training/ + /export/babel/data/201-haitian/release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Haitian_Babel201/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Haitian_Babel201/train.untranscribed.list + ) +unsup_nj=64 + + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt + diff --git a/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf new file mode 100644 index 00000000000..d24eb1b73a4 --- /dev/null +++ b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf @@ -0,0 +1,93 @@ +# include common settings for fullLP systems. +.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/202-swahili//dev.list + ./conf/lists/202-swahili//eval.list +) # shadow_data_list +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +shadow_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training +unsup_data_list=./conf/lists/202-swahili//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf new file mode 100644 index 00000000000..761e6c6e0ab --- /dev/null +++ b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf @@ -0,0 +1,99 @@ +# include common settings for limitedLP systems. +.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml
+ [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml
+ [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev
+ /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+ ./conf/lists/202-swahili//dev.list
+ ./conf/lists/202-swahili//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm
+shadow_kwlists=(
+ [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml
+ [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+ ./conf/lists/202-swahili//untranscribed-training.list
+ ./conf/lists/202-swahili//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+ /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training
+ /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf
new file mode 100644
index 00000000000..052aa6bbc50
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf
@@ -0,0 +1,101 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/203-lao/release-current/conversational/training +train_data_list=/export/babel/data/splits/Lao_Babel203/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval +eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/203-lao/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files 
+shadow_data_dir=(
+ /export/babel/data/203-lao/release-current/conversational/dev
+ /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
+ )
+shadow_data_list=(
+ /export/babel/data/splits/Lao_Babel203/dev.list
+ /export/babel/data/splits/Lao_Babel203/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+
+ )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf
new file mode 100644
index 00000000000..1e12a529361
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf
@@ -0,0 +1,110 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/203-lao/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Lao_Babel203/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/
+dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+ )
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+ )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval
+eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/203-lao/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml
+ [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml
+ )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/203-lao/release-current/conversational/dev
+ /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
+ )
+shadow_data_list=(
+ /export/babel/data/splits/Lao_Babel203/dev.list
+ /export/babel/data/splits/Lao_Babel203/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+
+ )
+shadow_nj=32
+
+unsup_data_dir=(/export/babel/data/203-lao/release-current/conversational/training/
+ /export/babel/data/203-lao/release-current/conversational/untranscribed-training/
+ )
+unsup_data_list=(
+ /export/babel/data/splits/Lao_Babel203/train.LimitedLP.untranscribed.list
+ /export/babel/data/splits/Lao_Babel203/train.untranscribed.list
+ )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf
new file mode 100644
index 00000000000..700ae3d5dfb
--- /dev/null
+++
b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for fullLP systems. +. conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev2h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev10h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml 
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+evalpart1_more_kwlists=(
+ [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml
+ [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml
+ [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml
+ [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml
+ )
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/204-tamil/release-current/conversational/dev/
+ /export/babel/data/204-tamil/release-current/conversational/eval/
+ )
+shadow_data_list=(
+ /export/babel/data/splits/Tamil_Babel204/dev.list
+ /export/babel/data/splits/Tamil_Babel204/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+shadow_more_kwlists=(
+ [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
+ [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
+ [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
+ [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
+ )
+shadow_nj=64
+
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf
new file mode 100644
index 00000000000..7e16fcd8be5
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf
@@ -0,0 +1,122 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlists=( + [eval]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +) +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlists=( + 
[dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+ [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml
+ [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml
+ [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml
+ [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml
+ )
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/204-tamil/release-current/conversational/dev/
+ /export/babel/data/204-tamil/release-current/conversational/eval/
+ )
+shadow_data_list=(
+ /export/babel/data/splits/Tamil_Babel204/dev.list
+ /export/babel/data/splits/Tamil_Babel204/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
+shadow_kwlists=(
+ [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+ [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
+ [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
+ [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
+ [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
+ )
+shadow_nj=64
+
+
+unsup_data_dir=(/export/babel/data/204-tamil/release-current/conversational/training/
+ /export/babel/data/204-tamil/release-current/conversational/untranscribed-training/
+ )
+unsup_data_list=(
+ /export/babel/data/splits/Tamil_Babel204/train.LimitedLP.untranscribed.list
+ /export/babel/data/splits/Tamil_Babel204/train.untranscribed.list
+ )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
new file mode 100644
index 00000000000..74e006e2692
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
@@ -0,0 +1,94 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+ [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+ [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+ [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+ [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+ ./conf/lists/205-kurmanji//dev.list
+ ./conf/lists/205-kurmanji//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+ [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+unsup_data_list=./conf/lists/205-kurmanji//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
new file mode 100644
index 00000000000..fc5fdd4aa52
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+ [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+ [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+ [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+ [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+ ./conf/lists/205-kurmanji//dev.list
+ ./conf/lists/205-kurmanji//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+ [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+ ./conf/lists/205-kurmanji//untranscribed-training.list
+ ./conf/lists/205-kurmanji//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+ /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
new file mode 100644
index 00000000000..675dc83780d
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
@@ -0,0 +1,129 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm 
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+ [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+ [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+ )
+evalpart1_kwlists=(
+ [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+ [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+ [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+ )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/206-zulu/release-current/conversational/dev
+ /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+ )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+ /export/babel/data/splits/Zulu_Babel206/dev.list
+ /export/babel/data/splits/Zulu_Babel206/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+
+ )
+shadow_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+ )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+#Zulu seems to need much larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 13 --max-lmwt 18 )
+lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
new file mode 100644
index 00000000000..caaf8cdc023
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
@@ -0,0 +1,126 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/206-zulu/release-current/conversational/dev
+ /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+ )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+ /export/babel/data/splits/Zulu_Babel206/dev.list
+ /export/babel/data/splits/Zulu_Babel206/eval.list
+ )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+ [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+ [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+
+ )
+shadow_nj=32
+
+
+unsup_data_dir=(/export/babel/data/206-zulu/release-current/conversational/training/
+ /export/babel/data/206-zulu/release-current/conversational/untranscribed-training/
+ )
+unsup_data_list=(
+ /export/babel/data/splits/Zulu_Babel206/train.LimitedLP.untranscribed.list
+ /export/babel/data/splits/Zulu_Babel206/train.untranscribed.list
+ )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+#Zulu seems to need larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 17 --max-lmwt 24 )
+lmwt_dnn_extra_opts=( --min-lmwt 12 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
+
+proxy_phone_beam=-1
+proxy_phone_nbest=-1
+proxy_beam=5
+proxy_nbest=500
+proxy_cutoff=0
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
new file mode 100644
index 00000000000..0653c16fd8f
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
@@ -0,0 +1,93 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml
+ [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml
+ [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml
+ [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+ /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev
+ /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+ ./conf/lists/207-tokpisin//dev.list
+ ./conf/lists/207-tokpisin//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm
+shadow_kwlists=(
+ [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training
+unsup_data_list=./conf/lists/207-tokpisin//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
new file mode 100644
index 00000000000..d48f3196686
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
@@ -0,0 +1,99 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/207-tokpisin//dev.list + ./conf/lists/207-tokpisin//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +shadow_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/207-tokpisin//untranscribed-training.list + ./conf/lists/207-tokpisin//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf new file mode 100644 index 00000000000..4e552e919f8 --- /dev/null +++ b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/301-cebuano//dev.list + ./conf/lists/301-cebuano//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training +unsup_data_list=./conf/lists/301-cebuano//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf new file mode 100644 index 00000000000..6ae02781972 --- /dev/null +++ b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf @@ -0,0 +1,106 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/301-cebuano//dev.list + ./conf/lists/301-cebuano//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/301-cebuano//untranscribed-training.list + ./conf/lists/301-cebuano//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf new file mode 100644 index 00000000000..d3a02dc1075 --- /dev/null +++ b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf @@ -0,0 +1,101 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/302-kazakh//dev.list + ./conf/lists/302-kazakh//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training +unsup_data_list=./conf/lists/302-kazakh//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf new file mode 100644 index 00000000000..2049c820695 --- /dev/null +++ b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/302-kazakh//dev.list + ./conf/lists/302-kazakh//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/302-kazakh//untranscribed-training.list + ./conf/lists/302-kazakh//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.sub-train.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf new file mode 100644 index 00000000000..5ba3f8a1606 --- /dev/null +++ b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/303-telugu//dev.list + ./conf/lists/303-telugu//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training +unsup_data_list=./conf/lists/303-telugu//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf new file mode 100644 index 00000000000..b916b5b27e6 --- /dev/null +++ b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/303-telugu//dev.list + ./conf/lists/303-telugu//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/303-telugu//untranscribed-training.list + ./conf/lists/303-telugu//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.sub-train.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf new file mode 100644 index 00000000000..8459ca096a0 --- /dev/null +++ b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/304-lithuanian//dev.list + ./conf/lists/304-lithuanian//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training +unsup_data_list=./conf/lists/304-lithuanian//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf new file mode 100644 index 00000000000..a571161390e --- /dev/null +++ b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf @@ -0,0 +1,106 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev
+  /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/304-lithuanian//dev.list
+  ./conf/lists/304-lithuanian//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/304-lithuanian//untranscribed-training.list
+  ./conf/lists/304-lithuanian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training
+  /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
new file mode 100644
index 00000000000..b1dd7f5b4f5
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
@@ -0,0 +1,47 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training
+train_data_list=./conf/lists/305-guarani//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev2h_data_list=./conf/lists/305-guarani//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev10h_data_list=./conf/lists/305-guarani//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training
+unsup_data_list=./conf/lists/305-guarani//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
new file mode 100644
index 00000000000..c0d9cc97524
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +train_data_list=./conf/lists/305-guarani//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev2h_data_list=./conf/lists/305-guarani//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev10h_data_list=./conf/lists/305-guarani//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/305-guarani//untranscribed-training.list + ./conf/lists/305-guarani//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf new file mode 100644 index 00000000000..15a0264de61 --- /dev/null +++ b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf @@ -0,0 +1,47 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training
+train_data_list=./conf/lists/306-igbo//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev2h_data_list=./conf/lists/306-igbo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev10h_data_list=./conf/lists/306-igbo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training
+unsup_data_list=./conf/lists/306-igbo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
new file mode 100644
index 00000000000..70642537caf
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +train_data_list=./conf/lists/306-igbo//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev2h_data_list=./conf/lists/306-igbo//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev10h_data_list=./conf/lists/306-igbo//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/306-igbo//untranscribed-training.list + ./conf/lists/306-igbo//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf new file mode 100644 index 00000000000..8ae1b53eb2b --- /dev/null +++ b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf @@ -0,0 +1,52 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+unsup_data_list=./conf/lists/307-amharic//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+extlex_proxy_phone_beam=5
+extlex_proxy_phone_nbest=300
+extlex_proxy_beam=-1
+extlex_proxy_nbest=-1
+
+
diff --git a/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
new file mode 100644
index 00000000000..3c49d4356ce
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/307-amharic//untranscribed-training.list
+  ./conf/lists/307-amharic//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+  /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
new file mode 100644
index 00000000000..aac78e77a80
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
@@ -0,0 +1,48 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+unsup_data_list=./conf/lists/401-mongolian//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
new file mode 100644
index 00000000000..e3bd46c7e68
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/401-mongolian//untranscribed-training.list
+  ./conf/lists/401-mongolian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+  /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
new file mode 100644
index 00000000000..d0f86207484
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
@@ -0,0 +1,47 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training
+train_data_list=./conf/lists/402-javanese//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev2h_data_list=./conf/lists/402-javanese//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev10h_data_list=./conf/lists/402-javanese//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training
+unsup_data_list=./conf/lists/402-javanese//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
new file mode 100644
index 00000000000..99438159ae6
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training +train_data_list=./conf/lists/402-javanese//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev +dev2h_data_list=./conf/lists/402-javanese//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev +dev10h_data_list=./conf/lists/402-javanese//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/402-javanese//untranscribed-training.list + ./conf/lists/402-javanese//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training + /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf new file mode 100644 index 00000000000..9096a21fdc4 --- /dev/null +++ b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf @@ -0,0 +1,47 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training
+train_data_list=./conf/lists/403-dholuo//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev10h_data_list=./conf/lists/403-dholuo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training
+unsup_data_list=./conf/lists/403-dholuo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
new file mode 100644
index 00000000000..827a1ca5ed0
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training +train_data_list=./conf/lists/403-dholuo//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev +dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev +dev10h_data_list=./conf/lists/403-dholuo//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/403-dholuo//untranscribed-training.list + ./conf/lists/403-dholuo//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training + /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf new file mode 100644 index 00000000000..4c36a8878fd --- /dev/null +++ b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf @@ -0,0 +1,78 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
+train_data_list=./conf/lists/404-georgian//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
+dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
+dev10h_data_list=./conf/lists/404-georgian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
+eval_data_list=./conf/lists/404-georgian//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-eval.ecf.xml
+eval_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
+) # eval_kwlists
+eval_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
+  /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/404-georgian//dev.list
+  ./conf/lists/404-georgian//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
+unsup_data_list=./conf/lists/404-georgian//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf b/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf
new file mode 100644
index 00000000000..570bcab68ec
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf
@@ -0,0 +1,54 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
+train_data_list=./conf/lists/404-georgian//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
+dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
+dev10h_data_list=./conf/lists/404-georgian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/404-georgian//untranscribed-training.list
+  ./conf/lists/404-georgian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
+  /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lists/101-cantonese/dev.list b/egs/babel/s5d/conf/lists/101-cantonese/dev.list
new file mode 100644
index 00000000000..581862a9701
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/101-cantonese/dev.list
@@ -0,0 +1,120 @@
+BABEL_BP_101_10470_20111118_172644_inLine
+BABEL_BP_101_10470_20111118_172644_outLine
+BABEL_BP_101_10713_20111024_220917_inLine +BABEL_BP_101_10713_20111024_220917_outLine +BABEL_BP_101_10733_20111021_141006_inLine +BABEL_BP_101_10733_20111021_141006_outLine +BABEL_BP_101_11982_20111027_140138_inLine +BABEL_BP_101_11982_20111027_140138_outLine +BABEL_BP_101_15916_20111129_174019_inLine +BABEL_BP_101_15916_20111129_174019_outLine +BABEL_BP_101_16346_20111117_212011_inLine +BABEL_BP_101_16346_20111117_212011_outLine +BABEL_BP_101_17983_20111027_140721_inLine +BABEL_BP_101_17983_20111027_140721_outLine +BABEL_BP_101_19656_20111103_235107_inLine +BABEL_BP_101_19656_20111103_235107_outLine +BABEL_BP_101_20471_20111102_141335_inLine +BABEL_BP_101_20471_20111102_141335_outLine +BABEL_BP_101_20741_20111018_195422_inLine +BABEL_BP_101_20741_20111018_195422_outLine +BABEL_BP_101_24833_20111031_142944_inLine +BABEL_BP_101_24833_20111031_142944_outLine +BABEL_BP_101_29290_20111031_003657_inLine +BABEL_BP_101_29290_20111031_003657_outLine +BABEL_BP_101_29589_20111126_175320_inLine +BABEL_BP_101_29589_20111126_175320_outLine +BABEL_BP_101_36722_20111104_030316_inLine +BABEL_BP_101_36722_20111104_030316_outLine +BABEL_BP_101_37784_20111208_190128_inLine +BABEL_BP_101_37784_20111208_190128_outLine +BABEL_BP_101_39963_20111026_150832_inLine +BABEL_BP_101_39963_20111026_150832_outLine +BABEL_BP_101_41146_20111026_153646_inLine +BABEL_BP_101_41146_20111026_153646_outLine +BABEL_BP_101_41541_20111206_172913_inLine +BABEL_BP_101_41541_20111206_172913_outLine +BABEL_BP_101_43306_20111103_161140_inLine +BABEL_BP_101_43306_20111103_161140_outLine +BABEL_BP_101_49582_20111027_141449_inLine +BABEL_BP_101_49582_20111027_141449_outLine +BABEL_BP_101_50718_20111020_135643_inLine +BABEL_BP_101_50718_20111020_135643_outLine +BABEL_BP_101_50798_20111026_223324_inLine +BABEL_BP_101_50798_20111026_223324_outLine +BABEL_BP_101_50883_20111102_204642_inLine +BABEL_BP_101_50883_20111102_204642_outLine +BABEL_BP_101_52335_20111203_155425_inLine +BABEL_BP_101_52335_20111203_155425_outLine +BABEL_BP_101_53994_20111202_163841_inLine +BABEL_BP_101_53994_20111202_163841_outLine +BABEL_BP_101_54339_20111124_170134_inLine +BABEL_BP_101_54339_20111124_170134_outLine +BABEL_BP_101_54621_20111125_183536_inLine +BABEL_BP_101_54621_20111125_183536_outLine +BABEL_BP_101_57724_20111027_181810_inLine +BABEL_BP_101_57724_20111027_181810_outLine +BABEL_BP_101_59175_20111027_151958_inLine +BABEL_BP_101_59175_20111027_151958_outLine +BABEL_BP_101_60193_20111102_144921_inLine +BABEL_BP_101_60193_20111102_144921_outLine +BABEL_BP_101_63114_20111123_012206_inLine +BABEL_BP_101_63114_20111123_012206_outLine +BABEL_BP_101_64351_20111124_153905_inLine +BABEL_BP_101_64351_20111124_153905_outLine +BABEL_BP_101_67411_20111030_182522_inLine +BABEL_BP_101_67411_20111030_182522_outLine +BABEL_BP_101_67750_20111025_140818_inLine +BABEL_BP_101_67750_20111025_140818_outLine +BABEL_BP_101_70285_20111026_191056_inLine +BABEL_BP_101_70285_20111026_191056_outLine +BABEL_BP_101_70625_20111129_171555_inLine +BABEL_BP_101_70625_20111129_171555_outLine +BABEL_BP_101_76192_20111102_164411_inLine +BABEL_BP_101_76192_20111102_164411_outLine +BABEL_BP_101_77137_20111125_163632_inLine +BABEL_BP_101_77137_20111125_163632_outLine +BABEL_BP_101_77591_20111114_194820_inLine +BABEL_BP_101_77591_20111114_194820_outLine +BABEL_BP_101_80150_20111117_003728_inLine +BABEL_BP_101_80150_20111117_003728_outLine +BABEL_BP_101_81119_20111118_140013_inLine +BABEL_BP_101_81119_20111118_140013_outLine +BABEL_BP_101_81717_20111118_145402_inLine 
+BABEL_BP_101_81717_20111118_145402_outLine +BABEL_BP_101_83531_20111104_002551_inLine +BABEL_BP_101_83531_20111104_002551_outLine +BABEL_BP_101_85573_20111019_141646_inLine +BABEL_BP_101_85573_20111019_141646_outLine +BABEL_BP_101_87539_20111201_130219_inLine +BABEL_BP_101_87539_20111201_130219_outLine +BABEL_BP_101_87607_20111125_162304_inLine +BABEL_BP_101_87607_20111125_162304_outLine +BABEL_BP_101_90082_20111127_153333_inLine +BABEL_BP_101_90082_20111127_153333_outLine +BABEL_BP_101_90559_20111203_144741_inLine +BABEL_BP_101_90559_20111203_144741_outLine +BABEL_BP_101_91723_20111104_231255_inLine +BABEL_BP_101_91723_20111104_231255_outLine +BABEL_BP_101_92602_20111029_191642_inLine +BABEL_BP_101_92602_20111029_191642_outLine +BABEL_BP_101_94235_20111119_200950_inLine +BABEL_BP_101_94235_20111119_200950_outLine +BABEL_BP_101_95120_20111120_194049_inLine +BABEL_BP_101_95120_20111120_194049_outLine +BABEL_BP_101_95121_20111204_185315_inLine +BABEL_BP_101_95121_20111204_185315_outLine +BABEL_BP_101_95350_20111018_202556_inLine +BABEL_BP_101_95350_20111018_202556_outLine +BABEL_BP_101_95514_20111203_141811_inLine +BABEL_BP_101_95514_20111203_141811_outLine +BABEL_BP_101_95637_20111024_141608_inLine +BABEL_BP_101_95637_20111024_141608_outLine +BABEL_BP_101_95736_20111102_184136_inLine +BABEL_BP_101_95736_20111102_184136_outLine +BABEL_BP_101_97518_20111130_230103_inLine +BABEL_BP_101_97518_20111130_230103_outLine +BABEL_BP_101_98402_20111203_194645_inLine +BABEL_BP_101_98402_20111203_194645_outLine +BABEL_BP_101_98675_20111117_190458_inLine +BABEL_BP_101_98675_20111117_190458_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/eval.list b/egs/babel/s5d/conf/lists/101-cantonese/eval.list new file mode 100644 index 00000000000..d2301ae3d82 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/eval.list @@ -0,0 +1,220 @@ +BABEL_BP_101_11267_20111202_163633_inLine +BABEL_BP_101_11267_20111202_163633_outLine +BABEL_BP_101_11311_20111017_201941_inLine +BABEL_BP_101_11311_20111017_201941_outLine +BABEL_BP_101_12535_20111203_130510_inLine +BABEL_BP_101_12535_20111203_130510_outLine +BABEL_BP_101_13065_20111118_192048_inLine +BABEL_BP_101_13065_20111118_192048_outLine +BABEL_BP_101_13476_20111121_181636_inLine +BABEL_BP_101_13476_20111121_181636_outLine +BABEL_BP_101_14707_20111122_145307_inLine +BABEL_BP_101_14836_20111124_161142_inLine +BABEL_BP_101_14836_20111124_161142_outLine +BABEL_BP_101_14836_20111124_162649_inLine +BABEL_BP_101_14836_20111124_162649_outLine +BABEL_BP_101_15146_20111017_171639_inLine +BABEL_BP_101_15146_20111017_171639_outLine +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_16299_20111029_221723_inLine +BABEL_BP_101_16299_20111029_221723_outLine +BABEL_BP_101_16646_20111116_212752_inLine +BABEL_BP_101_16646_20111116_212752_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_19063_20111117_154053_inLine +BABEL_BP_101_19619_20111027_130540_inLine +BABEL_BP_101_19619_20111027_130540_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_21050_20111127_140516_inLine +BABEL_BP_101_21052_20111117_134126_inLine +BABEL_BP_101_21052_20111117_134126_outLine +BABEL_BP_101_22351_20111117_141906_inLine +BABEL_BP_101_22351_20111117_141906_outLine +BABEL_BP_101_22351_20111117_142946_inLine +BABEL_BP_101_22351_20111117_142946_outLine +BABEL_BP_101_24589_20111122_200522_inLine 
+BABEL_BP_101_25106_20111103_002754_inLine +BABEL_BP_101_25106_20111103_002754_outLine +BABEL_BP_101_26598_20111117_165818_inLine +BABEL_BP_101_26598_20111117_165818_outLine +BABEL_BP_101_27724_20111128_203411_inLine +BABEL_BP_101_27724_20111128_203411_outLine +BABEL_BP_101_28990_20111120_210441_inLine +BABEL_BP_101_28990_20111120_210441_outLine +BABEL_BP_101_30642_20111116_150618_inLine +BABEL_BP_101_30642_20111116_150618_outLine +BABEL_BP_101_32011_20111201_004544_inLine +BABEL_BP_101_32011_20111201_004544_outLine +BABEL_BP_101_32045_20111104_024613_inLine +BABEL_BP_101_32045_20111104_024613_outLine +BABEL_BP_101_32132_20111119_185103_inLine +BABEL_BP_101_32132_20111119_185103_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine +BABEL_BP_101_35074_20111203_144945_inLine +BABEL_BP_101_35074_20111203_144945_outLine +BABEL_BP_101_35612_20111110_210341_inLine +BABEL_BP_101_35612_20111110_210341_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_36155_20111120_144557_inLine +BABEL_BP_101_36155_20111120_144557_outLine +BABEL_BP_101_36155_20111120_150859_inLine +BABEL_BP_101_36155_20111120_150859_outLine +BABEL_BP_101_36868_20111117_210558_inLine +BABEL_BP_101_37348_20111130_223024_inLine +BABEL_BP_101_37348_20111130_223024_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_38640_20111028_200532_inLine +BABEL_BP_101_38640_20111028_200532_outLine +BABEL_BP_101_38640_20111028_202051_inLine +BABEL_BP_101_38640_20111028_202051_outLine +BABEL_BP_101_39114_20111128_134323_inLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_41797_20111117_181049_inLine +BABEL_BP_101_41797_20111117_181049_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46409_20111103_190907_inLine +BABEL_BP_101_46409_20111103_190907_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_46521_20111027_144539_inLine +BABEL_BP_101_46521_20111027_144539_outLine +BABEL_BP_101_46950_20111102_231112_inLine +BABEL_BP_101_46950_20111102_231112_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_48645_20111121_182116_inLine +BABEL_BP_101_48645_20111121_182116_outLine +BABEL_BP_101_48645_20111121_183054_inLine +BABEL_BP_101_48645_20111121_183054_outLine +BABEL_BP_101_49042_20111030_233559_inLine +BABEL_BP_101_49042_20111030_233559_outLine +BABEL_BP_101_49173_20111128_203628_inLine +BABEL_BP_101_49173_20111128_203628_outLine +BABEL_BP_101_49173_20111128_204848_inLine +BABEL_BP_101_49173_20111128_204848_outLine +BABEL_BP_101_49239_20111122_153732_inLine +BABEL_BP_101_49239_20111122_153732_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_50555_20111120_155930_inLine +BABEL_BP_101_50555_20111120_155930_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_53278_20111122_170608_inLine +BABEL_BP_101_53463_20111120_193926_inLine 
+BABEL_BP_101_53463_20111120_193926_outLine +BABEL_BP_101_53982_20111122_191730_inLine +BABEL_BP_101_57422_20111122_180847_inLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_59169_20111122_141419_inLine +BABEL_BP_101_59671_20111027_145636_inLine +BABEL_BP_101_59671_20111027_145636_outLine +BABEL_BP_101_59891_20111124_143157_inLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_60277_20111126_194551_inLine +BABEL_BP_101_60277_20111126_194551_outLine +BABEL_BP_101_60277_20111126_200232_inLine +BABEL_BP_101_60277_20111126_200232_outLine +BABEL_BP_101_61203_20111030_130830_inLine +BABEL_BP_101_61203_20111030_130830_outLine +BABEL_BP_101_61906_20111117_202948_inLine +BABEL_BP_101_61988_20111028_001218_inLine +BABEL_BP_101_61988_20111028_001219_outLine +BABEL_BP_101_64946_20111201_195421_inLine +BABEL_BP_101_64946_20111201_195421_outLine +BABEL_BP_101_65601_20111103_222906_inLine +BABEL_BP_101_65601_20111103_222906_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_72647_20111128_150245_inLine +BABEL_BP_101_72746_20111121_191752_inLine +BABEL_BP_101_73782_20111126_201918_inLine +BABEL_BP_101_73782_20111126_201918_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_74986_20111116_153007_inLine +BABEL_BP_101_74986_20111116_153007_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75799_20111122_163729_inLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine +BABEL_BP_101_76925_20111103_205340_inLine +BABEL_BP_101_76925_20111103_205340_outLine +BABEL_BP_101_77465_20111120_175215_inLine +BABEL_BP_101_77465_20111120_175215_outLine +BABEL_BP_101_78046_20111125_134944_inLine +BABEL_BP_101_78046_20111125_134944_outLine +BABEL_BP_101_79619_20111119_194350_inLine +BABEL_BP_101_79619_20111119_194350_outLine +BABEL_BP_101_79860_20111102_155320_inLine +BABEL_BP_101_79860_20111102_155320_outLine +BABEL_BP_101_80874_20111125_172008_inLine +BABEL_BP_101_80874_20111125_172008_outLine +BABEL_BP_101_81053_20111114_221753_inLine +BABEL_BP_101_81053_20111114_221753_outLine +BABEL_BP_101_81261_20111104_210152_inLine +BABEL_BP_101_81261_20111104_210152_outLine +BABEL_BP_101_81261_20111104_211429_inLine +BABEL_BP_101_81261_20111104_211429_outLine +BABEL_BP_101_81583_20111022_221726_inLine +BABEL_BP_101_81583_20111022_221726_outLine +BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_83700_20111121_152308_inLine +BABEL_BP_101_83700_20111121_152308_outLine +BABEL_BP_101_83713_20111104_193756_inLine +BABEL_BP_101_83713_20111104_193756_outLine +BABEL_BP_101_86014_20111120_171648_inLine +BABEL_BP_101_88982_20111126_152512_inLine 
+BABEL_BP_101_88982_20111126_152512_outLine +BABEL_BP_101_89301_20111128_210850_inLine +BABEL_BP_101_89301_20111128_210850_outLine +BABEL_BP_101_89993_20111125_174226_inLine +BABEL_BP_101_89993_20111125_174226_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_91677_20111122_233646_inLine +BABEL_BP_101_91677_20111122_233646_outLine +BABEL_BP_101_91703_20111116_145954_inLine +BABEL_BP_101_91703_20111116_145954_outLine +BABEL_BP_101_94162_20111118_160545_inLine +BABEL_BP_101_94162_20111118_160545_outLine +BABEL_BP_101_95861_20111028_214238_inLine +BABEL_BP_101_95861_20111028_214238_outLine +BABEL_BP_101_96108_20111122_132644_inLine +BABEL_BP_101_97254_20111117_145052_inLine +BABEL_BP_101_97254_20111117_145052_outLine +BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list new file mode 100644 index 00000000000..1980d99ef3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list @@ -0,0 +1,63 @@ +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine 
+BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine
diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list
new file mode 100644
index 00000000000..a7db2aa2a23
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list
@@ -0,0 +1,965 @@
+BABEL_BP_101_10033_20111024_205740_inLine +BABEL_BP_101_10033_20111024_205740_outLine +BABEL_BP_101_10066_20111120_165933_inLine +BABEL_BP_101_10066_20111120_165933_outLine +BABEL_BP_101_10160_20111017_201159_inLine +BABEL_BP_101_10160_20111017_201159_outLine +BABEL_BP_101_10211_20111026_234151_inLine +BABEL_BP_101_10211_20111026_234151_outLine +BABEL_BP_101_10900_20111029_155829_inLine +BABEL_BP_101_10900_20111029_155829_outLine +BABEL_BP_101_10925_20111025_152502_inLine +BABEL_BP_101_10925_20111025_152502_outLine +BABEL_BP_101_10945_20111030_173950_inLine +BABEL_BP_101_10945_20111030_173950_outLine +BABEL_BP_101_10973_20111019_183249_inLine +BABEL_BP_101_10973_20111019_183249_outLine +BABEL_BP_101_11031_20111024_203919_inLine +BABEL_BP_101_11031_20111024_203920_outLine +BABEL_BP_101_11036_20111019_192958_inLine +BABEL_BP_101_11036_20111019_192958_outLine +BABEL_BP_101_11371_20111018_183136_inLine +BABEL_BP_101_11371_20111018_183136_outLine +BABEL_BP_101_11422_20111019_145654_inLine +BABEL_BP_101_11422_20111019_145654_outLine +BABEL_BP_101_11479_20111021_205756_inLine +BABEL_BP_101_11479_20111021_205756_outLine +BABEL_BP_101_11690_20111206_171715_inLine +BABEL_BP_101_11690_20111206_171715_outLine +BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_11827_20111025_190953_inLine +BABEL_BP_101_11827_20111025_190954_outLine +BABEL_BP_101_11868_20111203_180801_inLine +BABEL_BP_101_11868_20111203_180801_outLine +BABEL_BP_101_12003_20111116_132035_inLine +BABEL_BP_101_12003_20111116_132035_outLine +BABEL_BP_101_12552_20111115_153047_inLine +BABEL_BP_101_12552_20111115_153047_outLine +BABEL_BP_101_12631_20111020_140550_inLine +BABEL_BP_101_12631_20111020_140550_outLine +BABEL_BP_101_12807_20111207_142617_inLine +BABEL_BP_101_12807_20111207_142617_outLine +BABEL_BP_101_12897_20111115_165516_inLine +BABEL_BP_101_12897_20111115_165516_outLine +BABEL_BP_101_13229_20111127_140526_inLine +BABEL_BP_101_13229_20111127_140526_outLine +BABEL_BP_101_13272_20111027_193738_inLine +BABEL_BP_101_13272_20111027_193738_outLine +BABEL_BP_101_13530_20111203_184256_inLine +BABEL_BP_101_13530_20111203_184256_outLine +BABEL_BP_101_13781_20111125_145211_inLine +BABEL_BP_101_13781_20111125_145211_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14294_20111103_134040_inLine +BABEL_BP_101_14294_20111103_134040_outLine +BABEL_BP_101_14500_20111114_202424_inLine +BABEL_BP_101_14500_20111114_202424_outLine +BABEL_BP_101_14666_20111122_125103_inLine +BABEL_BP_101_14666_20111122_125103_outLine +BABEL_BP_101_14729_20111114_200940_inLine +BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_14769_20111121_155034_inLine +BABEL_BP_101_14769_20111121_155034_outLine +BABEL_BP_101_14891_20111018_130049_inLine +BABEL_BP_101_14891_20111018_130049_outLine +BABEL_BP_101_14915_20111119_165151_inLine
+BABEL_BP_101_14915_20111119_165151_outLine +BABEL_BP_101_14936_20111026_202920_inLine +BABEL_BP_101_14936_20111026_202920_outLine +BABEL_BP_101_14997_20111126_152707_inLine +BABEL_BP_101_14997_20111126_152707_outLine +BABEL_BP_101_15142_20111029_163819_inLine +BABEL_BP_101_15142_20111029_163819_outLine +BABEL_BP_101_15460_20111121_223019_inLine +BABEL_BP_101_15460_20111121_223019_outLine +BABEL_BP_101_15473_20111031_131455_inLine +BABEL_BP_101_15473_20111031_131455_outLine +BABEL_BP_101_15696_20111022_193230_inLine +BABEL_BP_101_15696_20111022_193230_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_15881_20111024_141728_inLine +BABEL_BP_101_15881_20111024_141729_outLine +BABEL_BP_101_16066_20111020_145228_inLine +BABEL_BP_101_16066_20111020_145228_outLine +BABEL_BP_101_16266_20111027_153525_inLine +BABEL_BP_101_16266_20111027_153525_outLine +BABEL_BP_101_16313_20111022_221750_inLine +BABEL_BP_101_16313_20111022_221750_outLine +BABEL_BP_101_16406_20111103_000453_inLine +BABEL_BP_101_16406_20111103_000453_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16660_20111020_211620_inLine +BABEL_BP_101_16660_20111020_211620_outLine +BABEL_BP_101_16669_20111019_142510_inLine +BABEL_BP_101_16669_20111019_142510_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17013_20111117_011741_inLine +BABEL_BP_101_17013_20111117_011741_outLine +BABEL_BP_101_17018_20111020_161922_inLine +BABEL_BP_101_17018_20111020_161922_outLine +BABEL_BP_101_17080_20111020_184025_inLine +BABEL_BP_101_17080_20111020_184025_outLine +BABEL_BP_101_17093_20111124_155145_inLine +BABEL_BP_101_17093_20111124_155145_outLine +BABEL_BP_101_17203_20111026_142831_inLine +BABEL_BP_101_17203_20111026_142831_outLine +BABEL_BP_101_17203_20111026_145429_inLine +BABEL_BP_101_17203_20111026_145429_outLine +BABEL_BP_101_17572_20111116_155402_inLine +BABEL_BP_101_17572_20111116_155402_outLine +BABEL_BP_101_17606_20111130_231145_inLine +BABEL_BP_101_17606_20111130_231145_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_18701_20111121_171853_inLine +BABEL_BP_101_18701_20111121_171853_outLine +BABEL_BP_101_18950_20111127_144125_inLine +BABEL_BP_101_18950_20111127_144125_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_19147_20111021_174406_inLine +BABEL_BP_101_19147_20111021_174406_outLine +BABEL_BP_101_20320_20111027_210504_inLine +BABEL_BP_101_20320_20111027_210504_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_20518_20111119_174458_inLine +BABEL_BP_101_20518_20111119_174458_outLine +BABEL_BP_101_20582_20111023_162723_inLine +BABEL_BP_101_20582_20111023_162723_outLine +BABEL_BP_101_20590_20111017_172008_inLine +BABEL_BP_101_20590_20111017_172008_outLine +BABEL_BP_101_20685_20111019_125028_inLine +BABEL_BP_101_20685_20111019_125028_outLine +BABEL_BP_101_20740_20111125_195727_inLine +BABEL_BP_101_20740_20111125_195727_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21430_20111027_145918_inLine +BABEL_BP_101_21430_20111027_145918_outLine +BABEL_BP_101_21477_20111031_155928_inLine +BABEL_BP_101_21477_20111031_155928_outLine +BABEL_BP_101_21584_20111030_210806_inLine +BABEL_BP_101_21584_20111030_210807_outLine 
+BABEL_BP_101_21929_20111025_182511_inLine +BABEL_BP_101_21929_20111025_182511_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22898_20111022_141857_inLine +BABEL_BP_101_22898_20111022_141857_outLine +BABEL_BP_101_22903_20111116_132430_inLine +BABEL_BP_101_22903_20111116_132430_outLine +BABEL_BP_101_22910_20111028_190802_inLine +BABEL_BP_101_22910_20111028_190802_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23167_20111026_194856_inLine +BABEL_BP_101_23167_20111026_194856_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine +BABEL_BP_101_23719_20111103_143124_inLine +BABEL_BP_101_23719_20111103_143124_outLine +BABEL_BP_101_23930_20111125_132944_inLine +BABEL_BP_101_23930_20111125_132944_outLine +BABEL_BP_101_24420_20111122_215626_inLine +BABEL_BP_101_24420_20111122_215626_outLine +BABEL_BP_101_24608_20111019_191348_inLine +BABEL_BP_101_24608_20111019_191348_outLine +BABEL_BP_101_24642_20111129_132050_inLine +BABEL_BP_101_24642_20111129_132050_outLine +BABEL_BP_101_24661_20111207_131837_inLine +BABEL_BP_101_24661_20111207_131837_outLine +BABEL_BP_101_25021_20111018_200603_inLine +BABEL_BP_101_25021_20111018_200603_outLine +BABEL_BP_101_25035_20111028_135038_inLine +BABEL_BP_101_25035_20111028_135038_outLine +BABEL_BP_101_25236_20111129_194650_inLine +BABEL_BP_101_25236_20111129_194650_outLine +BABEL_BP_101_25278_20111125_162450_inLine +BABEL_BP_101_25278_20111125_162450_outLine +BABEL_BP_101_25576_20111022_203923_inLine +BABEL_BP_101_25576_20111022_203923_outLine +BABEL_BP_101_25934_20111014_130931_inLine +BABEL_BP_101_25934_20111014_130931_outLine +BABEL_BP_101_26017_20111030_202851_inLine +BABEL_BP_101_26017_20111030_202851_outLine +BABEL_BP_101_26350_20111019_203820_inLine +BABEL_BP_101_26350_20111019_203820_outLine +BABEL_BP_101_26684_20111119_145219_inLine +BABEL_BP_101_26684_20111119_145219_outLine +BABEL_BP_101_27064_20111019_132106_inLine +BABEL_BP_101_27064_20111019_132106_outLine +BABEL_BP_101_27178_20111025_174857_inLine +BABEL_BP_101_27178_20111025_174857_outLine +BABEL_BP_101_27427_20111021_132850_inLine +BABEL_BP_101_27427_20111021_132850_outLine +BABEL_BP_101_27503_20111021_175113_inLine +BABEL_BP_101_27503_20111021_175113_outLine +BABEL_BP_101_27619_20111102_201443_inLine +BABEL_BP_101_27619_20111102_201443_outLine +BABEL_BP_101_28107_20111019_140723_inLine +BABEL_BP_101_28107_20111019_140723_outLine +BABEL_BP_101_28132_20111023_133733_inLine +BABEL_BP_101_28132_20111023_133733_outLine +BABEL_BP_101_28161_20111024_180609_inLine +BABEL_BP_101_28161_20111024_180609_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_28260_20111021_184044_inLine +BABEL_BP_101_28260_20111021_184044_outLine +BABEL_BP_101_28675_20111118_185525_inLine +BABEL_BP_101_28675_20111118_185525_outLine +BABEL_BP_101_28740_20111028_214620_inLine +BABEL_BP_101_28740_20111028_214620_outLine +BABEL_BP_101_29097_20111018_135944_inLine +BABEL_BP_101_29097_20111018_135944_outLine +BABEL_BP_101_29133_20111024_182947_inLine +BABEL_BP_101_29133_20111024_182947_outLine +BABEL_BP_101_29302_20111023_172339_inLine +BABEL_BP_101_29302_20111023_172339_outLine +BABEL_BP_101_29328_20111019_133534_inLine +BABEL_BP_101_29328_20111019_133534_outLine +BABEL_BP_101_29335_20111121_164238_inLine 
+BABEL_BP_101_29335_20111121_164238_outLine +BABEL_BP_101_29444_20111024_213300_inLine +BABEL_BP_101_29444_20111024_213300_outLine +BABEL_BP_101_29959_20111116_201012_inLine +BABEL_BP_101_29959_20111116_201012_outLine +BABEL_BP_101_30168_20111118_132348_inLine +BABEL_BP_101_30168_20111118_132348_outLine +BABEL_BP_101_30530_20111024_153842_inLine +BABEL_BP_101_30530_20111024_153842_outLine +BABEL_BP_101_30722_20111208_204304_inLine +BABEL_BP_101_30722_20111208_204304_outLine +BABEL_BP_101_31265_20111207_131905_inLine +BABEL_BP_101_31265_20111207_131905_outLine +BABEL_BP_101_31393_20111018_154135_inLine +BABEL_BP_101_31393_20111018_154135_outLine +BABEL_BP_101_31441_20111026_004058_inLine +BABEL_BP_101_31441_20111026_004058_outLine +BABEL_BP_101_31451_20111024_213113_inLine +BABEL_BP_101_31451_20111024_213113_outLine +BABEL_BP_101_31460_20111019_144918_inLine +BABEL_BP_101_31460_20111019_144918_outLine +BABEL_BP_101_31917_20111124_151225_inLine +BABEL_BP_101_31917_20111124_151225_outLine +BABEL_BP_101_31980_20111025_130427_inLine +BABEL_BP_101_31980_20111025_130427_outLine +BABEL_BP_101_32274_20111024_160835_inLine +BABEL_BP_101_32274_20111024_160835_outLine +BABEL_BP_101_32295_20111111_144923_inLine +BABEL_BP_101_32295_20111111_144923_outLine +BABEL_BP_101_32452_20111022_135256_inLine +BABEL_BP_101_32452_20111022_135256_outLine +BABEL_BP_101_32710_20111119_133220_inLine +BABEL_BP_101_32710_20111119_133220_outLine +BABEL_BP_101_32890_20111130_220957_inLine +BABEL_BP_101_32890_20111130_220957_outLine +BABEL_BP_101_33023_20111024_133813_inLine +BABEL_BP_101_33023_20111024_133813_outLine +BABEL_BP_101_33671_20111019_130712_inLine +BABEL_BP_101_33671_20111019_130712_outLine +BABEL_BP_101_33742_20111118_231555_inLine +BABEL_BP_101_33742_20111118_231555_outLine +BABEL_BP_101_34194_20111024_173622_inLine +BABEL_BP_101_34194_20111024_173622_outLine +BABEL_BP_101_34446_20111019_005315_inLine +BABEL_BP_101_34446_20111019_005315_outLine +BABEL_BP_101_34930_20111024_143654_inLine +BABEL_BP_101_34930_20111024_143654_outLine +BABEL_BP_101_34961_20111027_175107_inLine +BABEL_BP_101_34961_20111027_175107_outLine +BABEL_BP_101_35006_20111120_181354_inLine +BABEL_BP_101_35006_20111120_181354_outLine +BABEL_BP_101_35016_20111203_203519_inLine +BABEL_BP_101_35016_20111203_203519_outLine +BABEL_BP_101_35179_20111124_131132_inLine +BABEL_BP_101_35179_20111124_131132_outLine +BABEL_BP_101_35357_20111203_170817_inLine +BABEL_BP_101_35357_20111203_170817_outLine +BABEL_BP_101_35391_20111130_144901_inLine +BABEL_BP_101_35391_20111130_144901_outLine +BABEL_BP_101_35576_20111118_131203_inLine +BABEL_BP_101_35576_20111118_131203_outLine +BABEL_BP_101_35932_20111023_151638_inLine +BABEL_BP_101_35932_20111023_151638_outLine +BABEL_BP_101_36268_20111028_174826_inLine +BABEL_BP_101_36268_20111028_174826_outLine +BABEL_BP_101_36383_20111129_181746_inLine +BABEL_BP_101_36383_20111129_181746_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_36502_20111025_145704_inLine +BABEL_BP_101_36502_20111025_145704_outLine +BABEL_BP_101_36711_20111104_142236_inLine +BABEL_BP_101_36711_20111104_142236_outLine +BABEL_BP_101_37094_20111019_184657_inLine +BABEL_BP_101_37094_20111019_184657_outLine +BABEL_BP_101_37110_20111019_203150_inLine +BABEL_BP_101_37110_20111019_203150_outLine +BABEL_BP_101_37203_20111103_180606_inLine +BABEL_BP_101_37203_20111103_180606_outLine +BABEL_BP_101_37210_20111102_172955_inLine +BABEL_BP_101_37210_20111102_172955_outLine 
+BABEL_BP_101_37258_20111110_203745_inLine +BABEL_BP_101_37258_20111110_203745_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_37461_20111022_210313_inLine +BABEL_BP_101_37461_20111022_210313_outLine +BABEL_BP_101_37766_20111130_012017_inLine +BABEL_BP_101_37766_20111130_012017_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38698_20111025_183746_inLine +BABEL_BP_101_38698_20111025_183746_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_38912_20111120_214951_inLine +BABEL_BP_101_38912_20111120_214951_outLine +BABEL_BP_101_38956_20111025_175018_inLine +BABEL_BP_101_38956_20111025_175018_outLine +BABEL_BP_101_39080_20111124_182207_inLine +BABEL_BP_101_39080_20111124_182207_outLine +BABEL_BP_101_39140_20111026_125824_inLine +BABEL_BP_101_39140_20111026_125824_outLine +BABEL_BP_101_39246_20111119_185410_inLine +BABEL_BP_101_39246_20111119_185410_outLine +BABEL_BP_101_39287_20111119_192815_inLine +BABEL_BP_101_39287_20111119_192815_outLine +BABEL_BP_101_39317_20111020_162113_inLine +BABEL_BP_101_39317_20111020_162113_outLine +BABEL_BP_101_39756_20111207_162851_inLine +BABEL_BP_101_39756_20111207_162851_outLine +BABEL_BP_101_39809_20111025_182053_inLine +BABEL_BP_101_39809_20111025_182053_outLine +BABEL_BP_101_39915_20111101_164819_inLine +BABEL_BP_101_39915_20111101_164819_outLine +BABEL_BP_101_39997_20111124_152508_inLine +BABEL_BP_101_39997_20111124_152508_outLine +BABEL_BP_101_40046_20111018_185918_inLine +BABEL_BP_101_40046_20111018_185918_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40346_20111018_165337_inLine +BABEL_BP_101_40346_20111018_165337_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_40510_20111126_151543_inLine +BABEL_BP_101_40510_20111126_151543_outLine +BABEL_BP_101_40980_20111119_150324_inLine +BABEL_BP_101_40980_20111119_150324_outLine +BABEL_BP_101_41170_20111018_182942_inLine +BABEL_BP_101_41170_20111018_182942_outLine +BABEL_BP_101_41456_20111117_162327_inLine +BABEL_BP_101_41456_20111117_162327_outLine +BABEL_BP_101_41513_20111121_142105_inLine +BABEL_BP_101_41513_20111121_142105_outLine +BABEL_BP_101_41661_20111102_131955_inLine +BABEL_BP_101_41661_20111102_131955_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_42266_20111031_233515_inLine +BABEL_BP_101_42266_20111031_233515_outLine +BABEL_BP_101_42615_20111018_173023_inLine +BABEL_BP_101_42615_20111018_173023_outLine +BABEL_BP_101_42766_20111124_150047_inLine +BABEL_BP_101_42766_20111124_150047_outLine +BABEL_BP_101_42788_20111120_201122_inLine +BABEL_BP_101_42788_20111120_201122_outLine +BABEL_BP_101_43086_20111025_160708_inLine +BABEL_BP_101_43086_20111025_160708_outLine +BABEL_BP_101_43383_20111019_135432_inLine +BABEL_BP_101_43383_20111019_135432_outLine +BABEL_BP_101_44129_20111118_210653_inLine +BABEL_BP_101_44129_20111118_210653_outLine +BABEL_BP_101_44209_20111120_131002_inLine +BABEL_BP_101_44209_20111120_131002_outLine +BABEL_BP_101_44403_20111023_151830_inLine +BABEL_BP_101_44403_20111023_151830_outLine +BABEL_BP_101_44403_20111023_152732_inLine +BABEL_BP_101_44403_20111023_152732_outLine +BABEL_BP_101_44535_20111021_153223_inLine +BABEL_BP_101_44535_20111021_153223_outLine +BABEL_BP_101_44836_20111119_154154_inLine 
+BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_45361_20111124_141850_inLine +BABEL_BP_101_45361_20111124_141850_outLine +BABEL_BP_101_45472_20111020_171106_inLine +BABEL_BP_101_45472_20111020_171106_outLine +BABEL_BP_101_45511_20111025_204720_inLine +BABEL_BP_101_45511_20111025_204720_outLine +BABEL_BP_101_45642_20111027_171601_inLine +BABEL_BP_101_45642_20111027_171601_outLine +BABEL_BP_101_45678_20111020_155310_inLine +BABEL_BP_101_45678_20111020_155310_outLine +BABEL_BP_101_45702_20111130_133011_inLine +BABEL_BP_101_45702_20111130_133011_outLine +BABEL_BP_101_45738_20111129_143901_inLine +BABEL_BP_101_45738_20111129_143901_outLine +BABEL_BP_101_45931_20111021_190814_inLine +BABEL_BP_101_45931_20111021_190815_outLine +BABEL_BP_101_46243_20111020_204505_inLine +BABEL_BP_101_46243_20111020_204505_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_46603_20111128_205449_inLine +BABEL_BP_101_46603_20111128_205449_outLine +BABEL_BP_101_47128_20111027_143246_inLine +BABEL_BP_101_47128_20111027_143246_outLine +BABEL_BP_101_47634_20111026_134005_inLine +BABEL_BP_101_47634_20111026_134005_outLine +BABEL_BP_101_47646_20111126_015509_inLine +BABEL_BP_101_47646_20111126_015509_outLine +BABEL_BP_101_47661_20111028_183156_inLine +BABEL_BP_101_47661_20111028_183156_outLine +BABEL_BP_101_47794_20111204_021008_inLine +BABEL_BP_101_47794_20111204_021008_outLine +BABEL_BP_101_47823_20111129_204026_inLine +BABEL_BP_101_47823_20111129_204026_outLine +BABEL_BP_101_47906_20111119_130308_inLine +BABEL_BP_101_47906_20111119_130308_outLine +BABEL_BP_101_48053_20111020_130943_inLine +BABEL_BP_101_48053_20111020_130943_outLine +BABEL_BP_101_48188_20111117_210754_inLine +BABEL_BP_101_48188_20111117_210754_outLine +BABEL_BP_101_48410_20111021_230709_inLine +BABEL_BP_101_48410_20111021_230709_outLine +BABEL_BP_101_48418_20111203_171145_inLine +BABEL_BP_101_48418_20111203_171145_outLine +BABEL_BP_101_48511_20111026_133629_inLine +BABEL_BP_101_48511_20111026_133629_outLine +BABEL_BP_101_48559_20111118_125850_inLine +BABEL_BP_101_48559_20111118_125850_outLine +BABEL_BP_101_48733_20111117_140942_inLine +BABEL_BP_101_48733_20111117_140942_outLine +BABEL_BP_101_49306_20111130_170120_inLine +BABEL_BP_101_49306_20111130_170120_outLine +BABEL_BP_101_49452_20111027_171653_inLine +BABEL_BP_101_49452_20111027_171653_outLine +BABEL_BP_101_49541_20111104_192333_inLine +BABEL_BP_101_49541_20111104_192333_outLine +BABEL_BP_101_49624_20111120_201437_inLine +BABEL_BP_101_49624_20111120_201437_outLine +BABEL_BP_101_49689_20111125_174904_inLine +BABEL_BP_101_49689_20111125_174904_outLine +BABEL_BP_101_49773_20111021_195218_inLine +BABEL_BP_101_49773_20111021_195218_outLine +BABEL_BP_101_49864_20111023_192125_inLine +BABEL_BP_101_49864_20111023_192125_outLine +BABEL_BP_101_50101_20111019_173327_inLine +BABEL_BP_101_50101_20111019_173327_outLine +BABEL_BP_101_50201_20111026_154228_inLine +BABEL_BP_101_50201_20111026_154228_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50416_20111129_170514_inLine +BABEL_BP_101_50416_20111129_170514_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_50589_20111025_190441_inLine +BABEL_BP_101_50589_20111025_190441_outLine +BABEL_BP_101_50842_20111030_171650_inLine +BABEL_BP_101_50842_20111030_171650_outLine +BABEL_BP_101_51052_20111121_175102_inLine +BABEL_BP_101_51052_20111121_175102_outLine 
+BABEL_BP_101_51117_20111025_175138_inLine +BABEL_BP_101_51117_20111025_175138_outLine +BABEL_BP_101_51374_20111020_152431_inLine +BABEL_BP_101_51374_20111020_152431_outLine +BABEL_BP_101_51446_20111127_145511_inLine +BABEL_BP_101_51446_20111127_145511_outLine +BABEL_BP_101_51569_20111019_201532_inLine +BABEL_BP_101_51569_20111019_201532_outLine +BABEL_BP_101_51727_20111117_003536_inLine +BABEL_BP_101_51727_20111117_003536_outLine +BABEL_BP_101_52366_20111018_140013_inLine +BABEL_BP_101_52366_20111018_140013_outLine +BABEL_BP_101_52642_20111129_221057_inLine +BABEL_BP_101_52642_20111129_221057_outLine +BABEL_BP_101_53181_20111025_171245_inLine +BABEL_BP_101_53181_20111025_171246_outLine +BABEL_BP_101_53464_20111020_132212_inLine +BABEL_BP_101_53464_20111020_132212_outLine +BABEL_BP_101_53544_20111205_190859_inLine +BABEL_BP_101_53544_20111205_190859_outLine +BABEL_BP_101_53703_20111026_123307_inLine +BABEL_BP_101_53703_20111026_123307_outLine +BABEL_BP_101_53824_20111115_174804_inLine +BABEL_BP_101_53824_20111115_174804_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54315_20111018_150809_inLine +BABEL_BP_101_54315_20111018_150809_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_55369_20111022_150532_inLine +BABEL_BP_101_55369_20111022_150532_outLine +BABEL_BP_101_55786_20111023_175604_inLine +BABEL_BP_101_55786_20111023_175604_outLine +BABEL_BP_101_55786_20111023_181021_inLine +BABEL_BP_101_55786_20111023_181022_outLine +BABEL_BP_101_55944_20111124_180312_inLine +BABEL_BP_101_55944_20111124_180312_outLine +BABEL_BP_101_56070_20111030_192056_inLine +BABEL_BP_101_56070_20111030_192056_outLine +BABEL_BP_101_56117_20111120_230517_inLine +BABEL_BP_101_56117_20111120_230517_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_57457_20111104_004433_inLine +BABEL_BP_101_57457_20111104_004433_outLine +BABEL_BP_101_57629_20111018_150159_inLine +BABEL_BP_101_57629_20111018_150200_outLine +BABEL_BP_101_58137_20111121_200320_inLine +BABEL_BP_101_58137_20111121_200320_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_58536_20111103_202702_inLine +BABEL_BP_101_58536_20111103_202702_outLine +BABEL_BP_101_58715_20111025_173420_inLine +BABEL_BP_101_58715_20111025_173420_outLine +BABEL_BP_101_58863_20111029_204335_inLine +BABEL_BP_101_58863_20111029_204335_outLine +BABEL_BP_101_58923_20111021_133326_inLine +BABEL_BP_101_58923_20111021_133326_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59032_20111130_125508_inLine +BABEL_BP_101_59032_20111130_125508_outLine +BABEL_BP_101_59454_20111117_203722_inLine +BABEL_BP_101_59454_20111117_203722_outLine +BABEL_BP_101_59544_20111027_165941_inLine +BABEL_BP_101_59544_20111027_165941_outLine +BABEL_BP_101_59868_20111021_213412_inLine +BABEL_BP_101_59868_20111021_213412_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_59961_20111031_203903_inLine +BABEL_BP_101_59961_20111031_203903_outLine +BABEL_BP_101_60106_20111024_194048_inLine +BABEL_BP_101_60106_20111024_194048_outLine +BABEL_BP_101_60110_20111102_211956_inLine +BABEL_BP_101_60110_20111102_211956_outLine +BABEL_BP_101_60183_20111129_192036_inLine 
+BABEL_BP_101_60183_20111129_192036_outLine +BABEL_BP_101_60605_20111124_131048_inLine +BABEL_BP_101_60605_20111124_131048_outLine +BABEL_BP_101_60826_20111120_164851_inLine +BABEL_BP_101_60826_20111120_164851_outLine +BABEL_BP_101_61073_20111102_190426_inLine +BABEL_BP_101_61073_20111102_190426_outLine +BABEL_BP_101_61119_20111125_210556_inLine +BABEL_BP_101_61119_20111125_210556_outLine +BABEL_BP_101_61408_20111204_193348_inLine +BABEL_BP_101_61408_20111204_193348_outLine +BABEL_BP_101_61446_20111019_151903_inLine +BABEL_BP_101_61446_20111019_151903_outLine +BABEL_BP_101_61449_20111117_151606_inLine +BABEL_BP_101_61449_20111117_151606_outLine +BABEL_BP_101_61762_20111028_180944_inLine +BABEL_BP_101_61762_20111028_180944_outLine +BABEL_BP_101_61822_20111022_202742_inLine +BABEL_BP_101_61822_20111022_202742_outLine +BABEL_BP_101_62177_20111019_143057_inLine +BABEL_BP_101_62177_20111019_143057_outLine +BABEL_BP_101_63339_20111019_193743_inLine +BABEL_BP_101_63339_20111019_193743_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine +BABEL_BP_101_63711_20111025_215436_inLine +BABEL_BP_101_63711_20111025_215436_outLine +BABEL_BP_101_64205_20111203_135507_inLine +BABEL_BP_101_64205_20111203_135507_outLine +BABEL_BP_101_64404_20111018_165302_inLine +BABEL_BP_101_64404_20111018_165302_outLine +BABEL_BP_101_64889_20111124_220757_inLine +BABEL_BP_101_64889_20111124_220757_outLine +BABEL_BP_101_65606_20111116_140731_inLine +BABEL_BP_101_65606_20111116_140731_outLine +BABEL_BP_101_65743_20111019_161830_inLine +BABEL_BP_101_65743_20111019_161830_outLine +BABEL_BP_101_66188_20111206_204246_inLine +BABEL_BP_101_66188_20111206_204246_outLine +BABEL_BP_101_66798_20111026_145101_inLine +BABEL_BP_101_66798_20111026_145101_outLine +BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66866_20111128_183933_inLine +BABEL_BP_101_66866_20111128_183933_outLine +BABEL_BP_101_66903_20111021_130004_inLine +BABEL_BP_101_66903_20111021_130004_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67555_20111023_140926_inLine +BABEL_BP_101_67555_20111023_140926_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_68295_20111124_181015_inLine +BABEL_BP_101_68295_20111124_181015_outLine +BABEL_BP_101_68545_20111121_132438_inLine +BABEL_BP_101_68545_20111121_132438_outLine +BABEL_BP_101_68706_20111025_141920_inLine +BABEL_BP_101_68706_20111025_141920_outLine +BABEL_BP_101_68767_20111029_174711_inLine +BABEL_BP_101_68767_20111029_174711_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_69137_20111121_162510_inLine +BABEL_BP_101_69137_20111121_162510_outLine +BABEL_BP_101_69236_20111029_183129_inLine +BABEL_BP_101_69236_20111029_183130_outLine +BABEL_BP_101_69295_20111130_003858_inLine +BABEL_BP_101_69295_20111130_003858_outLine +BABEL_BP_101_69368_20111020_193935_inLine +BABEL_BP_101_69368_20111020_193935_outLine +BABEL_BP_101_69548_20111024_183457_inLine +BABEL_BP_101_69548_20111024_192648_inLine +BABEL_BP_101_69548_20111024_192648_outLine +BABEL_BP_101_69650_20111025_220513_inLine +BABEL_BP_101_69650_20111025_220513_outLine +BABEL_BP_101_69764_20111026_211954_inLine +BABEL_BP_101_69764_20111026_211954_outLine +BABEL_BP_101_70511_20111119_201802_inLine 
+BABEL_BP_101_70511_20111119_201802_outLine +BABEL_BP_101_70548_20111127_144545_inLine +BABEL_BP_101_70548_20111127_144545_outLine +BABEL_BP_101_70615_20111019_192646_inLine +BABEL_BP_101_70615_20111019_192646_outLine +BABEL_BP_101_70680_20111018_151854_inLine +BABEL_BP_101_70680_20111018_151854_outLine +BABEL_BP_101_71741_20111026_164112_inLine +BABEL_BP_101_71741_20111026_164112_outLine +BABEL_BP_101_71778_20111121_153418_inLine +BABEL_BP_101_71778_20111121_153418_outLine +BABEL_BP_101_72330_20111021_174758_inLine +BABEL_BP_101_72330_20111021_174758_outLine +BABEL_BP_101_72718_20111129_164931_inLine +BABEL_BP_101_72718_20111129_164931_outLine +BABEL_BP_101_72879_20111018_234432_inLine +BABEL_BP_101_72879_20111018_234432_outLine +BABEL_BP_101_73170_20111023_213358_inLine +BABEL_BP_101_73170_20111023_213358_outLine +BABEL_BP_101_73542_20111019_212519_inLine +BABEL_BP_101_73542_20111019_212520_outLine +BABEL_BP_101_73761_20111115_130043_inLine +BABEL_BP_101_73761_20111115_130043_outLine +BABEL_BP_101_73786_20111019_133350_inLine +BABEL_BP_101_73786_20111019_133350_outLine +BABEL_BP_101_73911_20111026_220612_inLine +BABEL_BP_101_73911_20111026_220612_outLine +BABEL_BP_101_73923_20111017_171925_inLine +BABEL_BP_101_73923_20111017_171925_outLine +BABEL_BP_101_74234_20111102_161626_inLine +BABEL_BP_101_74234_20111102_161626_outLine +BABEL_BP_101_74317_20111115_154736_inLine +BABEL_BP_101_74317_20111115_154736_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_74404_20111020_190145_inLine +BABEL_BP_101_74404_20111020_190145_outLine +BABEL_BP_101_74451_20111117_164153_inLine +BABEL_BP_101_74451_20111117_164153_outLine +BABEL_BP_101_74823_20111024_162421_inLine +BABEL_BP_101_74823_20111024_162421_outLine +BABEL_BP_101_74884_20111030_143437_inLine +BABEL_BP_101_74884_20111030_143437_outLine +BABEL_BP_101_75020_20111020_153842_inLine +BABEL_BP_101_75020_20111020_153842_outLine +BABEL_BP_101_75243_20111204_182510_inLine +BABEL_BP_101_75243_20111204_182510_outLine +BABEL_BP_101_75815_20111029_172800_inLine +BABEL_BP_101_75815_20111029_172800_outLine +BABEL_BP_101_76341_20111027_132615_inLine +BABEL_BP_101_76341_20111027_132615_outLine +BABEL_BP_101_76585_20111025_150729_inLine +BABEL_BP_101_76585_20111025_150729_outLine +BABEL_BP_101_76661_20111024_190704_inLine +BABEL_BP_101_76661_20111024_190704_outLine +BABEL_BP_101_76661_20111024_194723_inLine +BABEL_BP_101_76661_20111024_194723_outLine +BABEL_BP_101_76733_20111128_133322_inLine +BABEL_BP_101_76733_20111128_133322_outLine +BABEL_BP_101_76919_20111120_202312_inLine +BABEL_BP_101_76919_20111120_202312_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77238_20111024_161359_inLine +BABEL_BP_101_77238_20111024_161359_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_77348_20111018_171727_inLine +BABEL_BP_101_77348_20111018_171727_outLine +BABEL_BP_101_77802_20111125_191137_inLine +BABEL_BP_101_77802_20111125_191137_outLine +BABEL_BP_101_77886_20111026_163310_inLine +BABEL_BP_101_77886_20111026_163311_outLine +BABEL_BP_101_77965_20111019_133612_inLine +BABEL_BP_101_77965_20111019_133612_outLine +BABEL_BP_101_77965_20111019_134901_inLine +BABEL_BP_101_77965_20111019_134901_outLine +BABEL_BP_101_78094_20111026_132018_inLine +BABEL_BP_101_78094_20111026_132018_outLine +BABEL_BP_101_78514_20111026_204851_inLine +BABEL_BP_101_78514_20111026_204851_outLine 
+BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine +BABEL_BP_101_79412_20111026_010314_inLine +BABEL_BP_101_79412_20111026_010314_outLine +BABEL_BP_101_79495_20111017_200437_inLine +BABEL_BP_101_79495_20111017_200438_outLine +BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80156_20111024_145349_inLine +BABEL_BP_101_80156_20111024_145349_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_80786_20111030_205240_inLine +BABEL_BP_101_80786_20111030_205240_outLine +BABEL_BP_101_80817_20111125_182621_inLine +BABEL_BP_101_80817_20111125_182621_outLine +BABEL_BP_101_80953_20111122_205857_inLine +BABEL_BP_101_80953_20111122_205857_outLine +BABEL_BP_101_81056_20111130_220634_inLine +BABEL_BP_101_81056_20111130_220634_outLine +BABEL_BP_101_81308_20111021_143922_inLine +BABEL_BP_101_81308_20111021_143922_outLine +BABEL_BP_101_81321_20111028_124244_inLine +BABEL_BP_101_81321_20111028_124244_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82023_20111024_151938_inLine +BABEL_BP_101_82023_20111024_151938_outLine +BABEL_BP_101_82025_20111024_170514_inLine +BABEL_BP_101_82025_20111024_170514_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_82484_20111103_172542_inLine +BABEL_BP_101_82484_20111103_172542_outLine +BABEL_BP_101_82591_20111030_152731_inLine +BABEL_BP_101_82591_20111030_152731_outLine +BABEL_BP_101_82766_20111026_195127_inLine +BABEL_BP_101_82766_20111026_195127_outLine +BABEL_BP_101_82881_20111025_194316_inLine +BABEL_BP_101_82881_20111025_194316_outLine +BABEL_BP_101_83362_20111018_185746_inLine +BABEL_BP_101_83362_20111018_185746_outLine +BABEL_BP_101_83791_20111017_205314_inLine +BABEL_BP_101_83791_20111017_205314_outLine +BABEL_BP_101_84042_20111129_190132_inLine +BABEL_BP_101_84042_20111129_190132_outLine +BABEL_BP_101_84088_20111020_184621_inLine +BABEL_BP_101_84088_20111020_184621_outLine +BABEL_BP_101_84335_20111116_205244_inLine +BABEL_BP_101_84335_20111116_205244_outLine +BABEL_BP_101_84540_20111102_204218_inLine +BABEL_BP_101_84540_20111102_204218_outLine +BABEL_BP_101_84543_20111124_200551_inLine +BABEL_BP_101_84543_20111124_200551_outLine +BABEL_BP_101_84943_20111020_144955_inLine +BABEL_BP_101_84943_20111020_144955_outLine +BABEL_BP_101_85083_20111123_195138_inLine +BABEL_BP_101_85083_20111123_195138_outLine +BABEL_BP_101_85533_20111029_135232_inLine +BABEL_BP_101_85533_20111029_135232_outLine +BABEL_BP_101_85617_20111126_195610_inLine +BABEL_BP_101_85617_20111126_195610_outLine +BABEL_BP_101_85883_20111126_183750_inLine +BABEL_BP_101_85883_20111126_183750_outLine +BABEL_BP_101_85948_20111020_171625_inLine +BABEL_BP_101_85948_20111020_171626_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_86029_20111027_190831_inLine +BABEL_BP_101_86029_20111027_190831_outLine +BABEL_BP_101_86227_20111020_213628_inLine +BABEL_BP_101_86227_20111020_213628_outLine +BABEL_BP_101_86258_20111128_161415_inLine +BABEL_BP_101_86258_20111128_161415_outLine +BABEL_BP_101_86419_20111019_211829_inLine +BABEL_BP_101_86419_20111019_211829_outLine +BABEL_BP_101_86752_20111206_182753_inLine +BABEL_BP_101_86752_20111206_182753_outLine +BABEL_BP_101_86900_20111029_140540_inLine +BABEL_BP_101_86900_20111029_140540_outLine +BABEL_BP_101_87107_20111028_193807_inLine 
+BABEL_BP_101_87107_20111028_193807_outLine +BABEL_BP_101_87351_20111021_224242_inLine +BABEL_BP_101_87351_20111021_224242_outLine +BABEL_BP_101_87481_20111129_131455_inLine +BABEL_BP_101_87481_20111129_131455_outLine +BABEL_BP_101_87564_20111130_175930_inLine +BABEL_BP_101_87564_20111130_175930_outLine +BABEL_BP_101_87634_20111019_151449_inLine +BABEL_BP_101_87634_20111019_151449_outLine +BABEL_BP_101_87634_20111019_152457_inLine +BABEL_BP_101_87634_20111019_152457_outLine +BABEL_BP_101_88243_20111024_193201_inLine +BABEL_BP_101_88243_20111024_193201_outLine +BABEL_BP_101_88294_20111026_023525_inLine +BABEL_BP_101_88294_20111026_023525_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_88506_20111115_203514_inLine +BABEL_BP_101_88506_20111115_203514_outLine +BABEL_BP_101_88929_20111118_201818_inLine +BABEL_BP_101_88929_20111118_201818_outLine +BABEL_BP_101_89345_20111021_155741_inLine +BABEL_BP_101_89345_20111021_155741_outLine +BABEL_BP_101_89573_20111025_201747_inLine +BABEL_BP_101_89573_20111025_201747_outLine +BABEL_BP_101_89619_20111029_215743_inLine +BABEL_BP_101_89619_20111029_215743_outLine +BABEL_BP_101_89630_20111125_193140_inLine +BABEL_BP_101_89630_20111125_193140_outLine +BABEL_BP_101_89674_20111025_190234_inLine +BABEL_BP_101_89674_20111025_190234_outLine +BABEL_BP_101_89818_20111019_181821_inLine +BABEL_BP_101_89818_20111019_181821_outLine +BABEL_BP_101_89965_20111129_175314_inLine +BABEL_BP_101_89965_20111129_175314_outLine +BABEL_BP_101_90313_20111019_155232_inLine +BABEL_BP_101_90313_20111019_155232_outLine +BABEL_BP_101_90393_20111103_165919_inLine +BABEL_BP_101_90393_20111103_165919_outLine +BABEL_BP_101_90490_20111017_192604_inLine +BABEL_BP_101_90490_20111017_192604_outLine +BABEL_BP_101_90506_20111026_182007_inLine +BABEL_BP_101_90506_20111026_182007_outLine +BABEL_BP_101_90511_20111024_132449_inLine +BABEL_BP_101_90511_20111024_132449_outLine +BABEL_BP_101_90577_20111014_144604_inLine +BABEL_BP_101_90577_20111014_144605_outLine +BABEL_BP_101_90730_20111025_154632_inLine +BABEL_BP_101_90730_20111025_154632_outLine +BABEL_BP_101_90819_20111126_173557_inLine +BABEL_BP_101_90819_20111126_173557_outLine +BABEL_BP_101_90890_20111018_143525_inLine +BABEL_BP_101_90890_20111018_143526_outLine +BABEL_BP_101_90960_20111024_132656_outLine +BABEL_BP_101_91007_20111203_160119_inLine +BABEL_BP_101_91007_20111203_160119_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91401_20111028_174554_inLine +BABEL_BP_101_91401_20111028_174554_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_91409_20111023_181828_inLine +BABEL_BP_101_91409_20111023_181828_outLine +BABEL_BP_101_91481_20111124_213929_inLine +BABEL_BP_101_91481_20111124_213929_outLine +BABEL_BP_101_91491_20111021_160657_outLine +BABEL_BP_101_91668_20111127_133044_inLine +BABEL_BP_101_91668_20111127_133044_outLine +BABEL_BP_101_91777_20111025_195108_outLine +BABEL_BP_101_91873_20111129_214832_inLine +BABEL_BP_101_91873_20111129_214832_outLine +BABEL_BP_101_91905_20111120_211325_inLine +BABEL_BP_101_91905_20111120_211325_outLine +BABEL_BP_101_91979_20111019_195336_inLine +BABEL_BP_101_91979_20111019_195336_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92436_20111024_214516_inLine +BABEL_BP_101_92436_20111024_214516_outLine +BABEL_BP_101_92560_20111025_142040_outLine 
+BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_92642_20111025_170509_inLine +BABEL_BP_101_92642_20111025_170509_outLine +BABEL_BP_101_92735_20111024_171657_inLine +BABEL_BP_101_92735_20111024_171658_outLine +BABEL_BP_101_92800_20111030_222032_inLine +BABEL_BP_101_92800_20111030_222032_outLine +BABEL_BP_101_93004_20111121_190213_inLine +BABEL_BP_101_93004_20111121_190213_outLine +BABEL_BP_101_93091_20111022_191333_inLine +BABEL_BP_101_93091_20111022_191333_outLine +BABEL_BP_101_93151_20111023_184643_inLine +BABEL_BP_101_93151_20111023_184644_outLine +BABEL_BP_101_93192_20111020_130226_inLine +BABEL_BP_101_93192_20111020_130226_outLine +BABEL_BP_101_93454_20111027_014223_inLine +BABEL_BP_101_93454_20111027_014223_outLine +BABEL_BP_101_93597_20111020_195543_outLine +BABEL_BP_101_93643_20111021_154435_inLine +BABEL_BP_101_93643_20111021_154435_outLine +BABEL_BP_101_94149_20111027_125107_inLine +BABEL_BP_101_94149_20111027_125107_outLine +BABEL_BP_101_94222_20111021_144043_outLine +BABEL_BP_101_94223_20111026_220859_inLine +BABEL_BP_101_94223_20111026_220859_outLine +BABEL_BP_101_94226_20111125_140433_inLine +BABEL_BP_101_94226_20111125_140433_outLine +BABEL_BP_101_94514_20111127_130706_inLine +BABEL_BP_101_94514_20111127_130706_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95034_20111126_193931_inLine +BABEL_BP_101_95034_20111126_193931_outLine +BABEL_BP_101_95423_20111116_164510_inLine +BABEL_BP_101_95423_20111116_164510_outLine +BABEL_BP_101_95533_20111129_183735_inLine +BABEL_BP_101_95533_20111129_183735_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_95589_20111118_214545_inLine +BABEL_BP_101_95589_20111118_214545_outLine +BABEL_BP_101_95650_20111019_144529_inLine +BABEL_BP_101_95650_20111019_144529_outLine +BABEL_BP_101_95815_20111024_155626_inLine +BABEL_BP_101_95815_20111024_155626_outLine +BABEL_BP_101_96216_20111021_181529_inLine +BABEL_BP_101_96216_20111021_181529_outLine +BABEL_BP_101_96283_20111115_154603_inLine +BABEL_BP_101_96283_20111115_154603_outLine +BABEL_BP_101_96322_20111031_190734_inLine +BABEL_BP_101_96322_20111031_190734_outLine +BABEL_BP_101_96347_20111024_201758_inLine +BABEL_BP_101_96347_20111024_201758_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_97274_20111023_151720_inLine +BABEL_BP_101_97274_20111023_151720_outLine +BABEL_BP_101_97405_20111019_151334_inLine +BABEL_BP_101_97405_20111019_151334_outLine +BABEL_BP_101_97629_20111130_000852_inLine +BABEL_BP_101_97629_20111130_000852_outLine +BABEL_BP_101_97650_20111126_144341_inLine +BABEL_BP_101_97650_20111126_144341_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98099_20111120_130108_inLine +BABEL_BP_101_98099_20111120_130108_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine +BABEL_BP_101_98279_20111122_195453_inLine +BABEL_BP_101_98279_20111122_195453_outLine +BABEL_BP_101_98345_20111020_205712_outLine +BABEL_BP_101_98467_20111020_152253_inLine +BABEL_BP_101_98467_20111020_152253_outLine +BABEL_BP_101_98476_20111114_220758_inLine +BABEL_BP_101_98476_20111114_220758_outLine 
+BABEL_BP_101_99061_20111020_183348_outLine +BABEL_BP_101_99562_20111205_235804_inLine +BABEL_BP_101_99562_20111205_235804_outLine +BABEL_BP_101_99571_20111024_164204_inLine +BABEL_BP_101_99571_20111024_164204_outLine +BABEL_BP_101_99856_20111125_184505_inLine +BABEL_BP_101_99856_20111125_184505_outLine
diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list
new file mode 100644
index 00000000000..84f6e984f4b
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list
@@ -0,0 +1,120 @@
+BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14729_20111114_200940_inLine +BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_44836_20111119_154154_inLine +BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine
+BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine +BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine
diff --git a/egs/babel/s5d/conf/lists/102-assamese/dev.list b/egs/babel/s5d/conf/lists/102-assamese/dev.list
new file mode 100644
index 00000000000..044d46cc85a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/102-assamese/dev.list
@@ -0,0 +1,126 @@
+BABEL_OP1_102_10408_20121105_223454_inLine +BABEL_OP1_102_10408_20121105_223454_outLine +BABEL_OP1_102_10925_20120329_192327_inLine +BABEL_OP1_102_10925_20120329_192327_outLine +BABEL_OP1_102_13450_20120421_200138_inLine +BABEL_OP1_102_13450_20120421_200138_outLine +BABEL_OP1_102_13879_20121112_220931_inLine +BABEL_OP1_102_13879_20121112_220931_outLine +BABEL_OP1_102_17900_20120331_195842_inLine +BABEL_OP1_102_17900_20120331_195842_outLine +BABEL_OP1_102_18672_20120614_212320_inLine +BABEL_OP1_102_18672_20120614_212320_outLine +BABEL_OP1_102_20518_20120618_155945_inLine +BABEL_OP1_102_20518_20120618_155945_outLine +BABEL_OP1_102_21370_20120410_231048_inLine +BABEL_OP1_102_21370_20120410_231048_outLine +BABEL_OP1_102_25502_20120404_190523_inLine +BABEL_OP1_102_25502_20120404_190523_outLine +BABEL_OP1_102_27178_20120409_211226_inLine +BABEL_OP1_102_27178_20120409_211226_outLine +BABEL_OP1_102_27698_20120328_165641_inLine +BABEL_OP1_102_27698_20120328_165641_outLine +BABEL_OP1_102_29988_20120805_160211_inLine
+BABEL_OP1_102_29988_20120805_160211_outLine +BABEL_OP1_102_31345_20121010_194432_inLine +BABEL_OP1_102_31345_20121010_194432_outLine +BABEL_OP1_102_31345_20121010_195905_inLine +BABEL_OP1_102_31345_20121010_195905_outLine +BABEL_OP1_102_32962_20120427_215011_inLine +BABEL_OP1_102_32962_20120427_215011_outLine +BABEL_OP1_102_33704_20130204_172729_inLine +BABEL_OP1_102_33704_20130204_172729_outLine +BABEL_OP1_102_33969_20130123_165132_inLine +BABEL_OP1_102_33969_20130123_165132_outLine +BABEL_OP1_102_34446_20120426_194557_inLine +BABEL_OP1_102_34446_20120426_194557_outLine +BABEL_OP1_102_34446_20120426_195519_inLine +BABEL_OP1_102_34446_20120426_195519_outLine +BABEL_OP1_102_35470_20130122_212719_inLine +BABEL_OP1_102_35470_20130122_212719_outLine +BABEL_OP1_102_36391_20130127_213459_inLine +BABEL_OP1_102_36391_20130127_213459_outLine +BABEL_OP1_102_38879_20120410_224941_inLine +BABEL_OP1_102_38879_20120410_224941_outLine +BABEL_OP1_102_40385_20121224_164959_inLine +BABEL_OP1_102_40385_20121224_164959_outLine +BABEL_OP1_102_41989_20120410_220614_inLine +BABEL_OP1_102_41989_20120410_220614_outLine +BABEL_OP1_102_43587_20120607_204145_inLine +BABEL_OP1_102_43587_20120607_204145_outLine +BABEL_OP1_102_45106_20120318_191747_inLine +BABEL_OP1_102_45106_20120318_191747_outLine +BABEL_OP1_102_45678_20120328_224850_inLine +BABEL_OP1_102_45678_20120328_224850_outLine +BABEL_OP1_102_45786_20121016_025157_inLine +BABEL_OP1_102_45786_20121016_025157_outLine +BABEL_OP1_102_46593_20121010_023019_inLine +BABEL_OP1_102_46593_20121010_023019_outLine +BABEL_OP1_102_47429_20130121_172000_inLine +BABEL_OP1_102_47429_20130121_172000_outLine +BABEL_OP1_102_47469_20120411_181423_inLine +BABEL_OP1_102_47469_20120411_181423_outLine +BABEL_OP1_102_48812_20120420_004425_inLine +BABEL_OP1_102_48812_20120420_004425_outLine +BABEL_OP1_102_49351_20121214_224227_inLine +BABEL_OP1_102_49351_20121214_224227_outLine +BABEL_OP1_102_50589_20120401_163239_inLine +BABEL_OP1_102_50589_20120401_163239_outLine +BABEL_OP1_102_53179_20121009_225324_inLine +BABEL_OP1_102_53179_20121009_225324_outLine +BABEL_OP1_102_54358_20120908_182858_inLine +BABEL_OP1_102_54358_20120908_182858_outLine +BABEL_OP1_102_54785_20120928_184426_inLine +BABEL_OP1_102_54785_20120928_184426_outLine +BABEL_OP1_102_55355_20121007_010642_inLine +BABEL_OP1_102_55355_20121007_010642_outLine +BABEL_OP1_102_56868_20120410_224604_inLine +BABEL_OP1_102_56868_20120410_224604_outLine +BABEL_OP1_102_59544_20120401_222134_inLine +BABEL_OP1_102_59544_20120401_222134_outLine +BABEL_OP1_102_59746_20120820_014637_inLine +BABEL_OP1_102_59746_20120820_014637_outLine +BABEL_OP1_102_62160_20120422_220826_inLine +BABEL_OP1_102_62160_20120422_220826_outLine +BABEL_OP1_102_64661_20120422_194219_inLine +BABEL_OP1_102_64661_20120422_194219_outLine +BABEL_OP1_102_64880_20121107_190955_inLine +BABEL_OP1_102_64880_20121107_190955_outLine +BABEL_OP1_102_66103_20121006_184826_inLine +BABEL_OP1_102_66103_20121006_184826_outLine +BABEL_OP1_102_66668_20120409_185702_inLine +BABEL_OP1_102_66668_20120409_185702_outLine +BABEL_OP1_102_68706_20120412_221059_inLine +BABEL_OP1_102_68706_20120412_221100_outLine +BABEL_OP1_102_69052_20120506_162432_inLine +BABEL_OP1_102_69052_20120506_162432_outLine +BABEL_OP1_102_70643_20121108_030513_inLine +BABEL_OP1_102_70643_20121108_030513_outLine +BABEL_OP1_102_73122_20120427_225442_inLine +BABEL_OP1_102_73122_20120427_225442_outLine +BABEL_OP1_102_73122_20120427_230538_inLine +BABEL_OP1_102_73122_20120427_230538_outLine 
+BABEL_OP1_102_77886_20120407_215452_inLine +BABEL_OP1_102_77886_20120407_215452_outLine +BABEL_OP1_102_79519_20121008_214502_inLine +BABEL_OP1_102_79519_20121008_214502_outLine +BABEL_OP1_102_80856_20120423_184225_inLine +BABEL_OP1_102_80856_20120423_184225_outLine +BABEL_OP1_102_84042_20120806_194540_inLine +BABEL_OP1_102_84042_20120806_194540_outLine +BABEL_OP1_102_84532_20121222_152400_inLine +BABEL_OP1_102_84532_20121222_152400_outLine +BABEL_OP1_102_84700_20130104_162152_inLine +BABEL_OP1_102_84700_20130104_162152_outLine +BABEL_OP1_102_86305_20120408_170901_inLine +BABEL_OP1_102_86305_20120408_170901_outLine +BABEL_OP1_102_87671_20120401_172420_inLine +BABEL_OP1_102_87671_20120401_172420_outLine +BABEL_OP1_102_87885_20121113_193407_inLine +BABEL_OP1_102_87885_20121113_193407_outLine +BABEL_OP1_102_88245_20121010_173153_inLine +BABEL_OP1_102_88245_20121010_173153_outLine +BABEL_OP1_102_88464_20120612_191239_inLine +BABEL_OP1_102_88464_20120612_191239_outLine +BABEL_OP1_102_90313_20120407_173340_inLine +BABEL_OP1_102_90313_20120407_173340_outLine
diff --git a/egs/babel/s5d/conf/lists/102-assamese/eval.list b/egs/babel/s5d/conf/lists/102-assamese/eval.list
new file mode 100644
index 00000000000..f9c825384ea
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/102-assamese/eval.list
@@ -0,0 +1,189 @@
+BABEL_OP1_102_10033_20120330_194952_inLine +BABEL_OP1_102_10033_20120330_194952_outLine +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_13635_20121106_201907_inLine +BABEL_OP1_102_13635_20121106_201907_outLine +BABEL_OP1_102_14075_20120729_184929_inLine +BABEL_OP1_102_14075_20120729_184929_outLine +BABEL_OP1_102_16875_20121224_191839_inLine +BABEL_OP1_102_16875_20121224_191839_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18648_20121220_162525_inLine +BABEL_OP1_102_18648_20121220_162525_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_19479_20130126_224027_inLine +BABEL_OP1_102_19479_20130126_224027_outLine +BABEL_OP1_102_20483_20120427_223135_inLine +BABEL_OP1_102_20483_20120427_223135_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27363_20121106_193315_inLine +BABEL_OP1_102_27363_20121106_193315_outLine +BABEL_OP1_102_27645_20121001_010501_inLine +BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_29290_20120408_172044_inLine +BABEL_OP1_102_29290_20120408_172044_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_32452_20120427_181559_inLine +BABEL_OP1_102_32452_20120427_181559_outLine +BABEL_OP1_102_32452_20120427_183038_inLine +BABEL_OP1_102_32452_20120427_183038_outLine +BABEL_OP1_102_32890_20121114_200236_inLine +BABEL_OP1_102_32890_20121114_200236_outLine +BABEL_OP1_102_34439_20121106_033220_inLine +BABEL_OP1_102_34439_20121106_033220_outLine +BABEL_OP1_102_39915_20130126_231519_inLine
+BABEL_OP1_102_39915_20130126_231519_outLine +BABEL_OP1_102_41590_20121114_173839_inLine +BABEL_OP1_102_41590_20121114_173839_outLine +BABEL_OP1_102_42212_20121108_215733_inLine +BABEL_OP1_102_42212_20121108_215733_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_44369_20121104_184516_inLine +BABEL_OP1_102_44369_20121104_184516_outLine +BABEL_OP1_102_44827_20130127_025842_inLine +BABEL_OP1_102_44827_20130127_025842_outLine +BABEL_OP1_102_45472_20120328_164753_inLine +BABEL_OP1_102_45472_20120328_164753_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46409_20130127_020220_inLine +BABEL_OP1_102_46409_20130127_020220_outLine +BABEL_OP1_102_46427_20120623_181054_inLine +BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine +BABEL_OP1_102_46950_20130128_024910_inLine +BABEL_OP1_102_46950_20130128_024910_outLine +BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_48072_20120405_174716_inLine +BABEL_OP1_102_48072_20120405_174716_outLine +BABEL_OP1_102_48188_20121114_175337_inLine +BABEL_OP1_102_48188_20121114_175337_outLine +BABEL_OP1_102_48191_20121222_233713_inLine +BABEL_OP1_102_48191_20121222_233713_outLine +BABEL_OP1_102_48404_20121223_171643_inLine +BABEL_OP1_102_48404_20121223_171643_outLine +BABEL_OP1_102_49020_20121114_165007_inLine +BABEL_OP1_102_49020_20121114_165007_outLine +BABEL_OP1_102_49306_20120807_210522_inLine +BABEL_OP1_102_49306_20120807_210522_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_50915_20130127_185334_inLine +BABEL_OP1_102_50915_20130127_185334_outLine +BABEL_OP1_102_51374_20120328_232452_inLine +BABEL_OP1_102_51374_20120328_232452_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_53866_20120401_203758_inLine +BABEL_OP1_102_53866_20120401_203758_outLine +BABEL_OP1_102_54315_20120420_202214_inLine +BABEL_OP1_102_54315_20120420_202214_outLine +BABEL_OP1_102_55144_20120418_220307_inLine +BABEL_OP1_102_55144_20120418_220307_outLine +BABEL_OP1_102_55369_20120331_183350_inLine +BABEL_OP1_102_55369_20120331_183350_outLine +BABEL_OP1_102_55369_20120331_184706_inLine +BABEL_OP1_102_55369_20120331_184706_outLine +BABEL_OP1_102_55678_20120411_170804_inLine +BABEL_OP1_102_55678_20120411_170804_outLine +BABEL_OP1_102_57071_20120806_181947_inLine +BABEL_OP1_102_57071_20120806_181947_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57609_20121003_192352_inLine +BABEL_OP1_102_57609_20121003_192352_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_57637_20130127_030012_inLine +BABEL_OP1_102_57637_20130127_030012_outLine +BABEL_OP1_102_59147_20120329_204323_inLine +BABEL_OP1_102_59147_20120329_204323_outLine +BABEL_OP1_102_65783_20130127_014613_inLine +BABEL_OP1_102_65783_20130127_014613_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_67555_20120401_162516_inLine +BABEL_OP1_102_67555_20120401_162516_outLine +BABEL_OP1_102_68028_20121014_031021_inLine +BABEL_OP1_102_68028_20121014_031021_outLine +BABEL_OP1_102_68136_20130127_022217_inLine +BABEL_OP1_102_68136_20130127_022217_outLine 
+BABEL_OP1_102_69473_20121104_215944_inLine +BABEL_OP1_102_69473_20121104_215944_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_70975_20130126_220855_inLine +BABEL_OP1_102_70975_20130126_220855_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_74607_20120425_221930_inLine +BABEL_OP1_102_74607_20120425_221930_outLine +BABEL_OP1_102_75020_20120328_234502_inLine +BABEL_OP1_102_75020_20120328_234502_outLine +BABEL_OP1_102_76333_20130127_032712_inLine +BABEL_OP1_102_76333_20130127_032712_outLine +BABEL_OP1_102_76372_20121112_041800_inLine +BABEL_OP1_102_76372_20121112_041800_outLine +BABEL_OP1_102_76763_20120330_231328_inLine +BABEL_OP1_102_76763_20120330_231328_outLine +BABEL_OP1_102_76878_20121112_041639_inLine +BABEL_OP1_102_76878_20121112_041639_outLine +BABEL_OP1_102_76925_20130127_021046_inLine +BABEL_OP1_102_76925_20130127_021046_outLine +BABEL_OP1_102_77584_20121114_173809_inLine +BABEL_OP1_102_77584_20121114_173809_outLine +BABEL_OP1_102_77965_20120327_195119_inLine +BABEL_OP1_102_77965_20120327_195119_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_78728_20120430_194848_inLine +BABEL_OP1_102_78728_20120430_194848_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_86014_20120607_010221_inLine +BABEL_OP1_102_86014_20120607_010221_outLine +BABEL_OP1_102_88385_20121226_173154_inLine +BABEL_OP1_102_88385_20121226_173154_outLine +BABEL_OP1_102_88932_20120428_164025_inLine +BABEL_OP1_102_88932_20120428_164025_outLine +BABEL_OP1_102_89301_20120927_001102_inLine +BABEL_OP1_102_89301_20120927_001102_outLine +BABEL_OP1_102_91660_20130123_181342_inLine +BABEL_OP1_102_91660_20130123_181342_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine +BABEL_OP1_102_95572_20130128_023142_inLine +BABEL_OP1_102_95572_20130128_023142_outLine +BABEL_OP1_102_95952_20121111_182203_inLine +BABEL_OP1_102_95952_20121111_182203_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list new file mode 100644 index 00000000000..b6a7ec78017 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list @@ -0,0 +1,65 @@ +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27645_20121001_010501_inLine 
+BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46427_20120623_181054_inLine +BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine +BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list new file mode 100644 index 00000000000..4e388dab16c --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list @@ -0,0 +1,790 @@ +BABEL_OP1_102_10187_20120405_173448_inLine +BABEL_OP1_102_10187_20120405_173448_outLine +BABEL_OP1_102_10271_20120729_173749_inLine +BABEL_OP1_102_10271_20120729_173749_outLine +BABEL_OP1_102_10713_20120401_204236_inLine +BABEL_OP1_102_10713_20120401_204236_outLine +BABEL_OP1_102_11004_20120420_213442_inLine +BABEL_OP1_102_11004_20120420_213442_outLine +BABEL_OP1_102_11031_20120926_231829_inLine +BABEL_OP1_102_11031_20120926_231829_outLine +BABEL_OP1_102_11036_20120406_202335_inLine +BABEL_OP1_102_11036_20120406_202335_outLine +BABEL_OP1_102_11158_20121008_011850_inLine +BABEL_OP1_102_11158_20121008_011850_outLine +BABEL_OP1_102_11371_20120327_175933_inLine +BABEL_OP1_102_11371_20120327_175933_outLine +BABEL_OP1_102_11521_20121005_005530_inLine +BABEL_OP1_102_11521_20121005_005530_outLine +BABEL_OP1_102_11694_20121108_184639_inLine +BABEL_OP1_102_11694_20121108_184639_outLine +BABEL_OP1_102_12120_20121105_205527_inLine 
+BABEL_OP1_102_12120_20121105_205527_outLine +BABEL_OP1_102_12486_20121009_231421_inLine +BABEL_OP1_102_12486_20121009_231421_outLine +BABEL_OP1_102_12535_20121009_024245_inLine +BABEL_OP1_102_12535_20121009_024245_outLine +BABEL_OP1_102_12552_20120727_023454_inLine +BABEL_OP1_102_12552_20120727_023454_outLine +BABEL_OP1_102_12643_20121108_184648_inLine +BABEL_OP1_102_12643_20121108_184648_outLine +BABEL_OP1_102_12655_20120318_171708_inLine +BABEL_OP1_102_12655_20120318_171708_outLine +BABEL_OP1_102_12844_20120411_193813_inLine +BABEL_OP1_102_12844_20120411_193813_outLine +BABEL_OP1_102_13229_20130127_023814_inLine +BABEL_OP1_102_13229_20130127_023814_outLine +BABEL_OP1_102_13389_20120406_184440_inLine +BABEL_OP1_102_13389_20120406_184440_outLine +BABEL_OP1_102_13702_20130121_185149_inLine +BABEL_OP1_102_13702_20130121_185149_outLine +BABEL_OP1_102_13913_20120807_001423_inLine +BABEL_OP1_102_13913_20120807_001423_outLine +BABEL_OP1_102_14769_20120926_165746_inLine +BABEL_OP1_102_14769_20120926_165746_outLine +BABEL_OP1_102_14874_20120417_153112_inLine +BABEL_OP1_102_14874_20120417_153112_outLine +BABEL_OP1_102_14891_20121009_003232_inLine +BABEL_OP1_102_14891_20121009_003232_outLine +BABEL_OP1_102_15146_20120318_184752_inLine +BABEL_OP1_102_15146_20120318_184752_outLine +BABEL_OP1_102_15234_20121108_022333_inLine +BABEL_OP1_102_15234_20121108_022333_outLine +BABEL_OP1_102_15493_20130127_203044_inLine +BABEL_OP1_102_15493_20130127_203044_outLine +BABEL_OP1_102_15502_20120419_233859_inLine +BABEL_OP1_102_15502_20120419_233859_outLine +BABEL_OP1_102_15502_20120420_000213_inLine +BABEL_OP1_102_15502_20120420_000213_outLine +BABEL_OP1_102_15881_20120331_215830_inLine +BABEL_OP1_102_15881_20120331_215830_outLine +BABEL_OP1_102_15916_20120428_221806_inLine +BABEL_OP1_102_15916_20120428_221806_outLine +BABEL_OP1_102_16167_20130122_175936_inLine +BABEL_OP1_102_16167_20130122_175936_outLine +BABEL_OP1_102_16185_20121105_042129_inLine +BABEL_OP1_102_16185_20121105_042129_outLine +BABEL_OP1_102_16313_20120331_215132_inLine +BABEL_OP1_102_16313_20120331_215132_outLine +BABEL_OP1_102_16669_20120327_202211_inLine +BABEL_OP1_102_16669_20120327_202211_outLine +BABEL_OP1_102_17013_20121105_230820_inLine +BABEL_OP1_102_17013_20121105_230820_outLine +BABEL_OP1_102_17203_20121221_161532_inLine +BABEL_OP1_102_17203_20121221_161532_outLine +BABEL_OP1_102_17207_20120729_230128_inLine +BABEL_OP1_102_17207_20120729_230128_outLine +BABEL_OP1_102_17572_20120806_235812_inLine +BABEL_OP1_102_17572_20120806_235812_outLine +BABEL_OP1_102_17933_20120607_184111_inLine +BABEL_OP1_102_17933_20120607_184111_outLine +BABEL_OP1_102_18344_20121109_192858_inLine +BABEL_OP1_102_18344_20121109_192858_outLine +BABEL_OP1_102_18534_20121105_185859_inLine +BABEL_OP1_102_18534_20121105_185859_outLine +BABEL_OP1_102_18730_20130122_171244_inLine +BABEL_OP1_102_18730_20130122_171244_outLine +BABEL_OP1_102_18802_20121104_232940_inLine +BABEL_OP1_102_18802_20121104_232940_outLine +BABEL_OP1_102_19063_20130209_231415_inLine +BABEL_OP1_102_19063_20130209_231415_outLine +BABEL_OP1_102_19147_20120329_190609_inLine +BABEL_OP1_102_19147_20120329_190609_outLine +BABEL_OP1_102_19456_20121110_201037_inLine +BABEL_OP1_102_19456_20121110_201037_outLine +BABEL_OP1_102_19731_20130123_200845_inLine +BABEL_OP1_102_19731_20130123_200845_outLine +BABEL_OP1_102_19758_20120417_174950_inLine +BABEL_OP1_102_19758_20120417_174950_outLine +BABEL_OP1_102_19867_20130127_211111_inLine +BABEL_OP1_102_19867_20130127_211111_outLine 
+BABEL_OP1_102_20271_20120410_205746_inLine +BABEL_OP1_102_20271_20120410_205746_outLine +BABEL_OP1_102_20320_20120409_212129_inLine +BABEL_OP1_102_20320_20120409_212129_outLine +BABEL_OP1_102_20320_20120409_214042_inLine +BABEL_OP1_102_20320_20120409_214042_outLine +BABEL_OP1_102_20454_20121010_020017_inLine +BABEL_OP1_102_20454_20121010_020017_outLine +BABEL_OP1_102_20591_20120806_210212_inLine +BABEL_OP1_102_20591_20120806_210212_outLine +BABEL_OP1_102_21050_20120619_010126_inLine +BABEL_OP1_102_21050_20120619_010126_outLine +BABEL_OP1_102_21477_20120417_212152_inLine +BABEL_OP1_102_21477_20120417_212152_outLine +BABEL_OP1_102_21518_20120805_195607_inLine +BABEL_OP1_102_21518_20120805_195607_outLine +BABEL_OP1_102_21758_20120823_164553_inLine +BABEL_OP1_102_21758_20120823_164553_outLine +BABEL_OP1_102_21782_20120422_184156_inLine +BABEL_OP1_102_21782_20120422_184156_outLine +BABEL_OP1_102_22401_20121017_023338_inLine +BABEL_OP1_102_22401_20121017_023338_outLine +BABEL_OP1_102_22408_20120426_225012_inLine +BABEL_OP1_102_22408_20120426_225012_outLine +BABEL_OP1_102_23167_20120329_204718_inLine +BABEL_OP1_102_23167_20120329_204718_outLine +BABEL_OP1_102_24420_20120624_013709_inLine +BABEL_OP1_102_24420_20120624_013709_outLine +BABEL_OP1_102_24661_20121104_224032_inLine +BABEL_OP1_102_24661_20121104_224032_outLine +BABEL_OP1_102_24833_20120410_172706_inLine +BABEL_OP1_102_24833_20120410_172706_outLine +BABEL_OP1_102_25236_20120804_180700_inLine +BABEL_OP1_102_25236_20120804_180700_outLine +BABEL_OP1_102_25576_20120422_180912_inLine +BABEL_OP1_102_25576_20120422_180912_outLine +BABEL_OP1_102_25904_20120611_203203_inLine +BABEL_OP1_102_25904_20120611_203203_outLine +BABEL_OP1_102_25934_20120329_005438_inLine +BABEL_OP1_102_25934_20120329_005438_outLine +BABEL_OP1_102_26348_20121109_170513_inLine +BABEL_OP1_102_26348_20121109_170513_outLine +BABEL_OP1_102_27007_20120611_223823_inLine +BABEL_OP1_102_27007_20120611_223823_outLine +BABEL_OP1_102_27349_20120422_192337_inLine +BABEL_OP1_102_27349_20120422_192337_outLine +BABEL_OP1_102_27427_20120412_182452_inLine +BABEL_OP1_102_27427_20120412_182452_outLine +BABEL_OP1_102_27824_20120427_201104_inLine +BABEL_OP1_102_27824_20120427_201104_outLine +BABEL_OP1_102_27890_20121002_030324_inLine +BABEL_OP1_102_27890_20121002_030324_outLine +BABEL_OP1_102_28016_20120430_193141_inLine +BABEL_OP1_102_28016_20120430_193141_outLine +BABEL_OP1_102_28016_20120430_194530_inLine +BABEL_OP1_102_28016_20120430_194530_outLine +BABEL_OP1_102_28107_20120327_204144_inLine +BABEL_OP1_102_28107_20120327_204144_outLine +BABEL_OP1_102_28204_20120401_204624_inLine +BABEL_OP1_102_28204_20120401_204624_outLine +BABEL_OP1_102_28260_20120329_210829_inLine +BABEL_OP1_102_28260_20120329_210829_outLine +BABEL_OP1_102_28648_20120608_192702_inLine +BABEL_OP1_102_28648_20120608_192702_outLine +BABEL_OP1_102_29168_20120411_174248_inLine +BABEL_OP1_102_29168_20120411_174248_outLine +BABEL_OP1_102_29259_20120612_211621_inLine +BABEL_OP1_102_29259_20120612_211621_outLine +BABEL_OP1_102_29335_20120609_182335_inLine +BABEL_OP1_102_29335_20120609_182335_outLine +BABEL_OP1_102_29335_20120609_183151_inLine +BABEL_OP1_102_29335_20120609_183151_outLine +BABEL_OP1_102_29444_20120331_231513_inLine +BABEL_OP1_102_29444_20120331_231513_outLine +BABEL_OP1_102_29444_20120331_233317_inLine +BABEL_OP1_102_29444_20120331_233317_outLine +BABEL_OP1_102_29512_20120805_170123_inLine +BABEL_OP1_102_29512_20120805_170123_outLine +BABEL_OP1_102_29512_20120805_172610_inLine 
+BABEL_OP1_102_29512_20120805_172610_outLine +BABEL_OP1_102_29545_20121105_220136_inLine +BABEL_OP1_102_29545_20121105_220136_outLine +BABEL_OP1_102_29959_20130128_195931_inLine +BABEL_OP1_102_29959_20130128_195931_outLine +BABEL_OP1_102_29959_20130128_223813_inLine +BABEL_OP1_102_29959_20130128_223813_outLine +BABEL_OP1_102_30266_20120331_212330_inLine +BABEL_OP1_102_30266_20120331_212330_outLine +BABEL_OP1_102_30530_20120330_173152_inLine +BABEL_OP1_102_30530_20120330_173152_outLine +BABEL_OP1_102_30722_20121011_013755_inLine +BABEL_OP1_102_30722_20121011_013755_outLine +BABEL_OP1_102_31031_20120611_193208_inLine +BABEL_OP1_102_31031_20120611_193208_outLine +BABEL_OP1_102_31902_20120425_211816_inLine +BABEL_OP1_102_31902_20120425_211816_outLine +BABEL_OP1_102_31917_20120611_195339_inLine +BABEL_OP1_102_31917_20120611_195339_outLine +BABEL_OP1_102_32011_20121014_024351_inLine +BABEL_OP1_102_32011_20121014_024351_outLine +BABEL_OP1_102_32562_20121010_014014_inLine +BABEL_OP1_102_32562_20121010_014014_outLine +BABEL_OP1_102_32642_20121104_220528_inLine +BABEL_OP1_102_32642_20121104_220528_outLine +BABEL_OP1_102_33023_20120329_224858_inLine +BABEL_OP1_102_33023_20120329_224858_outLine +BABEL_OP1_102_33540_20120401_212225_inLine +BABEL_OP1_102_33540_20120401_212225_outLine +BABEL_OP1_102_33671_20120422_231219_inLine +BABEL_OP1_102_33671_20120422_231219_outLine +BABEL_OP1_102_34169_20120331_183840_inLine +BABEL_OP1_102_34169_20120331_183840_outLine +BABEL_OP1_102_34194_20120330_182542_inLine +BABEL_OP1_102_34194_20120330_182542_outLine +BABEL_OP1_102_34235_20120405_190745_inLine +BABEL_OP1_102_34235_20120405_190745_outLine +BABEL_OP1_102_34480_20121012_193452_inLine +BABEL_OP1_102_34480_20121012_193452_outLine +BABEL_OP1_102_34590_20120417_151435_inLine +BABEL_OP1_102_34590_20120417_151435_outLine +BABEL_OP1_102_34590_20120417_155556_inLine +BABEL_OP1_102_34590_20120417_155556_outLine +BABEL_OP1_102_34930_20120411_200043_inLine +BABEL_OP1_102_34930_20120411_200043_outLine +BABEL_OP1_102_35011_20120420_020024_inLine +BABEL_OP1_102_35011_20120420_020024_outLine +BABEL_OP1_102_35229_20121106_204019_inLine +BABEL_OP1_102_35229_20121106_204019_outLine +BABEL_OP1_102_35324_20120426_180016_inLine +BABEL_OP1_102_35324_20120426_180016_outLine +BABEL_OP1_102_35324_20120426_203214_inLine +BABEL_OP1_102_35324_20120426_203214_outLine +BABEL_OP1_102_35455_20121112_000231_inLine +BABEL_OP1_102_35455_20121112_000231_outLine +BABEL_OP1_102_36868_20130209_201544_inLine +BABEL_OP1_102_36868_20130209_201544_outLine +BABEL_OP1_102_37260_20120808_012733_inLine +BABEL_OP1_102_37260_20120808_012733_outLine +BABEL_OP1_102_37260_20120808_014150_inLine +BABEL_OP1_102_37260_20120808_014150_outLine +BABEL_OP1_102_37268_20121226_203217_inLine +BABEL_OP1_102_37268_20121226_203217_outLine +BABEL_OP1_102_37285_20120405_223443_inLine +BABEL_OP1_102_37285_20120405_223443_outLine +BABEL_OP1_102_37444_20130128_032426_inLine +BABEL_OP1_102_37444_20130128_032426_outLine +BABEL_OP1_102_37461_20120409_191629_inLine +BABEL_OP1_102_37461_20120409_191629_outLine +BABEL_OP1_102_37461_20120409_194138_inLine +BABEL_OP1_102_37461_20120409_194138_outLine +BABEL_OP1_102_37461_20120409_195519_inLine +BABEL_OP1_102_37461_20120409_195519_outLine +BABEL_OP1_102_37524_20120329_182549_inLine +BABEL_OP1_102_37524_20120329_182549_outLine +BABEL_OP1_102_38264_20121105_050622_inLine +BABEL_OP1_102_38264_20121105_050622_outLine +BABEL_OP1_102_38464_20121012_023702_inLine +BABEL_OP1_102_38464_20121012_023702_outLine 
+BABEL_OP1_102_38592_20121225_215825_inLine +BABEL_OP1_102_38592_20121225_215825_outLine +BABEL_OP1_102_38635_20120607_010931_inLine +BABEL_OP1_102_38635_20120607_010931_outLine +BABEL_OP1_102_38698_20120401_215032_inLine +BABEL_OP1_102_38698_20120401_215032_outLine +BABEL_OP1_102_38863_20121011_183009_inLine +BABEL_OP1_102_38863_20121011_183009_outLine +BABEL_OP1_102_38985_20120806_174824_inLine +BABEL_OP1_102_38985_20120806_174824_outLine +BABEL_OP1_102_38985_20120806_181000_inLine +BABEL_OP1_102_38985_20120806_181000_outLine +BABEL_OP1_102_39098_20120405_203729_inLine +BABEL_OP1_102_39098_20120405_203729_outLine +BABEL_OP1_102_39114_20120930_180045_inLine +BABEL_OP1_102_39114_20120930_180045_outLine +BABEL_OP1_102_39364_20121105_220855_inLine +BABEL_OP1_102_39364_20121105_220855_outLine +BABEL_OP1_102_39430_20120411_182026_inLine +BABEL_OP1_102_39430_20120411_182026_outLine +BABEL_OP1_102_39430_20120411_184729_inLine +BABEL_OP1_102_39430_20120411_184729_outLine +BABEL_OP1_102_40133_20121112_214034_inLine +BABEL_OP1_102_40133_20121112_214034_outLine +BABEL_OP1_102_40168_20120428_173400_inLine +BABEL_OP1_102_40168_20120428_173400_outLine +BABEL_OP1_102_40882_20130209_204142_inLine +BABEL_OP1_102_40882_20130209_204142_outLine +BABEL_OP1_102_41561_20121111_220752_inLine +BABEL_OP1_102_41561_20121111_220752_outLine +BABEL_OP1_102_41949_20120426_222144_inLine +BABEL_OP1_102_41949_20120426_222144_outLine +BABEL_OP1_102_42615_20120327_180819_inLine +BABEL_OP1_102_42615_20120327_180819_outLine +BABEL_OP1_102_42651_20120409_221530_inLine +BABEL_OP1_102_42651_20120409_221530_outLine +BABEL_OP1_102_42749_20121114_005458_inLine +BABEL_OP1_102_42749_20121114_005458_outLine +BABEL_OP1_102_42749_20121114_010754_inLine +BABEL_OP1_102_42749_20121114_010754_outLine +BABEL_OP1_102_43383_20120406_193121_inLine +BABEL_OP1_102_43383_20120406_193121_outLine +BABEL_OP1_102_43423_20120919_201131_inLine +BABEL_OP1_102_43423_20120919_201131_outLine +BABEL_OP1_102_43426_20120501_170331_inLine +BABEL_OP1_102_43426_20120501_170331_outLine +BABEL_OP1_102_43553_20120408_174809_inLine +BABEL_OP1_102_43553_20120408_174809_outLine +BABEL_OP1_102_43652_20120428_191659_inLine +BABEL_OP1_102_43652_20120428_191659_outLine +BABEL_OP1_102_44649_20120611_185930_inLine +BABEL_OP1_102_44649_20120611_185930_outLine +BABEL_OP1_102_44829_20120907_011054_inLine +BABEL_OP1_102_44829_20120907_011054_outLine +BABEL_OP1_102_44829_20120907_013730_inLine +BABEL_OP1_102_44829_20120907_013730_outLine +BABEL_OP1_102_45227_20120329_003400_inLine +BABEL_OP1_102_45227_20120329_003400_outLine +BABEL_OP1_102_45361_20120611_222502_inLine +BABEL_OP1_102_45361_20120611_222502_outLine +BABEL_OP1_102_45677_20130123_192645_inLine +BABEL_OP1_102_45677_20130123_192645_outLine +BABEL_OP1_102_45681_20120623_173741_inLine +BABEL_OP1_102_45681_20120623_173741_outLine +BABEL_OP1_102_45738_20120806_202458_inLine +BABEL_OP1_102_45738_20120806_202458_outLine +BABEL_OP1_102_45892_20120408_220557_inLine +BABEL_OP1_102_45892_20120408_220557_outLine +BABEL_OP1_102_45931_20120421_233726_inLine +BABEL_OP1_102_45931_20120421_233726_outLine +BABEL_OP1_102_46002_20121009_215715_inLine +BABEL_OP1_102_46002_20121009_215715_outLine +BABEL_OP1_102_46269_20121110_215228_inLine +BABEL_OP1_102_46269_20121110_215228_outLine +BABEL_OP1_102_46521_20120411_193429_inLine +BABEL_OP1_102_46521_20120411_193429_outLine +BABEL_OP1_102_47634_20120408_214325_inLine +BABEL_OP1_102_47634_20120408_214325_outLine +BABEL_OP1_102_47823_20120804_180038_inLine 
+BABEL_OP1_102_47823_20120804_180038_outLine +BABEL_OP1_102_48281_20120411_214725_inLine +BABEL_OP1_102_48281_20120411_214725_outLine +BABEL_OP1_102_48410_20120407_204734_inLine +BABEL_OP1_102_48410_20120407_204734_outLine +BABEL_OP1_102_48976_20120410_161651_inLine +BABEL_OP1_102_48976_20120410_161651_outLine +BABEL_OP1_102_49042_20120408_165038_inLine +BABEL_OP1_102_49042_20120408_165038_outLine +BABEL_OP1_102_49628_20120817_204731_inLine +BABEL_OP1_102_49628_20120817_204731_outLine +BABEL_OP1_102_49864_20120421_155657_inLine +BABEL_OP1_102_49864_20120421_155657_outLine +BABEL_OP1_102_50416_20120803_215223_inLine +BABEL_OP1_102_50416_20120803_215223_outLine +BABEL_OP1_102_50555_20120606_224819_inLine +BABEL_OP1_102_50555_20120606_224819_outLine +BABEL_OP1_102_50597_20120623_193352_inLine +BABEL_OP1_102_50597_20120623_193352_outLine +BABEL_OP1_102_50718_20120421_191449_inLine +BABEL_OP1_102_50718_20120421_191449_outLine +BABEL_OP1_102_50752_20121227_204235_inLine +BABEL_OP1_102_50752_20121227_204235_outLine +BABEL_OP1_102_50763_20120405_203621_inLine +BABEL_OP1_102_50763_20120405_203621_outLine +BABEL_OP1_102_50798_20120426_190454_inLine +BABEL_OP1_102_50798_20120426_190454_outLine +BABEL_OP1_102_51149_20121227_201136_inLine +BABEL_OP1_102_51149_20121227_201136_outLine +BABEL_OP1_102_52335_20130123_183229_inLine +BABEL_OP1_102_52335_20130123_183229_outLine +BABEL_OP1_102_52606_20121009_222016_inLine +BABEL_OP1_102_52606_20121009_222016_outLine +BABEL_OP1_102_52642_20120803_212045_inLine +BABEL_OP1_102_52642_20120803_212045_outLine +BABEL_OP1_102_52691_20120407_210408_inLine +BABEL_OP1_102_52691_20120407_210408_outLine +BABEL_OP1_102_52691_20120407_211728_inLine +BABEL_OP1_102_52691_20120407_211728_outLine +BABEL_OP1_102_52691_20120407_213757_inLine +BABEL_OP1_102_52691_20120407_213757_outLine +BABEL_OP1_102_52902_20120607_175045_inLine +BABEL_OP1_102_52902_20120607_175045_outLine +BABEL_OP1_102_52902_20120607_180239_inLine +BABEL_OP1_102_52902_20120607_180239_outLine +BABEL_OP1_102_53429_20121224_202431_inLine +BABEL_OP1_102_53429_20121224_202431_outLine +BABEL_OP1_102_53500_20120428_175953_inLine +BABEL_OP1_102_53500_20120428_175953_outLine +BABEL_OP1_102_53703_20120409_180047_inLine +BABEL_OP1_102_53703_20120409_180047_outLine +BABEL_OP1_102_53982_20120607_220642_inLine +BABEL_OP1_102_53982_20120607_220642_outLine +BABEL_OP1_102_54241_20120911_024357_inLine +BABEL_OP1_102_54241_20120911_024357_outLine +BABEL_OP1_102_54241_20120911_025705_inLine +BABEL_OP1_102_54241_20120911_025705_outLine +BABEL_OP1_102_55182_20120330_201037_inLine +BABEL_OP1_102_55182_20120330_201037_outLine +BABEL_OP1_102_55399_20120409_211258_inLine +BABEL_OP1_102_55399_20120409_211258_outLine +BABEL_OP1_102_55450_20121013_171507_inLine +BABEL_OP1_102_55450_20121013_171507_outLine +BABEL_OP1_102_55470_20120429_194956_inLine +BABEL_OP1_102_55470_20120429_194956_outLine +BABEL_OP1_102_55823_20121010_005200_inLine +BABEL_OP1_102_55823_20121010_005200_outLine +BABEL_OP1_102_55874_20121108_215431_inLine +BABEL_OP1_102_55874_20121108_215431_outLine +BABEL_OP1_102_56070_20120410_224512_inLine +BABEL_OP1_102_56070_20120410_224512_outLine +BABEL_OP1_102_56648_20120615_181652_inLine +BABEL_OP1_102_56648_20120615_181652_outLine +BABEL_OP1_102_56812_20121010_203710_inLine +BABEL_OP1_102_56812_20121010_203710_outLine +BABEL_OP1_102_56943_20121221_203039_inLine +BABEL_OP1_102_56943_20121221_203039_outLine +BABEL_OP1_102_57039_20121107_201157_inLine +BABEL_OP1_102_57039_20121107_201157_outLine 
+BABEL_OP1_102_57422_20120607_213941_inLine +BABEL_OP1_102_57422_20120607_213941_outLine +BABEL_OP1_102_57629_20121010_011015_inLine +BABEL_OP1_102_57629_20121010_011015_outLine +BABEL_OP1_102_57907_20121013_035627_inLine +BABEL_OP1_102_57907_20121013_035627_outLine +BABEL_OP1_102_58715_20120425_190758_inLine +BABEL_OP1_102_58715_20120425_190758_outLine +BABEL_OP1_102_58863_20120404_195038_inLine +BABEL_OP1_102_58863_20120404_195038_outLine +BABEL_OP1_102_58947_20121106_203812_inLine +BABEL_OP1_102_58947_20121106_203812_outLine +BABEL_OP1_102_58947_20121106_205338_inLine +BABEL_OP1_102_58947_20121106_205338_outLine +BABEL_OP1_102_59169_20120611_172953_inLine +BABEL_OP1_102_59169_20120611_172953_outLine +BABEL_OP1_102_59383_20121220_151350_inLine +BABEL_OP1_102_59383_20121220_151350_outLine +BABEL_OP1_102_59628_20121106_031543_inLine +BABEL_OP1_102_59628_20121106_031543_outLine +BABEL_OP1_102_59891_20120611_212238_inLine +BABEL_OP1_102_59891_20120611_212238_outLine +BABEL_OP1_102_59925_20121111_214225_inLine +BABEL_OP1_102_59925_20121111_214225_outLine +BABEL_OP1_102_60193_20120419_201756_inLine +BABEL_OP1_102_60193_20120419_201756_outLine +BABEL_OP1_102_60277_20120615_195600_inLine +BABEL_OP1_102_60277_20120615_195600_outLine +BABEL_OP1_102_60826_20120606_231535_inLine +BABEL_OP1_102_60826_20120606_231535_outLine +BABEL_OP1_102_60848_20121110_170724_inLine +BABEL_OP1_102_60848_20121110_170724_outLine +BABEL_OP1_102_60881_20120401_212818_inLine +BABEL_OP1_102_60881_20120401_212818_outLine +BABEL_OP1_102_60995_20121107_203546_inLine +BABEL_OP1_102_60995_20121107_203546_outLine +BABEL_OP1_102_61263_20121112_213923_inLine +BABEL_OP1_102_61263_20121112_213923_outLine +BABEL_OP1_102_61446_20120420_184155_inLine +BABEL_OP1_102_61446_20120420_184155_outLine +BABEL_OP1_102_61936_20121224_175007_inLine +BABEL_OP1_102_61936_20121224_175007_outLine +BABEL_OP1_102_62132_20120614_214158_inLine +BABEL_OP1_102_62132_20120614_214158_outLine +BABEL_OP1_102_62923_20130122_190544_inLine +BABEL_OP1_102_62923_20130122_190544_outLine +BABEL_OP1_102_63076_20121224_225415_inLine +BABEL_OP1_102_63076_20121224_225415_outLine +BABEL_OP1_102_64185_20120722_220159_inLine +BABEL_OP1_102_64185_20120722_220159_outLine +BABEL_OP1_102_64351_20120608_202610_inLine +BABEL_OP1_102_64351_20120608_202610_outLine +BABEL_OP1_102_65248_20120317_180718_inLine +BABEL_OP1_102_65248_20120317_180718_outLine +BABEL_OP1_102_65273_20121226_233200_inLine +BABEL_OP1_102_65273_20121226_233200_outLine +BABEL_OP1_102_65371_20121228_213615_inLine +BABEL_OP1_102_65371_20121228_213615_outLine +BABEL_OP1_102_65415_20120410_193034_inLine +BABEL_OP1_102_65415_20120410_193034_outLine +BABEL_OP1_102_65580_20120320_234602_inLine +BABEL_OP1_102_65580_20120320_234602_outLine +BABEL_OP1_102_65601_20120427_193019_inLine +BABEL_OP1_102_65601_20120427_193019_outLine +BABEL_OP1_102_65837_20121106_201713_inLine +BABEL_OP1_102_65837_20121106_201713_outLine +BABEL_OP1_102_66330_20120429_164154_inLine +BABEL_OP1_102_66330_20120429_164154_outLine +BABEL_OP1_102_66330_20120429_164900_inLine +BABEL_OP1_102_66330_20120429_164900_outLine +BABEL_OP1_102_66416_20120817_204557_inLine +BABEL_OP1_102_66416_20120817_204557_outLine +BABEL_OP1_102_66441_20120411_170112_inLine +BABEL_OP1_102_66441_20120411_170112_outLine +BABEL_OP1_102_66559_20121227_172234_inLine +BABEL_OP1_102_66559_20121227_172234_outLine +BABEL_OP1_102_67150_20121106_232551_inLine +BABEL_OP1_102_67150_20121106_232551_outLine +BABEL_OP1_102_67733_20120409_192100_inLine 
+BABEL_OP1_102_67733_20120409_192100_outLine +BABEL_OP1_102_67750_20120330_210301_inLine +BABEL_OP1_102_67750_20120330_210301_outLine +BABEL_OP1_102_67798_20120408_211247_inLine +BABEL_OP1_102_67798_20120408_211247_outLine +BABEL_OP1_102_67916_20121224_185018_inLine +BABEL_OP1_102_67916_20121224_185018_outLine +BABEL_OP1_102_69049_20120422_174706_inLine +BABEL_OP1_102_69049_20120422_174706_outLine +BABEL_OP1_102_69145_20121006_214000_inLine +BABEL_OP1_102_69145_20121006_214000_outLine +BABEL_OP1_102_69275_20121009_000322_inLine +BABEL_OP1_102_69275_20121009_000322_outLine +BABEL_OP1_102_69368_20120328_214605_inLine +BABEL_OP1_102_69368_20120328_214605_outLine +BABEL_OP1_102_69446_20130130_183941_inLine +BABEL_OP1_102_69446_20130130_183941_outLine +BABEL_OP1_102_70077_20121222_173141_inLine +BABEL_OP1_102_70077_20121222_173141_outLine +BABEL_OP1_102_70555_20120421_203231_inLine +BABEL_OP1_102_70555_20120421_203231_outLine +BABEL_OP1_102_71778_20120608_222028_inLine +BABEL_OP1_102_71778_20120608_222028_outLine +BABEL_OP1_102_71844_20120331_200325_inLine +BABEL_OP1_102_71844_20120331_200325_outLine +BABEL_OP1_102_72032_20120329_225115_inLine +BABEL_OP1_102_72032_20120329_225115_outLine +BABEL_OP1_102_72718_20121010_030640_inLine +BABEL_OP1_102_72718_20121010_030640_outLine +BABEL_OP1_102_72799_20120428_225215_inLine +BABEL_OP1_102_72799_20120428_225215_outLine +BABEL_OP1_102_73050_20120929_012255_inLine +BABEL_OP1_102_73050_20120929_012255_outLine +BABEL_OP1_102_73059_20121225_162645_inLine +BABEL_OP1_102_73059_20121225_162645_outLine +BABEL_OP1_102_73059_20121225_163932_inLine +BABEL_OP1_102_73059_20121225_163932_outLine +BABEL_OP1_102_73438_20121103_170431_inLine +BABEL_OP1_102_73438_20121103_170431_outLine +BABEL_OP1_102_73440_20120428_195653_inLine +BABEL_OP1_102_73440_20120428_195653_outLine +BABEL_OP1_102_73452_20121003_021245_inLine +BABEL_OP1_102_73452_20121003_021245_outLine +BABEL_OP1_102_73786_20120420_171039_inLine +BABEL_OP1_102_73786_20120420_171039_outLine +BABEL_OP1_102_74043_20120422_170724_inLine +BABEL_OP1_102_74043_20120422_170724_outLine +BABEL_OP1_102_74368_20121008_041653_inLine +BABEL_OP1_102_74368_20121008_041653_outLine +BABEL_OP1_102_74709_20120806_191528_inLine +BABEL_OP1_102_74709_20120806_191528_outLine +BABEL_OP1_102_74823_20120330_181459_inLine +BABEL_OP1_102_74823_20120330_181459_outLine +BABEL_OP1_102_75140_20120330_171509_inLine +BABEL_OP1_102_75140_20120330_171509_outLine +BABEL_OP1_102_75354_20121105_033257_inLine +BABEL_OP1_102_75354_20121105_033257_outLine +BABEL_OP1_102_75498_20120806_180214_inLine +BABEL_OP1_102_75498_20120806_180214_outLine +BABEL_OP1_102_75680_20121110_180407_inLine +BABEL_OP1_102_75680_20121110_180407_outLine +BABEL_OP1_102_75990_20120426_182351_inLine +BABEL_OP1_102_75990_20120426_182351_outLine +BABEL_OP1_102_76331_20120806_185250_inLine +BABEL_OP1_102_76331_20120806_185250_outLine +BABEL_OP1_102_76451_20120329_193459_inLine +BABEL_OP1_102_76451_20120329_193459_outLine +BABEL_OP1_102_77207_20120804_174005_inLine +BABEL_OP1_102_77207_20120804_174005_outLine +BABEL_OP1_102_77244_20121001_003159_inLine +BABEL_OP1_102_77244_20121001_003159_outLine +BABEL_OP1_102_77465_20120607_001521_inLine +BABEL_OP1_102_77465_20120607_001521_outLine +BABEL_OP1_102_77771_20121227_191404_inLine +BABEL_OP1_102_77771_20121227_191404_outLine +BABEL_OP1_102_77811_20130123_215211_inLine +BABEL_OP1_102_77811_20130123_215211_outLine +BABEL_OP1_102_78514_20120409_182010_inLine +BABEL_OP1_102_78514_20120409_182010_outLine 
+BABEL_OP1_102_79495_20120320_011136_inLine +BABEL_OP1_102_79495_20120320_011136_outLine +BABEL_OP1_102_79618_20120401_204258_inLine +BABEL_OP1_102_79618_20120401_204258_outLine +BABEL_OP1_102_79698_20121106_212429_inLine +BABEL_OP1_102_79698_20121106_212429_outLine +BABEL_OP1_102_80174_20130211_031725_inLine +BABEL_OP1_102_80174_20130211_031725_outLine +BABEL_OP1_102_80868_20121028_015553_inLine +BABEL_OP1_102_80868_20121028_015553_outLine +BABEL_OP1_102_81084_20120406_191910_inLine +BABEL_OP1_102_81084_20120406_191910_outLine +BABEL_OP1_102_81587_20121225_213038_inLine +BABEL_OP1_102_81587_20121225_213038_outLine +BABEL_OP1_102_81611_20121110_221005_inLine +BABEL_OP1_102_81611_20121110_221005_outLine +BABEL_OP1_102_81717_20130209_201202_inLine +BABEL_OP1_102_81717_20130209_201202_outLine +BABEL_OP1_102_81878_20120331_181439_inLine +BABEL_OP1_102_81878_20120331_181439_outLine +BABEL_OP1_102_81878_20120331_182958_inLine +BABEL_OP1_102_81878_20120331_182958_outLine +BABEL_OP1_102_82009_20121104_013002_inLine +BABEL_OP1_102_82009_20121104_013002_outLine +BABEL_OP1_102_82023_20120330_175253_inLine +BABEL_OP1_102_82023_20120330_175253_outLine +BABEL_OP1_102_82192_20120429_180649_inLine +BABEL_OP1_102_82192_20120429_180649_outLine +BABEL_OP1_102_82408_20120402_190241_inLine +BABEL_OP1_102_82408_20120402_190241_outLine +BABEL_OP1_102_82880_20121108_173528_inLine +BABEL_OP1_102_82880_20121108_173528_outLine +BABEL_OP1_102_83256_20120330_210950_inLine +BABEL_OP1_102_83256_20120330_210950_outLine +BABEL_OP1_102_83493_20120429_172305_inLine +BABEL_OP1_102_83493_20120429_172305_outLine +BABEL_OP1_102_83493_20120429_175508_inLine +BABEL_OP1_102_83493_20120429_175508_outLine +BABEL_OP1_102_83531_20120408_201200_inLine +BABEL_OP1_102_83531_20120408_201200_outLine +BABEL_OP1_102_83531_20120408_203827_inLine +BABEL_OP1_102_83531_20120408_203827_outLine +BABEL_OP1_102_83634_20130123_212154_inLine +BABEL_OP1_102_83634_20130123_212154_outLine +BABEL_OP1_102_83791_20120420_215616_inLine +BABEL_OP1_102_83791_20120420_215616_outLine +BABEL_OP1_102_84088_20120328_180739_inLine +BABEL_OP1_102_84088_20120328_180739_outLine +BABEL_OP1_102_84284_20121225_175332_inLine +BABEL_OP1_102_84284_20121225_175332_outLine +BABEL_OP1_102_84397_20121110_230552_inLine +BABEL_OP1_102_84397_20121110_230552_outLine +BABEL_OP1_102_84439_20120427_184114_inLine +BABEL_OP1_102_84439_20120427_184114_outLine +BABEL_OP1_102_84608_20120609_194053_inLine +BABEL_OP1_102_84608_20120609_194053_outLine +BABEL_OP1_102_84943_20120401_170153_inLine +BABEL_OP1_102_84943_20120401_170153_outLine +BABEL_OP1_102_85204_20120329_192035_inLine +BABEL_OP1_102_85204_20120329_192035_outLine +BABEL_OP1_102_85716_20120401_165708_inLine +BABEL_OP1_102_85716_20120401_165708_outLine +BABEL_OP1_102_86004_20120418_230109_inLine +BABEL_OP1_102_86004_20120418_230109_outLine +BABEL_OP1_102_86227_20120401_195417_inLine +BABEL_OP1_102_86227_20120401_195417_outLine +BABEL_OP1_102_86886_20121112_201306_inLine +BABEL_OP1_102_86886_20121112_201306_outLine +BABEL_OP1_102_86956_20120401_173752_inLine +BABEL_OP1_102_86956_20120401_173752_outLine +BABEL_OP1_102_87234_20121224_212540_inLine +BABEL_OP1_102_87234_20121224_212540_outLine +BABEL_OP1_102_87481_20121027_153449_inLine +BABEL_OP1_102_87481_20121027_153449_outLine +BABEL_OP1_102_87486_20120406_200642_inLine +BABEL_OP1_102_87486_20120406_200642_outLine +BABEL_OP1_102_87806_20120409_183938_inLine +BABEL_OP1_102_87806_20120409_183938_outLine +BABEL_OP1_102_87857_20120405_202526_inLine 
+BABEL_OP1_102_87857_20120405_202526_outLine +BABEL_OP1_102_87961_20120423_155726_inLine +BABEL_OP1_102_87961_20120423_155726_outLine +BABEL_OP1_102_88163_20121112_003006_inLine +BABEL_OP1_102_88163_20121112_003006_outLine +BABEL_OP1_102_89583_20121011_013631_inLine +BABEL_OP1_102_89583_20121011_013631_outLine +BABEL_OP1_102_89583_20121012_014745_inLine +BABEL_OP1_102_89583_20121012_014745_outLine +BABEL_OP1_102_89838_20120409_214411_inLine +BABEL_OP1_102_89838_20120409_214411_outLine +BABEL_OP1_102_89993_20120607_175900_inLine +BABEL_OP1_102_89993_20120607_175900_outLine +BABEL_OP1_102_90055_20120405_192435_inLine +BABEL_OP1_102_90055_20120405_192435_outLine +BABEL_OP1_102_90389_20121012_050118_inLine +BABEL_OP1_102_90389_20121012_050118_outLine +BABEL_OP1_102_90393_20120419_214927_inLine +BABEL_OP1_102_90393_20120419_214927_outLine +BABEL_OP1_102_90511_20120329_224306_inLine +BABEL_OP1_102_90511_20120329_224306_outLine +BABEL_OP1_102_90609_20120410_184424_inLine +BABEL_OP1_102_90609_20120410_184424_outLine +BABEL_OP1_102_90810_20120404_221650_inLine +BABEL_OP1_102_90810_20120404_221650_outLine +BABEL_OP1_102_90819_20120614_222542_inLine +BABEL_OP1_102_90819_20120614_222542_outLine +BABEL_OP1_102_90890_20120320_235811_inLine +BABEL_OP1_102_90890_20120320_235811_outLine +BABEL_OP1_102_90975_20130127_194034_inLine +BABEL_OP1_102_90975_20130127_194034_outLine +BABEL_OP1_102_90975_20130127_195301_inLine +BABEL_OP1_102_90975_20130127_195301_outLine +BABEL_OP1_102_91171_20121222_000026_inLine +BABEL_OP1_102_91171_20121222_000026_outLine +BABEL_OP1_102_91358_20121103_191541_inLine +BABEL_OP1_102_91358_20121103_191541_outLine +BABEL_OP1_102_91386_20121226_175240_inLine +BABEL_OP1_102_91386_20121226_175240_outLine +BABEL_OP1_102_91401_20120409_195325_inLine +BABEL_OP1_102_91401_20120409_195325_outLine +BABEL_OP1_102_91481_20120806_232222_inLine +BABEL_OP1_102_91481_20120806_232222_outLine +BABEL_OP1_102_91865_20130127_193426_inLine +BABEL_OP1_102_91865_20130127_193426_outLine +BABEL_OP1_102_92002_20120821_172434_inLine +BABEL_OP1_102_92002_20120821_172434_outLine +BABEL_OP1_102_92252_20120805_193105_inLine +BABEL_OP1_102_92252_20120805_193105_outLine +BABEL_OP1_102_92252_20120805_202508_inLine +BABEL_OP1_102_92252_20120805_202508_outLine +BABEL_OP1_102_92321_20120729_204129_inLine +BABEL_OP1_102_92321_20120729_204129_outLine +BABEL_OP1_102_92386_20120401_175909_inLine +BABEL_OP1_102_92386_20120401_175909_outLine +BABEL_OP1_102_92407_20120330_180101_inLine +BABEL_OP1_102_92407_20120330_180101_outLine +BABEL_OP1_102_92591_20120818_164613_inLine +BABEL_OP1_102_92591_20120818_164613_outLine +BABEL_OP1_102_92591_20120818_170346_inLine +BABEL_OP1_102_92591_20120818_170346_outLine +BABEL_OP1_102_92591_20120818_171559_inLine +BABEL_OP1_102_92591_20120818_171559_outLine +BABEL_OP1_102_92628_20120404_212106_inLine +BABEL_OP1_102_92628_20120404_212106_outLine +BABEL_OP1_102_92800_20120408_165253_inLine +BABEL_OP1_102_92800_20120408_165253_outLine +BABEL_OP1_102_93091_20120425_204602_inLine +BABEL_OP1_102_93091_20120425_204602_outLine +BABEL_OP1_102_93091_20120425_205745_inLine +BABEL_OP1_102_93091_20120425_205745_outLine +BABEL_OP1_102_93151_20120410_200907_inLine +BABEL_OP1_102_93151_20120410_200907_outLine +BABEL_OP1_102_93277_20121028_025007_inLine +BABEL_OP1_102_93277_20121028_025007_outLine +BABEL_OP1_102_93277_20121106_011048_inLine +BABEL_OP1_102_93277_20121106_011048_outLine +BABEL_OP1_102_93509_20120422_230046_inLine +BABEL_OP1_102_93509_20120422_230046_outLine 
+BABEL_OP1_102_93607_20120806_194627_inLine +BABEL_OP1_102_93607_20120806_194627_outLine +BABEL_OP1_102_94162_20130209_213329_inLine +BABEL_OP1_102_94162_20130209_213329_outLine +BABEL_OP1_102_94542_20130122_222709_inLine +BABEL_OP1_102_94542_20130122_222709_outLine +BABEL_OP1_102_94694_20120611_183126_inLine +BABEL_OP1_102_94694_20120611_183126_outLine +BABEL_OP1_102_94696_20130127_183814_inLine +BABEL_OP1_102_94696_20130127_183814_outLine +BABEL_OP1_102_95350_20120420_225049_inLine +BABEL_OP1_102_95350_20120420_225049_outLine +BABEL_OP1_102_95566_20121222_024129_inLine +BABEL_OP1_102_95566_20121222_024129_outLine +BABEL_OP1_102_95637_20120329_225942_inLine +BABEL_OP1_102_95637_20120329_225942_outLine +BABEL_OP1_102_95650_20120327_230850_inLine +BABEL_OP1_102_95650_20120327_230850_outLine +BABEL_OP1_102_95815_20120401_233401_inLine +BABEL_OP1_102_95815_20120401_233401_outLine +BABEL_OP1_102_95849_20121106_222829_inLine +BABEL_OP1_102_95849_20121106_222829_outLine +BABEL_OP1_102_95996_20120427_174020_inLine +BABEL_OP1_102_95996_20120427_174020_outLine +BABEL_OP1_102_96216_20120412_193323_inLine +BABEL_OP1_102_96216_20120412_193323_outLine +BABEL_OP1_102_96283_20120720_021526_inLine +BABEL_OP1_102_96283_20120720_021526_outLine +BABEL_OP1_102_96347_20120330_201932_inLine +BABEL_OP1_102_96347_20120330_201932_outLine +BABEL_OP1_102_96788_20120411_183347_inLine +BABEL_OP1_102_96788_20120411_183347_outLine +BABEL_OP1_102_97004_20121107_210600_inLine +BABEL_OP1_102_97004_20121107_210600_outLine +BABEL_OP1_102_97260_20120409_175649_inLine +BABEL_OP1_102_97260_20120409_175649_outLine +BABEL_OP1_102_97590_20121110_214746_inLine +BABEL_OP1_102_97590_20121110_214746_outLine +BABEL_OP1_102_97590_20121110_215543_inLine +BABEL_OP1_102_97590_20121110_215543_outLine +BABEL_OP1_102_97760_20121010_154720_inLine +BABEL_OP1_102_97760_20121010_154720_outLine +BABEL_OP1_102_98402_20121112_014920_inLine +BABEL_OP1_102_98402_20121112_014920_outLine +BABEL_OP1_102_98640_20120930_211930_inLine +BABEL_OP1_102_98640_20120930_211930_outLine +BABEL_OP1_102_98675_20130209_215547_inLine +BABEL_OP1_102_98675_20130209_215547_outLine +BABEL_OP1_102_99514_20120406_182505_inLine +BABEL_OP1_102_99514_20120406_182505_outLine +BABEL_OP1_102_99709_20120429_201437_inLine +BABEL_OP1_102_99709_20120429_201437_outLine +BABEL_OP1_102_99709_20120429_202748_inLine +BABEL_OP1_102_99709_20120429_202748_outLine +BABEL_OP1_102_99731_20121220_214209_inLine +BABEL_OP1_102_99731_20121220_214209_outLine +BABEL_OP1_102_99823_20120429_181728_inLine +BABEL_OP1_102_99823_20120429_181728_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list new file mode 100644 index 00000000000..138e2c7651f --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list @@ -0,0 +1,138 @@ +BABEL_OP1_102_10713_20120401_204236_inLine +BABEL_OP1_102_10713_20120401_204236_outLine +BABEL_OP1_102_11031_20120926_231829_inLine +BABEL_OP1_102_11031_20120926_231829_outLine +BABEL_OP1_102_12655_20120318_171708_inLine +BABEL_OP1_102_12655_20120318_171708_outLine +BABEL_OP1_102_14874_20120417_153112_inLine +BABEL_OP1_102_14874_20120417_153112_outLine +BABEL_OP1_102_15493_20130127_203044_inLine +BABEL_OP1_102_15493_20130127_203044_outLine +BABEL_OP1_102_16185_20121105_042129_inLine +BABEL_OP1_102_16185_20121105_042129_outLine +BABEL_OP1_102_17207_20120729_230128_inLine +BABEL_OP1_102_17207_20120729_230128_outLine +BABEL_OP1_102_18344_20121109_192858_inLine 
+BABEL_OP1_102_18344_20121109_192858_outLine +BABEL_OP1_102_19063_20130209_231415_inLine +BABEL_OP1_102_19063_20130209_231415_outLine +BABEL_OP1_102_19758_20120417_174950_inLine +BABEL_OP1_102_19758_20120417_174950_outLine +BABEL_OP1_102_29512_20120805_170123_inLine +BABEL_OP1_102_29512_20120805_170123_outLine +BABEL_OP1_102_29512_20120805_172610_inLine +BABEL_OP1_102_29512_20120805_172610_outLine +BABEL_OP1_102_30530_20120330_173152_inLine +BABEL_OP1_102_30530_20120330_173152_outLine +BABEL_OP1_102_32011_20121014_024351_inLine +BABEL_OP1_102_32011_20121014_024351_outLine +BABEL_OP1_102_34194_20120330_182542_inLine +BABEL_OP1_102_34194_20120330_182542_outLine +BABEL_OP1_102_37524_20120329_182549_inLine +BABEL_OP1_102_37524_20120329_182549_outLine +BABEL_OP1_102_38464_20121012_023702_inLine +BABEL_OP1_102_38464_20121012_023702_outLine +BABEL_OP1_102_38635_20120607_010931_inLine +BABEL_OP1_102_38635_20120607_010931_outLine +BABEL_OP1_102_38985_20120806_174824_inLine +BABEL_OP1_102_38985_20120806_174824_outLine +BABEL_OP1_102_38985_20120806_181000_inLine +BABEL_OP1_102_38985_20120806_181000_outLine +BABEL_OP1_102_39098_20120405_203729_inLine +BABEL_OP1_102_39098_20120405_203729_outLine +BABEL_OP1_102_45227_20120329_003400_inLine +BABEL_OP1_102_45227_20120329_003400_outLine +BABEL_OP1_102_46521_20120411_193429_inLine +BABEL_OP1_102_46521_20120411_193429_outLine +BABEL_OP1_102_48281_20120411_214725_inLine +BABEL_OP1_102_48281_20120411_214725_outLine +BABEL_OP1_102_50416_20120803_215223_inLine +BABEL_OP1_102_50416_20120803_215223_outLine +BABEL_OP1_102_51149_20121227_201136_inLine +BABEL_OP1_102_51149_20121227_201136_outLine +BABEL_OP1_102_53429_20121224_202431_inLine +BABEL_OP1_102_53429_20121224_202431_outLine +BABEL_OP1_102_55399_20120409_211258_inLine +BABEL_OP1_102_55399_20120409_211258_outLine +BABEL_OP1_102_59628_20121106_031543_inLine +BABEL_OP1_102_59628_20121106_031543_outLine +BABEL_OP1_102_61936_20121224_175007_inLine +BABEL_OP1_102_61936_20121224_175007_outLine +BABEL_OP1_102_65601_20120427_193019_inLine +BABEL_OP1_102_65601_20120427_193019_outLine +BABEL_OP1_102_66330_20120429_164154_inLine +BABEL_OP1_102_66330_20120429_164154_outLine +BABEL_OP1_102_66330_20120429_164900_inLine +BABEL_OP1_102_66330_20120429_164900_outLine +BABEL_OP1_102_69446_20130130_183941_inLine +BABEL_OP1_102_69446_20130130_183941_outLine +BABEL_OP1_102_70077_20121222_173141_inLine +BABEL_OP1_102_70077_20121222_173141_outLine +BABEL_OP1_102_71844_20120331_200325_inLine +BABEL_OP1_102_71844_20120331_200325_outLine +BABEL_OP1_102_73059_20121225_162645_inLine +BABEL_OP1_102_73059_20121225_162645_outLine +BABEL_OP1_102_73059_20121225_163932_inLine +BABEL_OP1_102_73059_20121225_163932_outLine +BABEL_OP1_102_77207_20120804_174005_inLine +BABEL_OP1_102_77207_20120804_174005_outLine +BABEL_OP1_102_79618_20120401_204258_inLine +BABEL_OP1_102_79618_20120401_204258_outLine +BABEL_OP1_102_79698_20121106_212429_inLine +BABEL_OP1_102_79698_20121106_212429_outLine +BABEL_OP1_102_80174_20130211_031725_inLine +BABEL_OP1_102_80174_20130211_031725_outLine +BABEL_OP1_102_81611_20121110_221005_inLine +BABEL_OP1_102_81611_20121110_221005_outLine +BABEL_OP1_102_82880_20121108_173528_inLine +BABEL_OP1_102_82880_20121108_173528_outLine +BABEL_OP1_102_85204_20120329_192035_inLine +BABEL_OP1_102_85204_20120329_192035_outLine +BABEL_OP1_102_86227_20120401_195417_inLine +BABEL_OP1_102_86227_20120401_195417_outLine +BABEL_OP1_102_86956_20120401_173752_inLine +BABEL_OP1_102_86956_20120401_173752_outLine 
+BABEL_OP1_102_87481_20121027_153449_inLine +BABEL_OP1_102_87481_20121027_153449_outLine +BABEL_OP1_102_87486_20120406_200642_inLine +BABEL_OP1_102_87486_20120406_200642_outLine +BABEL_OP1_102_87806_20120409_183938_inLine +BABEL_OP1_102_87806_20120409_183938_outLine +BABEL_OP1_102_89583_20121011_013631_inLine +BABEL_OP1_102_89583_20121011_013631_outLine +BABEL_OP1_102_89583_20121012_014745_inLine +BABEL_OP1_102_89583_20121012_014745_outLine +BABEL_OP1_102_89838_20120409_214411_inLine +BABEL_OP1_102_89838_20120409_214411_outLine +BABEL_OP1_102_90055_20120405_192435_inLine +BABEL_OP1_102_90055_20120405_192435_outLine +BABEL_OP1_102_90389_20121012_050118_inLine +BABEL_OP1_102_90389_20121012_050118_outLine +BABEL_OP1_102_90609_20120410_184424_inLine +BABEL_OP1_102_90609_20120410_184424_outLine +BABEL_OP1_102_92591_20120818_164613_inLine +BABEL_OP1_102_92591_20120818_164613_outLine +BABEL_OP1_102_92591_20120818_170346_inLine +BABEL_OP1_102_92591_20120818_170346_outLine +BABEL_OP1_102_92591_20120818_171559_inLine +BABEL_OP1_102_92591_20120818_171559_outLine +BABEL_OP1_102_93151_20120410_200907_inLine +BABEL_OP1_102_93151_20120410_200907_outLine +BABEL_OP1_102_93277_20121028_025007_inLine +BABEL_OP1_102_93277_20121028_025007_outLine +BABEL_OP1_102_93277_20121106_011048_inLine +BABEL_OP1_102_93277_20121106_011048_outLine +BABEL_OP1_102_95996_20120427_174020_inLine +BABEL_OP1_102_95996_20120427_174020_outLine +BABEL_OP1_102_96216_20120412_193323_inLine +BABEL_OP1_102_96216_20120412_193323_outLine +BABEL_OP1_102_97004_20121107_210600_inLine +BABEL_OP1_102_97004_20121107_210600_outLine +BABEL_OP1_102_97760_20121010_154720_inLine +BABEL_OP1_102_97760_20121010_154720_outLine +BABEL_OP1_102_98640_20120930_211930_inLine +BABEL_OP1_102_98640_20120930_211930_outLine +BABEL_OP1_102_99709_20120429_201437_inLine +BABEL_OP1_102_99709_20120429_201437_outLine +BABEL_OP1_102_99709_20120429_202748_inLine +BABEL_OP1_102_99709_20120429_202748_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..f363d1b4216 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list @@ -0,0 +1,652 @@ +BABEL_OP1_102_10187_20120405_173448_inLine +BABEL_OP1_102_10187_20120405_173448_outLine +BABEL_OP1_102_10271_20120729_173749_inLine +BABEL_OP1_102_10271_20120729_173749_outLine +BABEL_OP1_102_11004_20120420_213442_inLine +BABEL_OP1_102_11004_20120420_213442_outLine +BABEL_OP1_102_11036_20120406_202335_inLine +BABEL_OP1_102_11036_20120406_202335_outLine +BABEL_OP1_102_11158_20121008_011850_inLine +BABEL_OP1_102_11158_20121008_011850_outLine +BABEL_OP1_102_11371_20120327_175933_inLine +BABEL_OP1_102_11371_20120327_175933_outLine +BABEL_OP1_102_11521_20121005_005530_inLine +BABEL_OP1_102_11521_20121005_005530_outLine +BABEL_OP1_102_11694_20121108_184639_inLine +BABEL_OP1_102_11694_20121108_184639_outLine +BABEL_OP1_102_12120_20121105_205527_inLine +BABEL_OP1_102_12120_20121105_205527_outLine +BABEL_OP1_102_12486_20121009_231421_inLine +BABEL_OP1_102_12486_20121009_231421_outLine +BABEL_OP1_102_12535_20121009_024245_inLine +BABEL_OP1_102_12535_20121009_024245_outLine +BABEL_OP1_102_12552_20120727_023454_inLine +BABEL_OP1_102_12552_20120727_023454_outLine +BABEL_OP1_102_12643_20121108_184648_inLine +BABEL_OP1_102_12643_20121108_184648_outLine +BABEL_OP1_102_12844_20120411_193813_inLine +BABEL_OP1_102_12844_20120411_193813_outLine 
+BABEL_OP1_102_13229_20130127_023814_inLine +BABEL_OP1_102_13229_20130127_023814_outLine +BABEL_OP1_102_13389_20120406_184440_inLine +BABEL_OP1_102_13389_20120406_184440_outLine +BABEL_OP1_102_13702_20130121_185149_inLine +BABEL_OP1_102_13702_20130121_185149_outLine +BABEL_OP1_102_13913_20120807_001423_inLine +BABEL_OP1_102_13913_20120807_001423_outLine +BABEL_OP1_102_14769_20120926_165746_inLine +BABEL_OP1_102_14769_20120926_165746_outLine +BABEL_OP1_102_14891_20121009_003232_inLine +BABEL_OP1_102_14891_20121009_003232_outLine +BABEL_OP1_102_15146_20120318_184752_inLine +BABEL_OP1_102_15146_20120318_184752_outLine +BABEL_OP1_102_15234_20121108_022333_inLine +BABEL_OP1_102_15234_20121108_022333_outLine +BABEL_OP1_102_15502_20120419_233859_inLine +BABEL_OP1_102_15502_20120419_233859_outLine +BABEL_OP1_102_15502_20120420_000213_inLine +BABEL_OP1_102_15502_20120420_000213_outLine +BABEL_OP1_102_15881_20120331_215830_inLine +BABEL_OP1_102_15881_20120331_215830_outLine +BABEL_OP1_102_15916_20120428_221806_inLine +BABEL_OP1_102_15916_20120428_221806_outLine +BABEL_OP1_102_16167_20130122_175936_inLine +BABEL_OP1_102_16167_20130122_175936_outLine +BABEL_OP1_102_16313_20120331_215132_inLine +BABEL_OP1_102_16313_20120331_215132_outLine +BABEL_OP1_102_16669_20120327_202211_inLine +BABEL_OP1_102_16669_20120327_202211_outLine +BABEL_OP1_102_17013_20121105_230820_inLine +BABEL_OP1_102_17013_20121105_230820_outLine +BABEL_OP1_102_17203_20121221_161532_inLine +BABEL_OP1_102_17203_20121221_161532_outLine +BABEL_OP1_102_17572_20120806_235812_inLine +BABEL_OP1_102_17572_20120806_235812_outLine +BABEL_OP1_102_17933_20120607_184111_inLine +BABEL_OP1_102_17933_20120607_184111_outLine +BABEL_OP1_102_18534_20121105_185859_inLine +BABEL_OP1_102_18534_20121105_185859_outLine +BABEL_OP1_102_18730_20130122_171244_inLine +BABEL_OP1_102_18730_20130122_171244_outLine +BABEL_OP1_102_18802_20121104_232940_inLine +BABEL_OP1_102_18802_20121104_232940_outLine +BABEL_OP1_102_19147_20120329_190609_inLine +BABEL_OP1_102_19147_20120329_190609_outLine +BABEL_OP1_102_19456_20121110_201037_inLine +BABEL_OP1_102_19456_20121110_201037_outLine +BABEL_OP1_102_19731_20130123_200845_inLine +BABEL_OP1_102_19731_20130123_200845_outLine +BABEL_OP1_102_19867_20130127_211111_inLine +BABEL_OP1_102_19867_20130127_211111_outLine +BABEL_OP1_102_20271_20120410_205746_inLine +BABEL_OP1_102_20271_20120410_205746_outLine +BABEL_OP1_102_20320_20120409_212129_inLine +BABEL_OP1_102_20320_20120409_212129_outLine +BABEL_OP1_102_20320_20120409_214042_inLine +BABEL_OP1_102_20320_20120409_214042_outLine +BABEL_OP1_102_20454_20121010_020017_inLine +BABEL_OP1_102_20454_20121010_020017_outLine +BABEL_OP1_102_20591_20120806_210212_inLine +BABEL_OP1_102_20591_20120806_210212_outLine +BABEL_OP1_102_21050_20120619_010126_inLine +BABEL_OP1_102_21050_20120619_010126_outLine +BABEL_OP1_102_21477_20120417_212152_inLine +BABEL_OP1_102_21477_20120417_212152_outLine +BABEL_OP1_102_21518_20120805_195607_inLine +BABEL_OP1_102_21518_20120805_195607_outLine +BABEL_OP1_102_21758_20120823_164553_inLine +BABEL_OP1_102_21758_20120823_164553_outLine +BABEL_OP1_102_21782_20120422_184156_inLine +BABEL_OP1_102_21782_20120422_184156_outLine +BABEL_OP1_102_22401_20121017_023338_inLine +BABEL_OP1_102_22401_20121017_023338_outLine +BABEL_OP1_102_22408_20120426_225012_inLine +BABEL_OP1_102_22408_20120426_225012_outLine +BABEL_OP1_102_23167_20120329_204718_inLine +BABEL_OP1_102_23167_20120329_204718_outLine +BABEL_OP1_102_24420_20120624_013709_inLine 
+BABEL_OP1_102_24420_20120624_013709_outLine +BABEL_OP1_102_24661_20121104_224032_inLine +BABEL_OP1_102_24661_20121104_224032_outLine +BABEL_OP1_102_24833_20120410_172706_inLine +BABEL_OP1_102_24833_20120410_172706_outLine +BABEL_OP1_102_25236_20120804_180700_inLine +BABEL_OP1_102_25236_20120804_180700_outLine +BABEL_OP1_102_25576_20120422_180912_inLine +BABEL_OP1_102_25576_20120422_180912_outLine +BABEL_OP1_102_25904_20120611_203203_inLine +BABEL_OP1_102_25904_20120611_203203_outLine +BABEL_OP1_102_25934_20120329_005438_inLine +BABEL_OP1_102_25934_20120329_005438_outLine +BABEL_OP1_102_26348_20121109_170513_inLine +BABEL_OP1_102_26348_20121109_170513_outLine +BABEL_OP1_102_27007_20120611_223823_inLine +BABEL_OP1_102_27007_20120611_223823_outLine +BABEL_OP1_102_27349_20120422_192337_inLine +BABEL_OP1_102_27349_20120422_192337_outLine +BABEL_OP1_102_27427_20120412_182452_inLine +BABEL_OP1_102_27427_20120412_182452_outLine +BABEL_OP1_102_27824_20120427_201104_inLine +BABEL_OP1_102_27824_20120427_201104_outLine +BABEL_OP1_102_27890_20121002_030324_inLine +BABEL_OP1_102_27890_20121002_030324_outLine +BABEL_OP1_102_28016_20120430_193141_inLine +BABEL_OP1_102_28016_20120430_193141_outLine +BABEL_OP1_102_28016_20120430_194530_inLine +BABEL_OP1_102_28016_20120430_194530_outLine +BABEL_OP1_102_28107_20120327_204144_inLine +BABEL_OP1_102_28107_20120327_204144_outLine +BABEL_OP1_102_28204_20120401_204624_inLine +BABEL_OP1_102_28204_20120401_204624_outLine +BABEL_OP1_102_28260_20120329_210829_inLine +BABEL_OP1_102_28260_20120329_210829_outLine +BABEL_OP1_102_28648_20120608_192702_inLine +BABEL_OP1_102_28648_20120608_192702_outLine +BABEL_OP1_102_29168_20120411_174248_inLine +BABEL_OP1_102_29168_20120411_174248_outLine +BABEL_OP1_102_29259_20120612_211621_inLine +BABEL_OP1_102_29259_20120612_211621_outLine +BABEL_OP1_102_29335_20120609_182335_inLine +BABEL_OP1_102_29335_20120609_182335_outLine +BABEL_OP1_102_29335_20120609_183151_inLine +BABEL_OP1_102_29335_20120609_183151_outLine +BABEL_OP1_102_29444_20120331_231513_inLine +BABEL_OP1_102_29444_20120331_231513_outLine +BABEL_OP1_102_29444_20120331_233317_inLine +BABEL_OP1_102_29444_20120331_233317_outLine +BABEL_OP1_102_29545_20121105_220136_inLine +BABEL_OP1_102_29545_20121105_220136_outLine +BABEL_OP1_102_29959_20130128_195931_inLine +BABEL_OP1_102_29959_20130128_195931_outLine +BABEL_OP1_102_29959_20130128_223813_inLine +BABEL_OP1_102_29959_20130128_223813_outLine +BABEL_OP1_102_30266_20120331_212330_inLine +BABEL_OP1_102_30266_20120331_212330_outLine +BABEL_OP1_102_30722_20121011_013755_inLine +BABEL_OP1_102_30722_20121011_013755_outLine +BABEL_OP1_102_31031_20120611_193208_inLine +BABEL_OP1_102_31031_20120611_193208_outLine +BABEL_OP1_102_31902_20120425_211816_inLine +BABEL_OP1_102_31902_20120425_211816_outLine +BABEL_OP1_102_31917_20120611_195339_inLine +BABEL_OP1_102_31917_20120611_195339_outLine +BABEL_OP1_102_32562_20121010_014014_inLine +BABEL_OP1_102_32562_20121010_014014_outLine +BABEL_OP1_102_32642_20121104_220528_inLine +BABEL_OP1_102_32642_20121104_220528_outLine +BABEL_OP1_102_33023_20120329_224858_inLine +BABEL_OP1_102_33023_20120329_224858_outLine +BABEL_OP1_102_33540_20120401_212225_inLine +BABEL_OP1_102_33540_20120401_212225_outLine +BABEL_OP1_102_33671_20120422_231219_inLine +BABEL_OP1_102_33671_20120422_231219_outLine +BABEL_OP1_102_34169_20120331_183840_inLine +BABEL_OP1_102_34169_20120331_183840_outLine +BABEL_OP1_102_34235_20120405_190745_inLine +BABEL_OP1_102_34235_20120405_190745_outLine 
+BABEL_OP1_102_34480_20121012_193452_inLine +BABEL_OP1_102_34480_20121012_193452_outLine +BABEL_OP1_102_34590_20120417_151435_inLine +BABEL_OP1_102_34590_20120417_151435_outLine +BABEL_OP1_102_34590_20120417_155556_inLine +BABEL_OP1_102_34590_20120417_155556_outLine +BABEL_OP1_102_34930_20120411_200043_inLine +BABEL_OP1_102_34930_20120411_200043_outLine +BABEL_OP1_102_35011_20120420_020024_inLine +BABEL_OP1_102_35011_20120420_020024_outLine +BABEL_OP1_102_35229_20121106_204019_inLine +BABEL_OP1_102_35229_20121106_204019_outLine +BABEL_OP1_102_35324_20120426_180016_inLine +BABEL_OP1_102_35324_20120426_180016_outLine +BABEL_OP1_102_35324_20120426_203214_inLine +BABEL_OP1_102_35324_20120426_203214_outLine +BABEL_OP1_102_35455_20121112_000231_inLine +BABEL_OP1_102_35455_20121112_000231_outLine +BABEL_OP1_102_36868_20130209_201544_inLine +BABEL_OP1_102_36868_20130209_201544_outLine +BABEL_OP1_102_37260_20120808_012733_inLine +BABEL_OP1_102_37260_20120808_012733_outLine +BABEL_OP1_102_37260_20120808_014150_inLine +BABEL_OP1_102_37260_20120808_014150_outLine +BABEL_OP1_102_37268_20121226_203217_inLine +BABEL_OP1_102_37268_20121226_203217_outLine +BABEL_OP1_102_37285_20120405_223443_inLine +BABEL_OP1_102_37285_20120405_223443_outLine +BABEL_OP1_102_37444_20130128_032426_inLine +BABEL_OP1_102_37444_20130128_032426_outLine +BABEL_OP1_102_37461_20120409_191629_inLine +BABEL_OP1_102_37461_20120409_191629_outLine +BABEL_OP1_102_37461_20120409_194138_inLine +BABEL_OP1_102_37461_20120409_194138_outLine +BABEL_OP1_102_37461_20120409_195519_inLine +BABEL_OP1_102_37461_20120409_195519_outLine +BABEL_OP1_102_38264_20121105_050622_inLine +BABEL_OP1_102_38264_20121105_050622_outLine +BABEL_OP1_102_38592_20121225_215825_inLine +BABEL_OP1_102_38592_20121225_215825_outLine +BABEL_OP1_102_38698_20120401_215032_inLine +BABEL_OP1_102_38698_20120401_215032_outLine +BABEL_OP1_102_38863_20121011_183009_inLine +BABEL_OP1_102_38863_20121011_183009_outLine +BABEL_OP1_102_39114_20120930_180045_inLine +BABEL_OP1_102_39114_20120930_180045_outLine +BABEL_OP1_102_39364_20121105_220855_inLine +BABEL_OP1_102_39364_20121105_220855_outLine +BABEL_OP1_102_39430_20120411_182026_inLine +BABEL_OP1_102_39430_20120411_182026_outLine +BABEL_OP1_102_39430_20120411_184729_inLine +BABEL_OP1_102_39430_20120411_184729_outLine +BABEL_OP1_102_40133_20121112_214034_inLine +BABEL_OP1_102_40133_20121112_214034_outLine +BABEL_OP1_102_40168_20120428_173400_inLine +BABEL_OP1_102_40168_20120428_173400_outLine +BABEL_OP1_102_40882_20130209_204142_inLine +BABEL_OP1_102_40882_20130209_204142_outLine +BABEL_OP1_102_41561_20121111_220752_inLine +BABEL_OP1_102_41561_20121111_220752_outLine +BABEL_OP1_102_41949_20120426_222144_inLine +BABEL_OP1_102_41949_20120426_222144_outLine +BABEL_OP1_102_42615_20120327_180819_inLine +BABEL_OP1_102_42615_20120327_180819_outLine +BABEL_OP1_102_42651_20120409_221530_inLine +BABEL_OP1_102_42651_20120409_221530_outLine +BABEL_OP1_102_42749_20121114_005458_inLine +BABEL_OP1_102_42749_20121114_005458_outLine +BABEL_OP1_102_42749_20121114_010754_inLine +BABEL_OP1_102_42749_20121114_010754_outLine +BABEL_OP1_102_43383_20120406_193121_inLine +BABEL_OP1_102_43383_20120406_193121_outLine +BABEL_OP1_102_43423_20120919_201131_inLine +BABEL_OP1_102_43423_20120919_201131_outLine +BABEL_OP1_102_43426_20120501_170331_inLine +BABEL_OP1_102_43426_20120501_170331_outLine +BABEL_OP1_102_43553_20120408_174809_inLine +BABEL_OP1_102_43553_20120408_174809_outLine +BABEL_OP1_102_43652_20120428_191659_inLine 
+BABEL_OP1_102_43652_20120428_191659_outLine +BABEL_OP1_102_44649_20120611_185930_inLine +BABEL_OP1_102_44649_20120611_185930_outLine +BABEL_OP1_102_44829_20120907_011054_inLine +BABEL_OP1_102_44829_20120907_011054_outLine +BABEL_OP1_102_44829_20120907_013730_inLine +BABEL_OP1_102_44829_20120907_013730_outLine +BABEL_OP1_102_45361_20120611_222502_inLine +BABEL_OP1_102_45361_20120611_222502_outLine +BABEL_OP1_102_45677_20130123_192645_inLine +BABEL_OP1_102_45677_20130123_192645_outLine +BABEL_OP1_102_45681_20120623_173741_inLine +BABEL_OP1_102_45681_20120623_173741_outLine +BABEL_OP1_102_45738_20120806_202458_inLine +BABEL_OP1_102_45738_20120806_202458_outLine +BABEL_OP1_102_45892_20120408_220557_inLine +BABEL_OP1_102_45892_20120408_220557_outLine +BABEL_OP1_102_45931_20120421_233726_inLine +BABEL_OP1_102_45931_20120421_233726_outLine +BABEL_OP1_102_46002_20121009_215715_inLine +BABEL_OP1_102_46002_20121009_215715_outLine +BABEL_OP1_102_46269_20121110_215228_inLine +BABEL_OP1_102_46269_20121110_215228_outLine +BABEL_OP1_102_47634_20120408_214325_inLine +BABEL_OP1_102_47634_20120408_214325_outLine +BABEL_OP1_102_47823_20120804_180038_inLine +BABEL_OP1_102_47823_20120804_180038_outLine +BABEL_OP1_102_48410_20120407_204734_inLine +BABEL_OP1_102_48410_20120407_204734_outLine +BABEL_OP1_102_48976_20120410_161651_inLine +BABEL_OP1_102_48976_20120410_161651_outLine +BABEL_OP1_102_49042_20120408_165038_inLine +BABEL_OP1_102_49042_20120408_165038_outLine +BABEL_OP1_102_49628_20120817_204731_inLine +BABEL_OP1_102_49628_20120817_204731_outLine +BABEL_OP1_102_49864_20120421_155657_inLine +BABEL_OP1_102_49864_20120421_155657_outLine +BABEL_OP1_102_50555_20120606_224819_inLine +BABEL_OP1_102_50555_20120606_224819_outLine +BABEL_OP1_102_50597_20120623_193352_inLine +BABEL_OP1_102_50597_20120623_193352_outLine +BABEL_OP1_102_50718_20120421_191449_inLine +BABEL_OP1_102_50718_20120421_191449_outLine +BABEL_OP1_102_50752_20121227_204235_inLine +BABEL_OP1_102_50752_20121227_204235_outLine +BABEL_OP1_102_50763_20120405_203621_inLine +BABEL_OP1_102_50763_20120405_203621_outLine +BABEL_OP1_102_50798_20120426_190454_inLine +BABEL_OP1_102_50798_20120426_190454_outLine +BABEL_OP1_102_52335_20130123_183229_inLine +BABEL_OP1_102_52335_20130123_183229_outLine +BABEL_OP1_102_52606_20121009_222016_inLine +BABEL_OP1_102_52606_20121009_222016_outLine +BABEL_OP1_102_52642_20120803_212045_inLine +BABEL_OP1_102_52642_20120803_212045_outLine +BABEL_OP1_102_52691_20120407_210408_inLine +BABEL_OP1_102_52691_20120407_210408_outLine +BABEL_OP1_102_52691_20120407_211728_inLine +BABEL_OP1_102_52691_20120407_211728_outLine +BABEL_OP1_102_52691_20120407_213757_inLine +BABEL_OP1_102_52691_20120407_213757_outLine +BABEL_OP1_102_52902_20120607_175045_inLine +BABEL_OP1_102_52902_20120607_175045_outLine +BABEL_OP1_102_52902_20120607_180239_inLine +BABEL_OP1_102_52902_20120607_180239_outLine +BABEL_OP1_102_53500_20120428_175953_inLine +BABEL_OP1_102_53500_20120428_175953_outLine +BABEL_OP1_102_53703_20120409_180047_inLine +BABEL_OP1_102_53703_20120409_180047_outLine +BABEL_OP1_102_53982_20120607_220642_inLine +BABEL_OP1_102_53982_20120607_220642_outLine +BABEL_OP1_102_54241_20120911_024357_inLine +BABEL_OP1_102_54241_20120911_024357_outLine +BABEL_OP1_102_54241_20120911_025705_inLine +BABEL_OP1_102_54241_20120911_025705_outLine +BABEL_OP1_102_55182_20120330_201037_inLine +BABEL_OP1_102_55182_20120330_201037_outLine +BABEL_OP1_102_55450_20121013_171507_inLine +BABEL_OP1_102_55450_20121013_171507_outLine 
+BABEL_OP1_102_55470_20120429_194956_inLine +BABEL_OP1_102_55470_20120429_194956_outLine +BABEL_OP1_102_55823_20121010_005200_inLine +BABEL_OP1_102_55823_20121010_005200_outLine +BABEL_OP1_102_55874_20121108_215431_inLine +BABEL_OP1_102_55874_20121108_215431_outLine +BABEL_OP1_102_56070_20120410_224512_inLine +BABEL_OP1_102_56070_20120410_224512_outLine +BABEL_OP1_102_56648_20120615_181652_inLine +BABEL_OP1_102_56648_20120615_181652_outLine +BABEL_OP1_102_56812_20121010_203710_inLine +BABEL_OP1_102_56812_20121010_203710_outLine +BABEL_OP1_102_56943_20121221_203039_inLine +BABEL_OP1_102_56943_20121221_203039_outLine +BABEL_OP1_102_57039_20121107_201157_inLine +BABEL_OP1_102_57039_20121107_201157_outLine +BABEL_OP1_102_57422_20120607_213941_inLine +BABEL_OP1_102_57422_20120607_213941_outLine +BABEL_OP1_102_57629_20121010_011015_inLine +BABEL_OP1_102_57629_20121010_011015_outLine +BABEL_OP1_102_57907_20121013_035627_inLine +BABEL_OP1_102_57907_20121013_035627_outLine +BABEL_OP1_102_58715_20120425_190758_inLine +BABEL_OP1_102_58715_20120425_190758_outLine +BABEL_OP1_102_58863_20120404_195038_inLine +BABEL_OP1_102_58863_20120404_195038_outLine +BABEL_OP1_102_58947_20121106_203812_inLine +BABEL_OP1_102_58947_20121106_203812_outLine +BABEL_OP1_102_58947_20121106_205338_inLine +BABEL_OP1_102_58947_20121106_205338_outLine +BABEL_OP1_102_59169_20120611_172953_inLine +BABEL_OP1_102_59169_20120611_172953_outLine +BABEL_OP1_102_59383_20121220_151350_inLine +BABEL_OP1_102_59383_20121220_151350_outLine +BABEL_OP1_102_59891_20120611_212238_inLine +BABEL_OP1_102_59891_20120611_212238_outLine +BABEL_OP1_102_59925_20121111_214225_inLine +BABEL_OP1_102_59925_20121111_214225_outLine +BABEL_OP1_102_60193_20120419_201756_inLine +BABEL_OP1_102_60193_20120419_201756_outLine +BABEL_OP1_102_60277_20120615_195600_inLine +BABEL_OP1_102_60277_20120615_195600_outLine +BABEL_OP1_102_60826_20120606_231535_inLine +BABEL_OP1_102_60826_20120606_231535_outLine +BABEL_OP1_102_60848_20121110_170724_inLine +BABEL_OP1_102_60848_20121110_170724_outLine +BABEL_OP1_102_60881_20120401_212818_inLine +BABEL_OP1_102_60881_20120401_212818_outLine +BABEL_OP1_102_60995_20121107_203546_inLine +BABEL_OP1_102_60995_20121107_203546_outLine +BABEL_OP1_102_61263_20121112_213923_inLine +BABEL_OP1_102_61263_20121112_213923_outLine +BABEL_OP1_102_61446_20120420_184155_inLine +BABEL_OP1_102_61446_20120420_184155_outLine +BABEL_OP1_102_62132_20120614_214158_inLine +BABEL_OP1_102_62132_20120614_214158_outLine +BABEL_OP1_102_62923_20130122_190544_inLine +BABEL_OP1_102_62923_20130122_190544_outLine +BABEL_OP1_102_63076_20121224_225415_inLine +BABEL_OP1_102_63076_20121224_225415_outLine +BABEL_OP1_102_64185_20120722_220159_inLine +BABEL_OP1_102_64185_20120722_220159_outLine +BABEL_OP1_102_64351_20120608_202610_inLine +BABEL_OP1_102_64351_20120608_202610_outLine +BABEL_OP1_102_65248_20120317_180718_inLine +BABEL_OP1_102_65248_20120317_180718_outLine +BABEL_OP1_102_65273_20121226_233200_inLine +BABEL_OP1_102_65273_20121226_233200_outLine +BABEL_OP1_102_65371_20121228_213615_inLine +BABEL_OP1_102_65371_20121228_213615_outLine +BABEL_OP1_102_65415_20120410_193034_inLine +BABEL_OP1_102_65415_20120410_193034_outLine +BABEL_OP1_102_65580_20120320_234602_inLine +BABEL_OP1_102_65580_20120320_234602_outLine +BABEL_OP1_102_65837_20121106_201713_inLine +BABEL_OP1_102_65837_20121106_201713_outLine +BABEL_OP1_102_66416_20120817_204557_inLine +BABEL_OP1_102_66416_20120817_204557_outLine +BABEL_OP1_102_66441_20120411_170112_inLine 
+BABEL_OP1_102_66441_20120411_170112_outLine +BABEL_OP1_102_66559_20121227_172234_inLine +BABEL_OP1_102_66559_20121227_172234_outLine +BABEL_OP1_102_67150_20121106_232551_inLine +BABEL_OP1_102_67150_20121106_232551_outLine +BABEL_OP1_102_67733_20120409_192100_inLine +BABEL_OP1_102_67733_20120409_192100_outLine +BABEL_OP1_102_67750_20120330_210301_inLine +BABEL_OP1_102_67750_20120330_210301_outLine +BABEL_OP1_102_67798_20120408_211247_inLine +BABEL_OP1_102_67798_20120408_211247_outLine +BABEL_OP1_102_67916_20121224_185018_inLine +BABEL_OP1_102_67916_20121224_185018_outLine +BABEL_OP1_102_69049_20120422_174706_inLine +BABEL_OP1_102_69049_20120422_174706_outLine +BABEL_OP1_102_69145_20121006_214000_inLine +BABEL_OP1_102_69145_20121006_214000_outLine +BABEL_OP1_102_69275_20121009_000322_inLine +BABEL_OP1_102_69275_20121009_000322_outLine +BABEL_OP1_102_69368_20120328_214605_inLine +BABEL_OP1_102_69368_20120328_214605_outLine +BABEL_OP1_102_70555_20120421_203231_inLine +BABEL_OP1_102_70555_20120421_203231_outLine +BABEL_OP1_102_71778_20120608_222028_inLine +BABEL_OP1_102_71778_20120608_222028_outLine +BABEL_OP1_102_72032_20120329_225115_inLine +BABEL_OP1_102_72032_20120329_225115_outLine +BABEL_OP1_102_72718_20121010_030640_inLine +BABEL_OP1_102_72718_20121010_030640_outLine +BABEL_OP1_102_72799_20120428_225215_inLine +BABEL_OP1_102_72799_20120428_225215_outLine +BABEL_OP1_102_73050_20120929_012255_inLine +BABEL_OP1_102_73050_20120929_012255_outLine +BABEL_OP1_102_73438_20121103_170431_inLine +BABEL_OP1_102_73438_20121103_170431_outLine +BABEL_OP1_102_73440_20120428_195653_inLine +BABEL_OP1_102_73440_20120428_195653_outLine +BABEL_OP1_102_73452_20121003_021245_inLine +BABEL_OP1_102_73452_20121003_021245_outLine +BABEL_OP1_102_73786_20120420_171039_inLine +BABEL_OP1_102_73786_20120420_171039_outLine +BABEL_OP1_102_74043_20120422_170724_inLine +BABEL_OP1_102_74043_20120422_170724_outLine +BABEL_OP1_102_74368_20121008_041653_inLine +BABEL_OP1_102_74368_20121008_041653_outLine +BABEL_OP1_102_74709_20120806_191528_inLine +BABEL_OP1_102_74709_20120806_191528_outLine +BABEL_OP1_102_74823_20120330_181459_inLine +BABEL_OP1_102_74823_20120330_181459_outLine +BABEL_OP1_102_75140_20120330_171509_inLine +BABEL_OP1_102_75140_20120330_171509_outLine +BABEL_OP1_102_75354_20121105_033257_inLine +BABEL_OP1_102_75354_20121105_033257_outLine +BABEL_OP1_102_75498_20120806_180214_inLine +BABEL_OP1_102_75498_20120806_180214_outLine +BABEL_OP1_102_75680_20121110_180407_inLine +BABEL_OP1_102_75680_20121110_180407_outLine +BABEL_OP1_102_75990_20120426_182351_inLine +BABEL_OP1_102_75990_20120426_182351_outLine +BABEL_OP1_102_76331_20120806_185250_inLine +BABEL_OP1_102_76331_20120806_185250_outLine +BABEL_OP1_102_76451_20120329_193459_inLine +BABEL_OP1_102_76451_20120329_193459_outLine +BABEL_OP1_102_77244_20121001_003159_inLine +BABEL_OP1_102_77244_20121001_003159_outLine +BABEL_OP1_102_77465_20120607_001521_inLine +BABEL_OP1_102_77465_20120607_001521_outLine +BABEL_OP1_102_77771_20121227_191404_inLine +BABEL_OP1_102_77771_20121227_191404_outLine +BABEL_OP1_102_77811_20130123_215211_inLine +BABEL_OP1_102_77811_20130123_215211_outLine +BABEL_OP1_102_78514_20120409_182010_inLine +BABEL_OP1_102_78514_20120409_182010_outLine +BABEL_OP1_102_79495_20120320_011136_inLine +BABEL_OP1_102_79495_20120320_011136_outLine +BABEL_OP1_102_80868_20121028_015553_inLine +BABEL_OP1_102_80868_20121028_015553_outLine +BABEL_OP1_102_81084_20120406_191910_inLine +BABEL_OP1_102_81084_20120406_191910_outLine 
+BABEL_OP1_102_81587_20121225_213038_inLine +BABEL_OP1_102_81587_20121225_213038_outLine +BABEL_OP1_102_81717_20130209_201202_inLine +BABEL_OP1_102_81717_20130209_201202_outLine +BABEL_OP1_102_81878_20120331_181439_inLine +BABEL_OP1_102_81878_20120331_181439_outLine +BABEL_OP1_102_81878_20120331_182958_inLine +BABEL_OP1_102_81878_20120331_182958_outLine +BABEL_OP1_102_82009_20121104_013002_inLine +BABEL_OP1_102_82009_20121104_013002_outLine +BABEL_OP1_102_82023_20120330_175253_inLine +BABEL_OP1_102_82023_20120330_175253_outLine +BABEL_OP1_102_82192_20120429_180649_inLine +BABEL_OP1_102_82192_20120429_180649_outLine +BABEL_OP1_102_82408_20120402_190241_inLine +BABEL_OP1_102_82408_20120402_190241_outLine +BABEL_OP1_102_83256_20120330_210950_inLine +BABEL_OP1_102_83256_20120330_210950_outLine +BABEL_OP1_102_83493_20120429_172305_inLine +BABEL_OP1_102_83493_20120429_172305_outLine +BABEL_OP1_102_83493_20120429_175508_inLine +BABEL_OP1_102_83493_20120429_175508_outLine +BABEL_OP1_102_83531_20120408_201200_inLine +BABEL_OP1_102_83531_20120408_201200_outLine +BABEL_OP1_102_83531_20120408_203827_inLine +BABEL_OP1_102_83531_20120408_203827_outLine +BABEL_OP1_102_83634_20130123_212154_inLine +BABEL_OP1_102_83634_20130123_212154_outLine +BABEL_OP1_102_83791_20120420_215616_inLine +BABEL_OP1_102_83791_20120420_215616_outLine +BABEL_OP1_102_84088_20120328_180739_inLine +BABEL_OP1_102_84088_20120328_180739_outLine +BABEL_OP1_102_84284_20121225_175332_inLine +BABEL_OP1_102_84284_20121225_175332_outLine +BABEL_OP1_102_84397_20121110_230552_inLine +BABEL_OP1_102_84397_20121110_230552_outLine +BABEL_OP1_102_84439_20120427_184114_inLine +BABEL_OP1_102_84439_20120427_184114_outLine +BABEL_OP1_102_84608_20120609_194053_inLine +BABEL_OP1_102_84608_20120609_194053_outLine +BABEL_OP1_102_84943_20120401_170153_inLine +BABEL_OP1_102_84943_20120401_170153_outLine +BABEL_OP1_102_85716_20120401_165708_inLine +BABEL_OP1_102_85716_20120401_165708_outLine +BABEL_OP1_102_86004_20120418_230109_inLine +BABEL_OP1_102_86004_20120418_230109_outLine +BABEL_OP1_102_86886_20121112_201306_inLine +BABEL_OP1_102_86886_20121112_201306_outLine +BABEL_OP1_102_87234_20121224_212540_inLine +BABEL_OP1_102_87234_20121224_212540_outLine +BABEL_OP1_102_87857_20120405_202526_inLine +BABEL_OP1_102_87857_20120405_202526_outLine +BABEL_OP1_102_87961_20120423_155726_inLine +BABEL_OP1_102_87961_20120423_155726_outLine +BABEL_OP1_102_88163_20121112_003006_inLine +BABEL_OP1_102_88163_20121112_003006_outLine +BABEL_OP1_102_89993_20120607_175900_inLine +BABEL_OP1_102_89993_20120607_175900_outLine +BABEL_OP1_102_90393_20120419_214927_inLine +BABEL_OP1_102_90393_20120419_214927_outLine +BABEL_OP1_102_90511_20120329_224306_inLine +BABEL_OP1_102_90511_20120329_224306_outLine +BABEL_OP1_102_90810_20120404_221650_inLine +BABEL_OP1_102_90810_20120404_221650_outLine +BABEL_OP1_102_90819_20120614_222542_inLine +BABEL_OP1_102_90819_20120614_222542_outLine +BABEL_OP1_102_90890_20120320_235811_inLine +BABEL_OP1_102_90890_20120320_235811_outLine +BABEL_OP1_102_90975_20130127_194034_inLine +BABEL_OP1_102_90975_20130127_194034_outLine +BABEL_OP1_102_90975_20130127_195301_inLine +BABEL_OP1_102_90975_20130127_195301_outLine +BABEL_OP1_102_91171_20121222_000026_inLine +BABEL_OP1_102_91171_20121222_000026_outLine +BABEL_OP1_102_91358_20121103_191541_inLine +BABEL_OP1_102_91358_20121103_191541_outLine +BABEL_OP1_102_91386_20121226_175240_inLine +BABEL_OP1_102_91386_20121226_175240_outLine +BABEL_OP1_102_91401_20120409_195325_inLine 
+BABEL_OP1_102_91401_20120409_195325_outLine +BABEL_OP1_102_91481_20120806_232222_inLine +BABEL_OP1_102_91481_20120806_232222_outLine +BABEL_OP1_102_91865_20130127_193426_inLine +BABEL_OP1_102_91865_20130127_193426_outLine +BABEL_OP1_102_92002_20120821_172434_inLine +BABEL_OP1_102_92002_20120821_172434_outLine +BABEL_OP1_102_92252_20120805_193105_inLine +BABEL_OP1_102_92252_20120805_193105_outLine +BABEL_OP1_102_92252_20120805_202508_inLine +BABEL_OP1_102_92252_20120805_202508_outLine +BABEL_OP1_102_92321_20120729_204129_inLine +BABEL_OP1_102_92321_20120729_204129_outLine +BABEL_OP1_102_92386_20120401_175909_inLine +BABEL_OP1_102_92386_20120401_175909_outLine +BABEL_OP1_102_92407_20120330_180101_inLine +BABEL_OP1_102_92407_20120330_180101_outLine +BABEL_OP1_102_92628_20120404_212106_inLine +BABEL_OP1_102_92628_20120404_212106_outLine +BABEL_OP1_102_92800_20120408_165253_inLine +BABEL_OP1_102_92800_20120408_165253_outLine +BABEL_OP1_102_93091_20120425_204602_inLine +BABEL_OP1_102_93091_20120425_204602_outLine +BABEL_OP1_102_93091_20120425_205745_inLine +BABEL_OP1_102_93091_20120425_205745_outLine +BABEL_OP1_102_93509_20120422_230046_inLine +BABEL_OP1_102_93509_20120422_230046_outLine +BABEL_OP1_102_93607_20120806_194627_inLine +BABEL_OP1_102_93607_20120806_194627_outLine +BABEL_OP1_102_94162_20130209_213329_inLine +BABEL_OP1_102_94162_20130209_213329_outLine +BABEL_OP1_102_94542_20130122_222709_inLine +BABEL_OP1_102_94542_20130122_222709_outLine +BABEL_OP1_102_94694_20120611_183126_inLine +BABEL_OP1_102_94694_20120611_183126_outLine +BABEL_OP1_102_94696_20130127_183814_inLine +BABEL_OP1_102_94696_20130127_183814_outLine +BABEL_OP1_102_95350_20120420_225049_inLine +BABEL_OP1_102_95350_20120420_225049_outLine +BABEL_OP1_102_95566_20121222_024129_inLine +BABEL_OP1_102_95566_20121222_024129_outLine +BABEL_OP1_102_95637_20120329_225942_inLine +BABEL_OP1_102_95637_20120329_225942_outLine +BABEL_OP1_102_95650_20120327_230850_inLine +BABEL_OP1_102_95650_20120327_230850_outLine +BABEL_OP1_102_95815_20120401_233401_inLine +BABEL_OP1_102_95815_20120401_233401_outLine +BABEL_OP1_102_95849_20121106_222829_inLine +BABEL_OP1_102_95849_20121106_222829_outLine +BABEL_OP1_102_96283_20120720_021526_inLine +BABEL_OP1_102_96283_20120720_021526_outLine +BABEL_OP1_102_96347_20120330_201932_inLine +BABEL_OP1_102_96347_20120330_201932_outLine +BABEL_OP1_102_96788_20120411_183347_inLine +BABEL_OP1_102_96788_20120411_183347_outLine +BABEL_OP1_102_97260_20120409_175649_inLine +BABEL_OP1_102_97260_20120409_175649_outLine +BABEL_OP1_102_97590_20121110_214746_inLine +BABEL_OP1_102_97590_20121110_214746_outLine +BABEL_OP1_102_97590_20121110_215543_inLine +BABEL_OP1_102_97590_20121110_215543_outLine +BABEL_OP1_102_98402_20121112_014920_inLine +BABEL_OP1_102_98402_20121112_014920_outLine +BABEL_OP1_102_98675_20130209_215547_inLine +BABEL_OP1_102_98675_20130209_215547_outLine +BABEL_OP1_102_99514_20120406_182505_inLine +BABEL_OP1_102_99514_20120406_182505_outLine +BABEL_OP1_102_99731_20121220_214209_inLine +BABEL_OP1_102_99731_20121220_214209_outLine +BABEL_OP1_102_99823_20120429_181728_inLine +BABEL_OP1_102_99823_20120429_181728_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list new file mode 100644 index 00000000000..f93c4c32be7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list @@ -0,0 +1,259 @@ +BABEL_OP1_102_11267_20120807_194639_inLine +BABEL_OP1_102_11267_20120807_194639_outLine 
+BABEL_OP1_102_11311_20120420_205813_inLine +BABEL_OP1_102_11311_20120420_205813_outLine +BABEL_OP1_102_14610_20120405_182316_inLine +BABEL_OP1_102_14610_20120405_182316_outLine +BABEL_OP1_102_14936_20120408_200722_inLine +BABEL_OP1_102_14936_20120408_200722_outLine +BABEL_OP1_102_16855_20121112_222619_inLine +BABEL_OP1_102_16855_20121112_222619_outLine +BABEL_OP1_102_17080_20120328_184723_inLine +BABEL_OP1_102_17080_20120328_184723_outLine +BABEL_OP1_102_19656_20120426_205905_inLine +BABEL_OP1_102_19656_20120426_205905_outLine +BABEL_OP1_102_22973_20121228_181929_inLine +BABEL_OP1_102_22973_20121228_181929_outLine +BABEL_OP1_102_24642_20121027_144752_inLine +BABEL_OP1_102_24642_20121027_144752_outLine +BABEL_OP1_102_24799_20120425_195004_inLine +BABEL_OP1_102_24799_20120425_195004_outLine +BABEL_OP1_102_25106_20120408_181647_inLine +BABEL_OP1_102_25106_20120408_181647_outLine +BABEL_OP1_102_25992_20120611_184443_inLine +BABEL_OP1_102_25992_20120611_184443_outLine +BABEL_OP1_102_26164_20121224_194642_inLine +BABEL_OP1_102_26164_20121224_194642_outLine +BABEL_OP1_102_27605_20120420_193239_inLine +BABEL_OP1_102_27605_20120420_193239_outLine +BABEL_OP1_102_27825_20120612_214044_inLine +BABEL_OP1_102_27825_20120612_214044_outLine +BABEL_OP1_102_27825_20120612_215834_inLine +BABEL_OP1_102_27825_20120612_215834_outLine +BABEL_OP1_102_27916_20121011_020742_inLine +BABEL_OP1_102_27916_20121011_020742_outLine +BABEL_OP1_102_29302_20120411_221747_inLine +BABEL_OP1_102_29302_20120411_221747_outLine +BABEL_OP1_102_29812_20120408_222336_inLine +BABEL_OP1_102_29812_20120408_222336_outLine +BABEL_OP1_102_30227_20121105_031526_inLine +BABEL_OP1_102_30227_20121105_031526_outLine +BABEL_OP1_102_31393_20120409_185950_inLine +BABEL_OP1_102_31393_20120409_185950_outLine +BABEL_OP1_102_31538_20120411_163952_inLine +BABEL_OP1_102_31538_20120411_163952_outLine +BABEL_OP1_102_31975_20120805_174531_inLine +BABEL_OP1_102_31975_20120805_174531_outLine +BABEL_OP1_102_32045_20120408_214902_inLine +BABEL_OP1_102_32045_20120408_214902_outLine +BABEL_OP1_102_32236_20130121_194429_inLine +BABEL_OP1_102_32236_20130121_194429_outLine +BABEL_OP1_102_32263_20120805_213534_inLine +BABEL_OP1_102_32263_20120805_213534_outLine +BABEL_OP1_102_32274_20120407_220211_inLine +BABEL_OP1_102_32274_20120407_220211_outLine +BABEL_OP1_102_34558_20120401_172719_inLine +BABEL_OP1_102_34558_20120401_172719_outLine +BABEL_OP1_102_35932_20120409_181050_inLine +BABEL_OP1_102_35932_20120409_181050_outLine +BABEL_OP1_102_35972_20120804_222857_inLine +BABEL_OP1_102_35972_20120804_222857_outLine +BABEL_OP1_102_36561_20120615_182603_inLine +BABEL_OP1_102_36561_20120615_182603_outLine +BABEL_OP1_102_37094_20120327_212647_inLine +BABEL_OP1_102_37094_20120327_212647_outLine +BABEL_OP1_102_37374_20120807_002505_inLine +BABEL_OP1_102_37374_20120807_002505_outLine +BABEL_OP1_102_37374_20120807_004102_inLine +BABEL_OP1_102_37374_20120807_004102_outLine +BABEL_OP1_102_39141_20121220_172812_inLine +BABEL_OP1_102_39141_20121220_172812_outLine +BABEL_OP1_102_39497_20120428_183546_inLine +BABEL_OP1_102_39497_20120428_183546_outLine +BABEL_OP1_102_39774_20121224_203424_inLine +BABEL_OP1_102_39774_20121224_203424_outLine +BABEL_OP1_102_40040_20120611_202254_inLine +BABEL_OP1_102_40040_20120611_202254_outLine +BABEL_OP1_102_41512_20121224_195155_inLine +BABEL_OP1_102_41512_20121224_195155_outLine +BABEL_OP1_102_41686_20120404_200841_inLine +BABEL_OP1_102_41686_20120404_200841_outLine +BABEL_OP1_102_42420_20121225_200910_inLine 
+BABEL_OP1_102_42420_20121225_200910_outLine +BABEL_OP1_102_43317_20120804_190955_inLine +BABEL_OP1_102_43317_20120804_190955_outLine +BABEL_OP1_102_44038_20121110_191648_inLine +BABEL_OP1_102_44038_20121110_191648_outLine +BABEL_OP1_102_44117_20121105_205012_inLine +BABEL_OP1_102_44117_20121105_205012_outLine +BABEL_OP1_102_44500_20120609_205327_inLine +BABEL_OP1_102_44500_20120609_205327_outLine +BABEL_OP1_102_44744_20120330_204705_inLine +BABEL_OP1_102_44744_20120330_204705_outLine +BABEL_OP1_102_45145_20120417_144517_inLine +BABEL_OP1_102_45145_20120417_144517_outLine +BABEL_OP1_102_45512_20120408_174807_inLine +BABEL_OP1_102_45512_20120408_174807_outLine +BABEL_OP1_102_45655_20120405_201151_inLine +BABEL_OP1_102_45655_20120405_201151_outLine +BABEL_OP1_102_45655_20120405_205759_inLine +BABEL_OP1_102_45655_20120405_205759_outLine +BABEL_OP1_102_47037_20120805_212557_inLine +BABEL_OP1_102_47037_20120805_212557_outLine +BABEL_OP1_102_47433_20120329_001114_inLine +BABEL_OP1_102_47433_20120329_001114_outLine +BABEL_OP1_102_47733_20120607_225347_inLine +BABEL_OP1_102_47733_20120607_225347_outLine +BABEL_OP1_102_49173_20121028_022705_inLine +BABEL_OP1_102_49173_20121028_022705_outLine +BABEL_OP1_102_51448_20121111_155248_inLine +BABEL_OP1_102_51448_20121111_155248_outLine +BABEL_OP1_102_52325_20120430_191407_inLine +BABEL_OP1_102_52325_20120430_191407_outLine +BABEL_OP1_102_52515_20120910_021046_inLine +BABEL_OP1_102_52515_20120910_021046_outLine +BABEL_OP1_102_52900_20120426_225238_inLine +BABEL_OP1_102_52900_20120426_225238_outLine +BABEL_OP1_102_52900_20120426_230606_inLine +BABEL_OP1_102_52900_20120426_230606_outLine +BABEL_OP1_102_52913_20121224_231026_inLine +BABEL_OP1_102_52913_20121224_231026_outLine +BABEL_OP1_102_53278_20120607_205252_inLine +BABEL_OP1_102_53278_20120607_205252_outLine +BABEL_OP1_102_53649_20121008_013457_inLine +BABEL_OP1_102_53649_20121008_013457_outLine +BABEL_OP1_102_54818_20120407_212156_inLine +BABEL_OP1_102_54818_20120407_212156_outLine +BABEL_OP1_102_55786_20120401_224618_inLine +BABEL_OP1_102_55786_20120401_224618_outLine +BABEL_OP1_102_57277_20121227_213448_inLine +BABEL_OP1_102_57277_20121227_213448_outLine +BABEL_OP1_102_57454_20120615_183718_inLine +BABEL_OP1_102_57454_20120615_183718_outLine +BABEL_OP1_102_58536_20120426_204822_inLine +BABEL_OP1_102_60064_20121006_215918_inLine +BABEL_OP1_102_60064_20121006_215918_outLine +BABEL_OP1_102_61351_20121220_161410_inLine +BABEL_OP1_102_61351_20121220_161410_outLine +BABEL_OP1_102_62163_20121011_012642_inLine +BABEL_OP1_102_62163_20121011_012642_outLine +BABEL_OP1_102_62277_20120722_203834_inLine +BABEL_OP1_102_62277_20120722_203834_outLine +BABEL_OP1_102_63233_20120405_184701_inLine +BABEL_OP1_102_63233_20120405_184701_outLine +BABEL_OP1_102_63339_20120328_190947_inLine +BABEL_OP1_102_63339_20120328_190947_outLine +BABEL_OP1_102_63353_20120409_193206_inLine +BABEL_OP1_102_63353_20120409_193206_outLine +BABEL_OP1_102_63353_20120409_194011_inLine +BABEL_OP1_102_63353_20120409_194011_outLine +BABEL_OP1_102_64372_20120406_183945_inLine +BABEL_OP1_102_64372_20120406_183945_outLine +BABEL_OP1_102_65989_20120607_000921_inLine +BABEL_OP1_102_65989_20120607_000921_outLine +BABEL_OP1_102_66275_20120719_004257_inLine +BABEL_OP1_102_66275_20120719_004257_outLine +BABEL_OP1_102_66883_20120428_204106_inLine +BABEL_OP1_102_66883_20120428_204106_outLine +BABEL_OP1_102_67304_20120806_203538_inLine +BABEL_OP1_102_67304_20120806_203538_outLine +BABEL_OP1_102_68191_20120606_224106_inLine 
+BABEL_OP1_102_68191_20120606_224106_outLine +BABEL_OP1_102_68337_20120420_004336_inLine +BABEL_OP1_102_68337_20120420_004336_outLine +BABEL_OP1_102_68671_20121014_155929_inLine +BABEL_OP1_102_68671_20121014_155929_outLine +BABEL_OP1_102_69548_20120330_180855_inLine +BABEL_OP1_102_69548_20120330_180855_outLine +BABEL_OP1_102_72907_20121219_204634_inLine +BABEL_OP1_102_72907_20121219_204634_outLine +BABEL_OP1_102_74295_20120618_234350_inLine +BABEL_OP1_102_74295_20120618_234350_outLine +BABEL_OP1_102_74625_20121010_165038_inLine +BABEL_OP1_102_74625_20121010_165038_outLine +BABEL_OP1_102_75151_20121017_164432_inLine +BABEL_OP1_102_75151_20121017_164432_outLine +BABEL_OP1_102_75151_20121017_164918_inLine +BABEL_OP1_102_75151_20121017_164918_outLine +BABEL_OP1_102_75248_20121106_201226_inLine +BABEL_OP1_102_75248_20121106_201226_outLine +BABEL_OP1_102_75333_20130121_191749_inLine +BABEL_OP1_102_75333_20130121_191749_outLine +BABEL_OP1_102_75871_20120910_013715_inLine +BABEL_OP1_102_75871_20120910_013715_outLine +BABEL_OP1_102_77238_20120331_175602_inLine +BABEL_OP1_102_77238_20120331_175602_outLine +BABEL_OP1_102_77238_20120331_181840_inLine +BABEL_OP1_102_77238_20120331_181840_outLine +BABEL_OP1_102_77697_20130128_202557_inLine +BABEL_OP1_102_77697_20130128_202557_outLine +BABEL_OP1_102_77697_20130128_203734_inLine +BABEL_OP1_102_77697_20130128_203734_outLine +BABEL_OP1_102_78290_20121010_135127_inLine +BABEL_OP1_102_78290_20121010_135127_outLine +BABEL_OP1_102_78681_20121112_013035_inLine +BABEL_OP1_102_78681_20121112_013035_outLine +BABEL_OP1_102_79293_20120404_182947_inLine +BABEL_OP1_102_79293_20120404_182947_outLine +BABEL_OP1_102_80075_20120617_182928_inLine +BABEL_OP1_102_80075_20120617_182928_outLine +BABEL_OP1_102_80247_20130121_182518_inLine +BABEL_OP1_102_80247_20130121_182518_outLine +BABEL_OP1_102_81053_20130127_205227_inLine +BABEL_OP1_102_81053_20130127_205227_outLine +BABEL_OP1_102_81119_20130209_215021_inLine +BABEL_OP1_102_81119_20130209_215021_outLine +BABEL_OP1_102_81642_20120608_184707_inLine +BABEL_OP1_102_81642_20120608_184707_outLine +BABEL_OP1_102_81647_20121010_143838_inLine +BABEL_OP1_102_81647_20121010_143838_outLine +BABEL_OP1_102_81820_20130121_175432_inLine +BABEL_OP1_102_81820_20130121_175432_outLine +BABEL_OP1_102_82881_20120330_215822_inLine +BABEL_OP1_102_82881_20120330_215822_outLine +BABEL_OP1_102_83186_20120817_222832_inLine +BABEL_OP1_102_83186_20120817_222832_outLine +BABEL_OP1_102_83219_20121112_012249_inLine +BABEL_OP1_102_83219_20121112_012249_outLine +BABEL_OP1_102_83702_20120419_173053_inLine +BABEL_OP1_102_83702_20120419_173053_outLine +BABEL_OP1_102_84491_20121026_003510_inLine +BABEL_OP1_102_84491_20121026_003510_outLine +BABEL_OP1_102_86998_20121110_171744_inLine +BABEL_OP1_102_86998_20121110_171744_outLine +BABEL_OP1_102_87077_20120429_190133_inLine +BABEL_OP1_102_87077_20120429_190133_outLine +BABEL_OP1_102_87634_20120327_210105_inLine +BABEL_OP1_102_87634_20120327_210105_outLine +BABEL_OP1_102_88294_20120331_223132_inLine +BABEL_OP1_102_88383_20120409_194253_inLine +BABEL_OP1_102_88383_20120409_194253_outLine +BABEL_OP1_102_88532_20120805_223539_inLine +BABEL_OP1_102_88532_20120805_223539_outLine +BABEL_OP1_102_88982_20120607_221313_inLine +BABEL_OP1_102_88982_20120607_221313_outLine +BABEL_OP1_102_89345_20120331_184511_inLine +BABEL_OP1_102_89345_20120331_184511_outLine +BABEL_OP1_102_89345_20120331_190311_inLine +BABEL_OP1_102_89345_20120331_190311_outLine +BABEL_OP1_102_90024_20121106_025738_inLine 
+BABEL_OP1_102_90024_20121106_025738_outLine +BABEL_OP1_102_90490_20120318_194705_inLine +BABEL_OP1_102_90490_20120318_194705_outLine +BABEL_OP1_102_90730_20120420_175543_inLine +BABEL_OP1_102_90951_20120929_024352_inLine +BABEL_OP1_102_90951_20120929_024352_outLine +BABEL_OP1_102_91409_20120425_213805_inLine +BABEL_OP1_102_91409_20120425_213805_outLine +BABEL_OP1_102_92642_20120329_225854_inLine +BABEL_OP1_102_92642_20120329_225854_outLine +BABEL_OP1_102_92735_20120425_232435_inLine +BABEL_OP1_102_92735_20120425_232435_outLine +BABEL_OP1_102_92820_20121104_193517_inLine +BABEL_OP1_102_92820_20121104_193517_outLine +BABEL_OP1_102_94218_20121112_171836_inLine +BABEL_OP1_102_94218_20121112_171836_outLine +BABEL_OP1_102_97052_20121013_023448_inLine +BABEL_OP1_102_97052_20121013_023448_outLine +BABEL_OP1_102_99694_20120401_230049_inLine +BABEL_OP1_102_99694_20120401_230049_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/dev.list b/egs/babel/s5d/conf/lists/103-bengali/dev.list new file mode 100644 index 00000000000..4dd26d694d3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/dev.list @@ -0,0 +1,125 @@ +BABEL_OP1_103_10569_20111221_201913_inLine +BABEL_OP1_103_10569_20111221_201913_outLine +BABEL_OP1_103_10576_20111221_214850_inLine +BABEL_OP1_103_10576_20111221_214850_outLine +BABEL_OP1_103_11153_20120204_001459_inLine +BABEL_OP1_103_11153_20120204_001459_outLine +BABEL_OP1_103_12600_20120127_235915_inLine +BABEL_OP1_103_12600_20120127_235915_outLine +BABEL_OP1_103_13990_20120121_225453_inLine +BABEL_OP1_103_13990_20120121_225453_outLine +BABEL_OP1_103_14002_20120116_220151_inLine +BABEL_OP1_103_14002_20120116_220151_outLine +BABEL_OP1_103_14852_20120203_024637_inLine +BABEL_OP1_103_14852_20120203_024637_outLine +BABEL_OP1_103_17081_20120608_004038_inLine +BABEL_OP1_103_17081_20120608_004038_outLine +BABEL_OP1_103_21203_20120523_225358_inLine +BABEL_OP1_103_21203_20120523_225358_outLine +BABEL_OP1_103_22340_20120513_220417_inLine +BABEL_OP1_103_22340_20120513_220417_outLine +BABEL_OP1_103_24503_20120127_182430_inLine +BABEL_OP1_103_24503_20120127_182430_outLine +BABEL_OP1_103_24810_20120114_225518_inLine +BABEL_OP1_103_24810_20120114_225518_outLine +BABEL_OP1_103_25067_20120129_230104_inLine +BABEL_OP1_103_25067_20120129_230104_outLine +BABEL_OP1_103_27912_20120123_185402_inLine +BABEL_OP1_103_27912_20120123_185402_outLine +BABEL_OP1_103_31084_20120729_201226_inLine +BABEL_OP1_103_31084_20120729_201226_outLine +BABEL_OP1_103_37083_20120125_224559_inLine +BABEL_OP1_103_38382_20120110_013824_inLine +BABEL_OP1_103_38382_20120110_013824_outLine +BABEL_OP1_103_40114_20120122_183602_inLine +BABEL_OP1_103_40114_20120122_183602_outLine +BABEL_OP1_103_41417_20120122_224848_inLine +BABEL_OP1_103_41417_20120122_224848_outLine +BABEL_OP1_103_42929_20120118_211148_inLine +BABEL_OP1_103_42929_20120118_211148_outLine +BABEL_OP1_103_42929_20120118_212321_inLine +BABEL_OP1_103_42929_20120118_212321_outLine +BABEL_OP1_103_43051_20120524_163506_inLine +BABEL_OP1_103_43051_20120524_163506_outLine +BABEL_OP1_103_44799_20120131_222925_inLine +BABEL_OP1_103_44799_20120131_222925_outLine +BABEL_OP1_103_48834_20111227_010514_inLine +BABEL_OP1_103_48834_20111227_010514_outLine +BABEL_OP1_103_49329_20120131_223617_inLine +BABEL_OP1_103_49329_20120131_223617_outLine +BABEL_OP1_103_50583_20120114_233345_inLine +BABEL_OP1_103_50583_20120114_233345_outLine +BABEL_OP1_103_50614_20120130_225030_inLine +BABEL_OP1_103_50614_20120130_225030_outLine +BABEL_OP1_103_50910_20120122_001708_inLine 
+BABEL_OP1_103_50910_20120122_001708_outLine +BABEL_OP1_103_52067_20120127_020600_inLine +BABEL_OP1_103_52845_20120126_200807_inLine +BABEL_OP1_103_52845_20120126_200807_outLine +BABEL_OP1_103_53805_20120126_211949_inLine +BABEL_OP1_103_53805_20120126_211949_outLine +BABEL_OP1_103_57087_20120204_181410_inLine +BABEL_OP1_103_57087_20120204_181410_outLine +BABEL_OP1_103_57721_20120531_194610_inLine +BABEL_OP1_103_57721_20120531_194610_outLine +BABEL_OP1_103_57742_20120125_200619_inLine +BABEL_OP1_103_57742_20120125_200619_outLine +BABEL_OP1_103_60462_20120521_181224_inLine +BABEL_OP1_103_60462_20120521_181224_outLine +BABEL_OP1_103_62038_20111230_004215_inLine +BABEL_OP1_103_62038_20111230_004215_outLine +BABEL_OP1_103_62169_20120304_153842_inLine +BABEL_OP1_103_62169_20120304_153842_outLine +BABEL_OP1_103_62584_20120305_152943_inLine +BABEL_OP1_103_62584_20120305_152943_outLine +BABEL_OP1_103_62837_20120307_223844_inLine +BABEL_OP1_103_62837_20120307_223844_outLine +BABEL_OP1_103_62837_20120307_225550_inLine +BABEL_OP1_103_62837_20120307_225550_outLine +BABEL_OP1_103_63220_20120514_232049_inLine +BABEL_OP1_103_63220_20120514_232049_outLine +BABEL_OP1_103_63444_20120316_030633_inLine +BABEL_OP1_103_64297_20120514_162741_inLine +BABEL_OP1_103_64297_20120514_162741_outLine +BABEL_OP1_103_64853_20120405_163727_inLine +BABEL_OP1_103_64853_20120405_163727_outLine +BABEL_OP1_103_65597_20120530_213140_inLine +BABEL_OP1_103_65597_20120530_213140_outLine +BABEL_OP1_103_65895_20120229_202918_inLine +BABEL_OP1_103_65895_20120229_202918_outLine +BABEL_OP1_103_66313_20120229_230907_inLine +BABEL_OP1_103_66313_20120229_230907_outLine +BABEL_OP1_103_66351_20120111_041605_inLine +BABEL_OP1_103_66351_20120111_041605_outLine +BABEL_OP1_103_66757_20120131_215301_inLine +BABEL_OP1_103_66757_20120131_215301_outLine +BABEL_OP1_103_67421_20120310_230757_inLine +BABEL_OP1_103_67421_20120310_230757_outLine +BABEL_OP1_103_69894_20120307_152955_inLine +BABEL_OP1_103_69894_20120307_152955_outLine +BABEL_OP1_103_76654_20120519_203100_inLine +BABEL_OP1_103_76654_20120519_203100_outLine +BABEL_OP1_103_77082_20120203_232638_inLine +BABEL_OP1_103_77082_20120203_232638_outLine +BABEL_OP1_103_80105_20120530_211541_inLine +BABEL_OP1_103_80105_20120530_211541_outLine +BABEL_OP1_103_80875_20120522_224314_inLine +BABEL_OP1_103_80875_20120522_224314_outLine +BABEL_OP1_103_81318_20120104_020938_inLine +BABEL_OP1_103_81318_20120104_020938_outLine +BABEL_OP1_103_81773_20120101_024120_inLine +BABEL_OP1_103_81773_20120101_024120_outLine +BABEL_OP1_103_82526_20120118_185334_inLine +BABEL_OP1_103_82526_20120118_185334_outLine +BABEL_OP1_103_86207_20120127_145936_inLine +BABEL_OP1_103_86207_20120127_145936_outLine +BABEL_OP1_103_88690_20120201_005057_inLine +BABEL_OP1_103_88690_20120201_005057_outLine +BABEL_OP1_103_91202_20111229_185342_inLine +BABEL_OP1_103_91202_20111229_185342_outLine +BABEL_OP1_103_91275_20120529_195749_inLine +BABEL_OP1_103_91275_20120529_195749_outLine +BABEL_OP1_103_93273_20120123_022109_inLine +BABEL_OP1_103_93273_20120123_022109_outLine +BABEL_OP1_103_95826_20120201_001701_inLine +BABEL_OP1_103_95826_20120201_001701_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/eval.list b/egs/babel/s5d/conf/lists/103-bengali/eval.list new file mode 100644 index 00000000000..03220030e17 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/eval.list @@ -0,0 +1,193 @@ +BABEL_OP1_103_10490_20111220_235407_inLine +BABEL_OP1_103_10490_20111220_235407_outLine +BABEL_OP1_103_11146_20120528_182053_inLine 
+BABEL_OP1_103_11146_20120528_182053_outLine +BABEL_OP1_103_11168_20111228_213615_inLine +BABEL_OP1_103_11168_20111228_213615_outLine +BABEL_OP1_103_11388_20120520_161554_inLine +BABEL_OP1_103_11511_20120526_232041_inLine +BABEL_OP1_103_11511_20120526_232041_outLine +BABEL_OP1_103_12959_20120127_201055_inLine +BABEL_OP1_103_12959_20120127_201055_outLine +BABEL_OP1_103_14503_20120117_213020_inLine +BABEL_OP1_103_14503_20120117_213020_outLine +BABEL_OP1_103_14713_20120123_195706_inLine +BABEL_OP1_103_14713_20120123_195706_outLine +BABEL_OP1_103_16352_20120201_160631_inLine +BABEL_OP1_103_16352_20120201_160631_outLine +BABEL_OP1_103_17749_20120115_221220_inLine +BABEL_OP1_103_17749_20120115_221220_outLine +BABEL_OP1_103_23322_20120519_165208_inLine +BABEL_OP1_103_23322_20120519_165208_outLine +BABEL_OP1_103_24427_20120513_210712_inLine +BABEL_OP1_103_24427_20120513_210712_outLine +BABEL_OP1_103_25147_20120201_164613_inLine +BABEL_OP1_103_25147_20120201_164614_outLine +BABEL_OP1_103_25525_20120114_010656_inLine +BABEL_OP1_103_25525_20120114_010656_outLine +BABEL_OP1_103_27267_20120101_213815_inLine +BABEL_OP1_103_27267_20120101_213815_outLine +BABEL_OP1_103_28046_20120407_154949_inLine +BABEL_OP1_103_28046_20120407_154949_outLine +BABEL_OP1_103_28516_20120421_231427_inLine +BABEL_OP1_103_28516_20120421_231428_outLine +BABEL_OP1_103_28533_20120421_224406_inLine +BABEL_OP1_103_28533_20120421_224406_outLine +BABEL_OP1_103_28534_20120421_222000_inLine +BABEL_OP1_103_28561_20120421_215523_inLine +BABEL_OP1_103_28561_20120421_215523_outLine +BABEL_OP1_103_28607_20120421_213119_inLine +BABEL_OP1_103_28607_20120421_213119_outLine +BABEL_OP1_103_28834_20120421_205128_inLine +BABEL_OP1_103_28834_20120421_205941_inLine +BABEL_OP1_103_28922_20120421_202038_inLine +BABEL_OP1_103_28922_20120421_202038_outLine +BABEL_OP1_103_29061_20120421_195632_inLine +BABEL_OP1_103_29061_20120421_195632_outLine +BABEL_OP1_103_29397_20120421_192844_inLine +BABEL_OP1_103_29397_20120421_192844_outLine +BABEL_OP1_103_29411_20120421_190505_inLine +BABEL_OP1_103_29411_20120421_190505_outLine +BABEL_OP1_103_29471_20120421_183732_inLine +BABEL_OP1_103_29471_20120421_183732_outLine +BABEL_OP1_103_29489_20120421_181719_inLine +BABEL_OP1_103_29489_20120421_181719_outLine +BABEL_OP1_103_29513_20120421_170000_inLine +BABEL_OP1_103_29513_20120421_170000_outLine +BABEL_OP1_103_30747_20120111_231823_inLine +BABEL_OP1_103_30747_20120111_231823_outLine +BABEL_OP1_103_30848_20120102_001515_inLine +BABEL_OP1_103_30848_20120102_001515_outLine +BABEL_OP1_103_32703_20120128_203538_inLine +BABEL_OP1_103_32703_20120128_203538_outLine +BABEL_OP1_103_33590_20120122_165207_inLine +BABEL_OP1_103_33590_20120122_165207_outLine +BABEL_OP1_103_33590_20120122_170610_inLine +BABEL_OP1_103_33590_20120122_170610_outLine +BABEL_OP1_103_33809_20120122_184348_inLine +BABEL_OP1_103_33809_20120122_184349_outLine +BABEL_OP1_103_34102_20120528_233758_inLine +BABEL_OP1_103_34102_20120528_233758_outLine +BABEL_OP1_103_35052_20120118_164925_inLine +BABEL_OP1_103_35052_20120118_164925_outLine +BABEL_OP1_103_35052_20120118_171428_inLine +BABEL_OP1_103_35052_20120118_171428_outLine +BABEL_OP1_103_36842_20120120_013653_inLine +BABEL_OP1_103_36842_20120120_013653_outLine +BABEL_OP1_103_37798_20120121_014828_inLine +BABEL_OP1_103_37798_20120121_014828_outLine +BABEL_OP1_103_40701_20120523_230827_inLine +BABEL_OP1_103_40701_20120523_230827_outLine +BABEL_OP1_103_40701_20120523_232042_inLine +BABEL_OP1_103_40701_20120523_232042_outLine 
+BABEL_OP1_103_41871_20120127_015943_inLine +BABEL_OP1_103_41871_20120127_015943_outLine +BABEL_OP1_103_43725_20120518_195136_inLine +BABEL_OP1_103_43725_20120518_195136_outLine +BABEL_OP1_103_44141_20120520_005301_inLine +BABEL_OP1_103_44141_20120520_005301_outLine +BABEL_OP1_103_44515_20120104_001740_inLine +BABEL_OP1_103_44515_20120104_001740_outLine +BABEL_OP1_103_44515_20120104_002748_inLine +BABEL_OP1_103_44515_20120104_002749_outLine +BABEL_OP1_103_46776_20120520_000315_inLine +BABEL_OP1_103_46776_20120520_000315_outLine +BABEL_OP1_103_47313_20120110_161032_inLine +BABEL_OP1_103_47313_20120110_161032_outLine +BABEL_OP1_103_50697_20120523_192842_inLine +BABEL_OP1_103_50697_20120523_192842_outLine +BABEL_OP1_103_51047_20120129_041648_inLine +BABEL_OP1_103_51047_20120129_041648_outLine +BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_52570_20120202_202812_inLine +BABEL_OP1_103_52570_20120202_202812_outLine +BABEL_OP1_103_53334_20120115_004411_inLine +BABEL_OP1_103_53334_20120115_004411_outLine +BABEL_OP1_103_54178_20120205_163228_inLine +BABEL_OP1_103_54178_20120205_163228_outLine +BABEL_OP1_103_54673_20120203_032314_inLine +BABEL_OP1_103_54673_20120203_032314_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_56945_20120125_234057_inLine +BABEL_OP1_103_56945_20120125_234057_outLine +BABEL_OP1_103_57320_20120519_014148_inLine +BABEL_OP1_103_57320_20120519_014148_outLine +BABEL_OP1_103_57618_20120206_004508_inLine +BABEL_OP1_103_57618_20120206_004508_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_59399_20120123_013608_inLine +BABEL_OP1_103_59399_20120123_013608_outLine +BABEL_OP1_103_61606_20120524_001028_inLine +BABEL_OP1_103_61735_20120102_220532_inLine +BABEL_OP1_103_61735_20120102_220532_outLine +BABEL_OP1_103_62671_20120521_174222_inLine +BABEL_OP1_103_62671_20120521_174222_outLine +BABEL_OP1_103_62941_20120311_004945_inLine +BABEL_OP1_103_62941_20120311_004945_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine +BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_63575_20120319_044400_inLine +BABEL_OP1_103_63575_20120319_044400_outLine +BABEL_OP1_103_65788_20120524_153801_inLine +BABEL_OP1_103_65788_20120524_153801_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_66825_20120305_214401_inLine +BABEL_OP1_103_66825_20120305_214401_outLine +BABEL_OP1_103_67716_20120106_145810_inLine +BABEL_OP1_103_67716_20120106_145810_outLine +BABEL_OP1_103_67721_20111229_210017_inLine +BABEL_OP1_103_67721_20111229_210017_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70108_20120516_193813_inLine +BABEL_OP1_103_70108_20120516_193813_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine 
+BABEL_OP1_103_72693_20120522_233148_inLine +BABEL_OP1_103_72693_20120522_233148_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_78737_20120107_144050_inLine +BABEL_OP1_103_78737_20120107_144050_outLine +BABEL_OP1_103_80424_20120523_223457_inLine +BABEL_OP1_103_80424_20120523_223457_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_88434_20120616_183901_inLine +BABEL_OP1_103_88434_20120616_183901_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine +BABEL_OP1_103_91407_20120204_221709_inLine +BABEL_OP1_103_91407_20120204_221709_outLine +BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93227_20120116_190634_inLine +BABEL_OP1_103_93227_20120116_190634_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_96956_20120519_002918_inLine +BABEL_OP1_103_96956_20120519_002918_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine +BABEL_OP1_103_99354_20120206_194646_inLine +BABEL_OP1_103_99354_20120206_194646_outLine +BABEL_OP1_103_99354_20120206_195707_inLine +BABEL_OP1_103_99354_20120206_195707_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list new file mode 100644 index 00000000000..1c606caf3b3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list @@ -0,0 +1,66 @@ +BABEL_OP1_103_11146_20120528_182053_inLine +BABEL_OP1_103_11146_20120528_182053_outLine +BABEL_OP1_103_11168_20111228_213615_inLine +BABEL_OP1_103_11168_20111228_213615_outLine +BABEL_OP1_103_16352_20120201_160631_inLine +BABEL_OP1_103_16352_20120201_160631_outLine +BABEL_OP1_103_17749_20120115_221220_inLine +BABEL_OP1_103_17749_20120115_221220_outLine +BABEL_OP1_103_24427_20120513_210712_inLine +BABEL_OP1_103_24427_20120513_210712_outLine +BABEL_OP1_103_25147_20120201_164613_inLine +BABEL_OP1_103_25147_20120201_164614_outLine +BABEL_OP1_103_28046_20120407_154949_inLine +BABEL_OP1_103_28046_20120407_154949_outLine +BABEL_OP1_103_30747_20120111_231823_inLine +BABEL_OP1_103_30747_20120111_231823_outLine +BABEL_OP1_103_33809_20120122_184348_inLine +BABEL_OP1_103_33809_20120122_184349_outLine +BABEL_OP1_103_35052_20120118_164925_inLine +BABEL_OP1_103_35052_20120118_164925_outLine +BABEL_OP1_103_35052_20120118_171428_inLine +BABEL_OP1_103_35052_20120118_171428_outLine +BABEL_OP1_103_37798_20120121_014828_inLine +BABEL_OP1_103_37798_20120121_014828_outLine +BABEL_OP1_103_41871_20120127_015943_inLine +BABEL_OP1_103_41871_20120127_015943_outLine +BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine 
+BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine +BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list new file mode 100644 index 00000000000..203b313ade2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list @@ -0,0 +1,751 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10612_20111222_210911_inLine +BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine +BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine 
+BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine +BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine +BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine +BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine 
+BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_22697_20120123_213617_inLine +BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine +BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine +BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine 
+BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine +BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine +BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine +BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine 
+BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine +BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine +BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine +BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine 
+BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine +BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine +BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine +BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_60806_20120117_233630_inLine 
+BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine +BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine +BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine +BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine 
+BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine +BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine +BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine 
+BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76276_20120114_191208_inLine +BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77097_20120109_024625_inLine +BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine 
+BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine +BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine +BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine +BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list new file mode 100644 index 00000000000..4d5c081b1c2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list @@ -0,0 +1,124 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10612_20111222_210911_inLine 
+BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22697_20120123_213617_inLine +BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine 
+BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine +BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine +BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..0b2264097e0 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list @@ -0,0 +1,627 @@ +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine 
+BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine +BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine +BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine +BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine 
+BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine +BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine +BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine +BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine 
+BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine +BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine +BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine +BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine +BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine 
+BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine +BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine +BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine +BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine 
+BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine +BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60806_20120117_233630_inLine +BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine +BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine 
+BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine +BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine +BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine +BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine 
+BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine +BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76276_20120114_191208_inLine +BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_77097_20120109_024625_inLine +BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine +BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine 
+BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine +BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list new file mode 100644 index 00000000000..5a1273fe091 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list @@ -0,0 +1,255 @@ +BABEL_OP1_103_10911_20120521_172505_inLine +BABEL_OP1_103_10911_20120521_172505_outLine +BABEL_OP1_103_10974_20121003_140938_inLine +BABEL_OP1_103_10974_20121003_140938_outLine +BABEL_OP1_103_11386_20121003_121747_inLine +BABEL_OP1_103_11386_20121003_121747_outLine +BABEL_OP1_103_12092_20121214_014753_inLine +BABEL_OP1_103_12092_20121214_014753_outLine +BABEL_OP1_103_13064_20120812_170202_inLine +BABEL_OP1_103_13064_20120812_170202_outLine +BABEL_OP1_103_13834_20121003_194231_inLine +BABEL_OP1_103_13834_20121003_194231_outLine +BABEL_OP1_103_14631_20120927_192723_inLine +BABEL_OP1_103_14631_20120927_192723_outLine +BABEL_OP1_103_15440_20120516_145148_inLine +BABEL_OP1_103_15440_20120516_145148_outLine +BABEL_OP1_103_17813_20120120_005856_inLine +BABEL_OP1_103_17813_20120120_005856_outLine +BABEL_OP1_103_17813_20120124_014523_inLine +BABEL_OP1_103_17813_20120124_014523_outLine +BABEL_OP1_103_18084_20120530_155334_inLine +BABEL_OP1_103_18331_20121007_213032_inLine +BABEL_OP1_103_18331_20121007_213032_outLine +BABEL_OP1_103_21083_20120514_145620_inLine +BABEL_OP1_103_21083_20120514_145620_outLine +BABEL_OP1_103_21352_20120930_143535_inLine +BABEL_OP1_103_21352_20120930_143535_outLine +BABEL_OP1_103_23378_20120531_010537_inLine +BABEL_OP1_103_23378_20120531_010537_outLine 
+BABEL_OP1_103_24303_20120809_003638_inLine +BABEL_OP1_103_24303_20120809_003638_outLine +BABEL_OP1_103_26536_20120523_220246_inLine +BABEL_OP1_103_26536_20120523_220246_outLine +BABEL_OP1_103_27187_20120929_030115_inLine +BABEL_OP1_103_27187_20120929_030115_outLine +BABEL_OP1_103_27356_20120618_212359_inLine +BABEL_OP1_103_27356_20120618_212359_outLine +BABEL_OP1_103_27378_20121005_211922_inLine +BABEL_OP1_103_27378_20121005_211922_outLine +BABEL_OP1_103_27679_20120528_215730_inLine +BABEL_OP1_103_27679_20120528_215730_outLine +BABEL_OP1_103_27891_20121006_234744_inLine +BABEL_OP1_103_27891_20121006_234744_outLine +BABEL_OP1_103_29103_20120516_171814_inLine +BABEL_OP1_103_29103_20120516_171814_outLine +BABEL_OP1_103_29690_20120930_135813_inLine +BABEL_OP1_103_29690_20120930_135813_outLine +BABEL_OP1_103_29911_20120607_231532_inLine +BABEL_OP1_103_30638_20120928_141651_inLine +BABEL_OP1_103_30638_20120928_141651_outLine +BABEL_OP1_103_30817_20120806_163759_inLine +BABEL_OP1_103_30817_20120806_163759_outLine +BABEL_OP1_103_31485_20120609_184729_inLine +BABEL_OP1_103_31485_20120609_184729_outLine +BABEL_OP1_103_33279_20120928_011938_inLine +BABEL_OP1_103_33279_20120928_011939_outLine +BABEL_OP1_103_37731_20120526_213340_inLine +BABEL_OP1_103_37731_20120526_213340_outLine +BABEL_OP1_103_39215_20121002_013230_inLine +BABEL_OP1_103_39215_20121002_013230_outLine +BABEL_OP1_103_39783_20121002_221911_inLine +BABEL_OP1_103_39783_20121002_221911_outLine +BABEL_OP1_103_42098_20121007_204200_inLine +BABEL_OP1_103_42098_20121007_204200_outLine +BABEL_OP1_103_44267_20121005_232936_inLine +BABEL_OP1_103_44267_20121005_232936_outLine +BABEL_OP1_103_44419_20121225_001833_inLine +BABEL_OP1_103_44419_20121225_001833_outLine +BABEL_OP1_103_44747_20121002_164108_inLine +BABEL_OP1_103_44747_20121002_164108_outLine +BABEL_OP1_103_46947_20120522_173213_inLine +BABEL_OP1_103_46947_20120522_173213_outLine +BABEL_OP1_103_47049_20120522_182020_inLine +BABEL_OP1_103_47049_20120522_182020_outLine +BABEL_OP1_103_47251_20120522_194654_inLine +BABEL_OP1_103_47251_20120522_194654_outLine +BABEL_OP1_103_48313_20120522_202903_inLine +BABEL_OP1_103_48313_20120522_202903_outLine +BABEL_OP1_103_48416_20120606_165040_inLine +BABEL_OP1_103_48416_20120606_165040_outLine +BABEL_OP1_103_48795_20120612_193506_inLine +BABEL_OP1_103_48795_20120612_193506_outLine +BABEL_OP1_103_49201_20120930_161546_inLine +BABEL_OP1_103_49201_20120930_161546_outLine +BABEL_OP1_103_49208_20120522_233157_inLine +BABEL_OP1_103_49208_20120522_233157_outLine +BABEL_OP1_103_49443_20120928_004643_inLine +BABEL_OP1_103_49443_20120928_004643_outLine +BABEL_OP1_103_49545_20120530_163034_inLine +BABEL_OP1_103_49545_20120530_163034_outLine +BABEL_OP1_103_49548_20120523_162625_inLine +BABEL_OP1_103_49548_20120523_162625_outLine +BABEL_OP1_103_49885_20121002_000523_inLine +BABEL_OP1_103_49885_20121002_000523_outLine +BABEL_OP1_103_51973_20120102_032210_inLine +BABEL_OP1_103_51973_20120102_032210_outLine +BABEL_OP1_103_51973_20120102_033759_inLine +BABEL_OP1_103_51973_20120102_033759_outLine +BABEL_OP1_103_53659_20120802_001534_inLine +BABEL_OP1_103_53659_20120802_001534_outLine +BABEL_OP1_103_54393_20120928_235549_inLine +BABEL_OP1_103_54393_20120928_235549_outLine +BABEL_OP1_103_55382_20120629_230445_inLine +BABEL_OP1_103_55382_20120629_230445_outLine +BABEL_OP1_103_56283_20121007_180739_inLine +BABEL_OP1_103_56283_20121007_180739_outLine +BABEL_OP1_103_57584_20120725_224449_inLine +BABEL_OP1_103_57584_20120725_224449_outLine 
+BABEL_OP1_103_58298_20120528_222416_inLine +BABEL_OP1_103_58298_20120528_222416_outLine +BABEL_OP1_103_59488_20120929_145848_inLine +BABEL_OP1_103_59488_20120929_145848_outLine +BABEL_OP1_103_59799_20121229_171348_inLine +BABEL_OP1_103_59799_20121229_171348_outLine +BABEL_OP1_103_60055_20120819_171855_inLine +BABEL_OP1_103_60055_20120819_171855_outLine +BABEL_OP1_103_60572_20120530_175437_inLine +BABEL_OP1_103_60572_20120530_175437_outLine +BABEL_OP1_103_60730_20120514_223932_inLine +BABEL_OP1_103_60730_20120514_223932_outLine +BABEL_OP1_103_61635_20120928_234720_inLine +BABEL_OP1_103_61635_20120928_234720_outLine +BABEL_OP1_103_61655_20120809_233557_inLine +BABEL_OP1_103_61655_20120809_233557_outLine +BABEL_OP1_103_62109_20120512_223919_inLine +BABEL_OP1_103_62109_20120512_223919_outLine +BABEL_OP1_103_63043_20121007_231348_inLine +BABEL_OP1_103_63043_20121007_231348_outLine +BABEL_OP1_103_63043_20121007_232702_inLine +BABEL_OP1_103_63043_20121007_232702_outLine +BABEL_OP1_103_63390_20120513_174652_inLine +BABEL_OP1_103_63390_20120513_174652_outLine +BABEL_OP1_103_63603_20121011_004426_inLine +BABEL_OP1_103_63603_20121011_004426_outLine +BABEL_OP1_103_63842_20121005_162812_inLine +BABEL_OP1_103_63842_20121005_162812_outLine +BABEL_OP1_103_63996_20120516_162255_inLine +BABEL_OP1_103_63996_20120516_162255_outLine +BABEL_OP1_103_64695_20120731_171306_inLine +BABEL_OP1_103_64695_20120731_171306_outLine +BABEL_OP1_103_66842_20120516_153359_inLine +BABEL_OP1_103_66842_20120516_153400_outLine +BABEL_OP1_103_66879_20120524_201608_inLine +BABEL_OP1_103_66879_20120524_201608_outLine +BABEL_OP1_103_68102_20120601_163256_inLine +BABEL_OP1_103_68102_20120601_163256_outLine +BABEL_OP1_103_68189_20120524_212606_inLine +BABEL_OP1_103_68189_20120524_212606_outLine +BABEL_OP1_103_68538_20120608_172925_inLine +BABEL_OP1_103_68538_20120608_172925_outLine +BABEL_OP1_103_68538_20120608_174508_inLine +BABEL_OP1_103_68538_20120608_174508_outLine +BABEL_OP1_103_71224_20121005_221009_inLine +BABEL_OP1_103_71224_20121005_221009_outLine +BABEL_OP1_103_71996_20120522_225024_inLine +BABEL_OP1_103_71996_20120522_225024_outLine +BABEL_OP1_103_72088_20121003_002504_inLine +BABEL_OP1_103_72088_20121003_002504_outLine +BABEL_OP1_103_75345_20121001_203932_inLine +BABEL_OP1_103_75345_20121001_203932_outLine +BABEL_OP1_103_76149_20121004_032258_inLine +BABEL_OP1_103_76149_20121004_032258_outLine +BABEL_OP1_103_76372_20120514_235628_inLine +BABEL_OP1_103_76372_20120514_235628_outLine +BABEL_OP1_103_76832_20120528_201751_inLine +BABEL_OP1_103_76832_20120528_201751_outLine +BABEL_OP1_103_77294_20120616_144707_inLine +BABEL_OP1_103_77294_20120616_144707_outLine +BABEL_OP1_103_78792_20120522_191207_inLine +BABEL_OP1_103_78792_20120522_191207_outLine +BABEL_OP1_103_78938_20120512_201016_inLine +BABEL_OP1_103_78938_20120512_201016_outLine +BABEL_OP1_103_79006_20120521_012957_outLine +BABEL_OP1_103_79387_20120522_211025_inLine +BABEL_OP1_103_79387_20120522_211025_outLine +BABEL_OP1_103_79989_20120928_013138_inLine +BABEL_OP1_103_79989_20120928_013138_outLine +BABEL_OP1_103_80679_20120930_163521_inLine +BABEL_OP1_103_80679_20120930_163521_outLine +BABEL_OP1_103_81492_20120206_014433_inLine +BABEL_OP1_103_81492_20120206_014433_outLine +BABEL_OP1_103_81492_20120206_020249_inLine +BABEL_OP1_103_81492_20120206_020249_outLine +BABEL_OP1_103_82181_20120929_042042_inLine +BABEL_OP1_103_82181_20120929_042042_outLine +BABEL_OP1_103_84111_20120930_144529_inLine +BABEL_OP1_103_84111_20120930_144529_outLine 
+BABEL_OP1_103_84946_20120619_234231_inLine +BABEL_OP1_103_84946_20120619_234231_outLine +BABEL_OP1_103_85272_20120531_172145_inLine +BABEL_OP1_103_85272_20120531_172145_outLine +BABEL_OP1_103_85388_20120512_131608_inLine +BABEL_OP1_103_85388_20120512_131608_outLine +BABEL_OP1_103_85443_20120512_163256_inLine +BABEL_OP1_103_85443_20120512_163256_outLine +BABEL_OP1_103_85443_20120512_164633_inLine +BABEL_OP1_103_85443_20120512_164633_outLine +BABEL_OP1_103_86067_20121008_031300_inLine +BABEL_OP1_103_86067_20121008_031300_outLine +BABEL_OP1_103_86121_20121011_040043_inLine +BABEL_OP1_103_86121_20121011_040043_outLine +BABEL_OP1_103_87741_20121231_225715_inLine +BABEL_OP1_103_87741_20121231_225715_outLine +BABEL_OP1_103_89091_20130104_015514_inLine +BABEL_OP1_103_89091_20130104_015514_outLine +BABEL_OP1_103_89091_20130104_032531_inLine +BABEL_OP1_103_89091_20130104_032531_outLine +BABEL_OP1_103_89190_20130106_003028_inLine +BABEL_OP1_103_89190_20130106_003028_outLine +BABEL_OP1_103_90326_20120522_175819_inLine +BABEL_OP1_103_90326_20120522_175819_outLine +BABEL_OP1_103_90672_20121225_182001_inLine +BABEL_OP1_103_90672_20121225_182001_outLine +BABEL_OP1_103_91105_20120516_141445_inLine +BABEL_OP1_103_91105_20120516_141445_outLine +BABEL_OP1_103_91670_20121225_195825_inLine +BABEL_OP1_103_91670_20121225_195825_outLine +BABEL_OP1_103_91723_20121226_011745_inLine +BABEL_OP1_103_91723_20121226_011745_outLine +BABEL_OP1_103_91733_20121227_001726_inLine +BABEL_OP1_103_91733_20121227_001726_outLine +BABEL_OP1_103_91744_20121227_005513_inLine +BABEL_OP1_103_91744_20121227_005513_outLine +BABEL_OP1_103_91815_20121230_215316_inLine +BABEL_OP1_103_91815_20121230_215316_outLine +BABEL_OP1_103_91838_20121230_210441_inLine +BABEL_OP1_103_91838_20121230_210441_outLine +BABEL_OP1_103_91957_20130103_192518_inLine +BABEL_OP1_103_91957_20130103_192518_outLine +BABEL_OP1_103_92027_20130104_160934_inLine +BABEL_OP1_103_92027_20130104_160934_outLine +BABEL_OP1_103_92083_20130105_170057_inLine +BABEL_OP1_103_92083_20130105_170057_outLine +BABEL_OP1_103_92192_20130105_180415_inLine +BABEL_OP1_103_92192_20130105_180415_outLine +BABEL_OP1_103_92277_20130105_173147_inLine +BABEL_OP1_103_92277_20130105_173147_outLine +BABEL_OP1_103_93600_20120812_151245_inLine +BABEL_OP1_103_93600_20120812_151245_outLine +BABEL_OP1_103_94057_20130101_231512_inLine +BABEL_OP1_103_94057_20130101_231513_outLine +BABEL_OP1_103_94065_20130105_015217_inLine +BABEL_OP1_103_94065_20130105_015217_outLine +BABEL_OP1_103_94069_20130101_234436_inLine +BABEL_OP1_103_94069_20130101_234436_outLine +BABEL_OP1_103_96844_20121224_193654_inLine +BABEL_OP1_103_96844_20121224_193654_outLine +BABEL_OP1_103_96868_20120528_161710_inLine +BABEL_OP1_103_96868_20120528_161710_outLine +BABEL_OP1_103_97289_20120806_174807_inLine +BABEL_OP1_103_97289_20120806_174807_outLine +BABEL_OP1_103_98325_20120805_170336_inLine +BABEL_OP1_103_98325_20120805_170336_outLine +BABEL_OP1_103_99446_20120523_164823_inLine +BABEL_OP1_103_99446_20120523_164823_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/dev.list b/egs/babel/s5d/conf/lists/104-pashto/dev.list new file mode 100644 index 00000000000..7624d5decb2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/dev.list @@ -0,0 +1,143 @@ +BABEL_BP_104_04221_20120310_194031_inLine +BABEL_BP_104_04221_20120310_194031_outLine +BABEL_BP_104_08861_20120226_050237_inLine +BABEL_BP_104_08861_20120226_050237_outLine +BABEL_BP_104_10712_20120205_004135_inLine +BABEL_BP_104_10712_20120205_004135_outLine 
+BABEL_BP_104_10712_20120205_005332_inLine +BABEL_BP_104_10712_20120205_005332_outLine +BABEL_BP_104_13196_20120130_151929_inLine +BABEL_BP_104_13196_20120130_151929_outLine +BABEL_BP_104_14002_20120218_235147_inLine +BABEL_BP_104_14002_20120218_235147_outLine +BABEL_BP_104_15268_20120110_154803_inLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_16210_20111223_035614_inLine +BABEL_BP_104_16210_20111223_041103_inLine +BABEL_BP_104_17749_20120314_233247_inLine +BABEL_BP_104_17749_20120314_233247_outLine +BABEL_BP_104_21113_20120319_010218_inLine +BABEL_BP_104_22338_20120128_204829_inLine +BABEL_BP_104_22713_20120205_170953_inLine +BABEL_BP_104_23560_20120124_200340_inLine +BABEL_BP_104_28102_20120326_164501_inLine +BABEL_BP_104_28102_20120326_164501_outLine +BABEL_BP_104_28102_20120326_171523_inLine +BABEL_BP_104_28102_20120326_171523_outLine +BABEL_BP_104_29368_20120321_233801_inLine +BABEL_BP_104_29368_20120321_233802_outLine +BABEL_BP_104_29368_20120321_235133_inLine +BABEL_BP_104_29368_20120321_235133_outLine +BABEL_BP_104_33955_20120218_033644_inLine +BABEL_BP_104_33955_20120218_033644_outLine +BABEL_BP_104_34541_20120321_005610_inLine +BABEL_BP_104_34541_20120321_005610_outLine +BABEL_BP_104_35756_20120311_223543_inLine +BABEL_BP_104_35756_20120311_223543_outLine +BABEL_BP_104_36867_20120208_233318_inLine +BABEL_BP_104_36867_20120208_233318_outLine +BABEL_BP_104_37314_20120208_184924_inLine +BABEL_BP_104_37314_20120208_184924_outLine +BABEL_BP_104_39030_20120119_225755_outLine +BABEL_BP_104_39279_20120227_144602_inLine +BABEL_BP_104_39279_20120227_144602_outLine +BABEL_BP_104_40410_20120320_225202_inLine +BABEL_BP_104_40410_20120320_225202_inLine +BABEL_BP_104_40410_20120320_225202_outLine +BABEL_BP_104_40475_20120205_221544_inLine +BABEL_BP_104_40956_20120310_235812_inLine +BABEL_BP_104_40956_20120310_235812_outLine +BABEL_BP_104_43170_20120205_035143_inLine +BABEL_BP_104_44838_20120324_232540_inLine +BABEL_BP_104_44838_20120324_232540_outLine +BABEL_BP_104_53864_20120203_213736_outLine +BABEL_BP_104_54222_20120309_160035_inLine +BABEL_BP_104_54222_20120309_160035_outLine +BABEL_BP_104_56005_20120113_205235_outLine +BABEL_BP_104_56226_20120205_235429_outLine +BABEL_BP_104_60524_20120319_160420_inLine +BABEL_BP_104_60524_20120319_160420_outLine +BABEL_BP_104_60524_20120319_161719_inLine +BABEL_BP_104_60524_20120319_161719_outLine +BABEL_BP_104_61592_20120126_181735_inLine +BABEL_BP_104_61592_20120126_181735_outLine +BABEL_BP_104_61616_20120108_214701_inLine +BABEL_BP_104_61616_20120108_214701_outLine +BABEL_BP_104_62984_20120219_053758_inLine +BABEL_BP_104_62984_20120219_053758_outLine +BABEL_BP_104_64610_20120302_153346_inLine +BABEL_BP_104_64610_20120302_153346_outLine +BABEL_BP_104_66017_20120215_233406_inLine +BABEL_BP_104_66017_20120215_233406_outLine +BABEL_BP_104_70476_20120309_130456_inLine +BABEL_BP_104_70476_20120309_130456_outLine +BABEL_BP_104_72176_20120213_194841_inLine +BABEL_BP_104_72176_20120213_194841_outLine +BABEL_BP_104_73728_20111222_192324_inLine +BABEL_BP_104_73728_20111222_192324_outLine +BABEL_BP_104_74678_20120314_021415_inLine +BABEL_BP_104_74678_20120314_021415_outLine +BABEL_BP_104_74824_20120218_204154_inLine +BABEL_BP_104_74824_20120218_204154_outLine +BABEL_BP_104_75839_20120208_035003_inLine +BABEL_BP_104_75839_20120208_035003_outLine +BABEL_BP_104_76654_20111220_202441_inLine +BABEL_BP_104_76812_20120320_180439_inLine +BABEL_BP_104_76812_20120320_180439_outLine 
+BABEL_BP_104_76812_20120320_181229_inLine +BABEL_BP_104_76812_20120320_181229_outLine +BABEL_BP_104_78141_20120317_034317_inLine +BABEL_BP_104_78141_20120317_034317_outLine +BABEL_BP_104_81274_20120207_202722_inLine +BABEL_BP_104_81510_20120217_194417_inLine +BABEL_BP_104_81510_20120217_194417_inLine +BABEL_BP_104_82160_20120126_022907_inLine +BABEL_BP_104_82160_20120126_022907_inLine +BABEL_BP_104_82160_20120126_022907_outLine +BABEL_BP_104_83980_20120205_184505_inLine +BABEL_BP_104_83980_20120205_184505_inLine +BABEL_BP_104_83992_20120219_185819_inLine +BABEL_BP_104_83992_20120219_185819_outLine +BABEL_BP_104_84041_20111222_044010_inLine +BABEL_BP_104_84041_20111222_044010_outLine +BABEL_BP_104_84274_20120216_161121_inLine +BABEL_BP_104_84274_20120216_161121_outLine +BABEL_BP_104_85078_20120320_212106_inLine +BABEL_BP_104_85078_20120320_212106_outLine +BABEL_BP_104_85424_20120216_025024_inLine +BABEL_BP_104_85424_20120216_025024_outLine +BABEL_BP_104_85455_20120310_210107_inLine +BABEL_BP_104_85455_20120310_210107_outLine +BABEL_BP_104_85730_20120128_041419_inLine +BABEL_BP_104_85730_20120128_041419_outLine +BABEL_BP_104_85730_20120128_041419_outLine +BABEL_BP_104_86614_20111222_040726_inLine +BABEL_BP_104_86614_20111222_040726_outLine +BABEL_BP_104_86680_20120309_180429_inLine +BABEL_BP_104_86680_20120309_180429_outLine +BABEL_BP_104_86680_20120309_181746_inLine +BABEL_BP_104_86680_20120309_181746_outLine +BABEL_BP_104_86680_20120309_181746_outLine +BABEL_BP_104_87723_20120206_183706_inLine +BABEL_BP_104_87723_20120206_183706_outLine +BABEL_BP_104_88598_20120216_014512_inLine +BABEL_BP_104_88598_20120216_014512_outLine +BABEL_BP_104_88598_20120216_022402_inLine +BABEL_BP_104_88598_20120216_022402_outLine +BABEL_BP_104_89308_20120131_214111_inLine +BABEL_BP_104_89308_20120131_214111_outLine +BABEL_BP_104_89382_20120207_192751_inLine +BABEL_BP_104_89382_20120207_192751_outLine +BABEL_BP_104_90003_20120127_173210_inLine +BABEL_BP_104_91275_20120219_055247_outLine +BABEL_BP_104_91372_20120309_201355_inLine +BABEL_BP_104_91372_20120309_201355_outLine +BABEL_BP_104_93026_20120121_010508_inLine +BABEL_BP_104_93026_20120121_010508_outLine +BABEL_BP_104_94682_20120126_173632_outLine +BABEL_BP_104_96606_20120308_154908_inLine +BABEL_BP_104_96606_20120308_154908_outLine +BABEL_BP_104_97950_20120129_035347_inLine +BABEL_BP_104_99407_20120217_190330_inLine +BABEL_BP_104_99407_20120217_190330_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/eval.list b/egs/babel/s5d/conf/lists/104-pashto/eval.list new file mode 100644 index 00000000000..f3b4a90b6e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/eval.list @@ -0,0 +1,198 @@ +BABEL_BP_104_01275_20120316_230646_inLine +BABEL_BP_104_01275_20120316_230646_outLine +BABEL_BP_104_01275_20120316_231711_inLine +BABEL_BP_104_01275_20120316_231711_outLine +BABEL_BP_104_03053_20120129_025619_inLine +BABEL_BP_104_03053_20120129_025619_outLine +BABEL_BP_104_03053_20120129_030931_inLine +BABEL_BP_104_03053_20120129_030931_outLine +BABEL_BP_104_10348_20120313_005811_inLine +BABEL_BP_104_10348_20120313_005811_outLine +BABEL_BP_104_10494_20120219_173118_inLine +BABEL_BP_104_10494_20120219_173118_outLine +BABEL_BP_104_11386_20120501_221559_inLine +BABEL_BP_104_11386_20120501_221559_outLine +BABEL_BP_104_11894_20120120_154648_inLine +BABEL_BP_104_11894_20120120_154648_outLine +BABEL_BP_104_12719_20120309_214313_inLine +BABEL_BP_104_12722_20120409_210032_inLine +BABEL_BP_104_12722_20120409_210033_outLine +BABEL_BP_104_15269_20120508_173455_inLine 
+BABEL_BP_104_15997_20120212_170900_inLine +BABEL_BP_104_15997_20120212_170900_outLine +BABEL_BP_104_16352_20120206_004350_inLine +BABEL_BP_104_16352_20120206_004350_outLine +BABEL_BP_104_16629_20120501_184857_inLine +BABEL_BP_104_17218_20120206_041300_inLine +BABEL_BP_104_17218_20120206_041301_outLine +BABEL_BP_104_18358_20120525_224141_inLine +BABEL_BP_104_19112_20120316_145312_inLine +BABEL_BP_104_19112_20120316_145312_outLine +BABEL_BP_104_19686_20120518_175511_inLine +BABEL_BP_104_19760_20120501_194354_inLine +BABEL_BP_104_20157_20120122_171556_inLine +BABEL_BP_104_20157_20120122_171556_outLine +BABEL_BP_104_21180_20120216_024537_inLine +BABEL_BP_104_21180_20120216_024537_outLine +BABEL_BP_104_22422_20120213_143323_inLine +BABEL_BP_104_22422_20120213_143323_outLine +BABEL_BP_104_24378_20120322_201121_inLine +BABEL_BP_104_24378_20120322_201121_outLine +BABEL_BP_104_25365_20120525_172149_inLine +BABEL_BP_104_27891_20120331_012612_inLine +BABEL_BP_104_27891_20120331_012612_outLine +BABEL_BP_104_28573_20120322_135901_inLine +BABEL_BP_104_28573_20120322_135901_outLine +BABEL_BP_104_29083_20120128_011719_inLine +BABEL_BP_104_29083_20120128_011719_outLine +BABEL_BP_104_30978_20120309_013805_inLine +BABEL_BP_104_30978_20120309_013805_outLine +BABEL_BP_104_32245_20120115_200120_outLine +BABEL_BP_104_32669_20120223_164026_inLine +BABEL_BP_104_32669_20120223_164026_outLine +BABEL_BP_104_32708_20120518_190441_inLine +BABEL_BP_104_33429_20120303_015431_inLine +BABEL_BP_104_33429_20120303_015431_outLine +BABEL_BP_104_34137_20120219_183642_inLine +BABEL_BP_104_34137_20120219_183642_outLine +BABEL_BP_104_35317_20120208_173659_inLine +BABEL_BP_104_35317_20120208_173659_outLine +BABEL_BP_104_35764_20120518_193509_inLine +BABEL_BP_104_36227_20120211_165128_inLine +BABEL_BP_104_36227_20120211_165128_outLine +BABEL_BP_104_36227_20120211_181406_inLine +BABEL_BP_104_36227_20120211_181406_outLine +BABEL_BP_104_38954_20120316_173708_inLine +BABEL_BP_104_38954_20120316_173708_outLine +BABEL_BP_104_39577_20120321_011346_inLine +BABEL_BP_104_39577_20120321_011346_outLine +BABEL_BP_104_39696_20120218_034224_inLine +BABEL_BP_104_39696_20120218_034224_outLine +BABEL_BP_104_40445_20120314_225446_inLine +BABEL_BP_104_40445_20120314_225446_outLine +BABEL_BP_104_41545_20120317_151247_inLine +BABEL_BP_104_41545_20120317_151247_outLine +BABEL_BP_104_42397_20120219_050708_inLine +BABEL_BP_104_42397_20120219_050708_outLine +BABEL_BP_104_42427_20120229_145052_inLine +BABEL_BP_104_42427_20120229_145052_outLine +BABEL_BP_104_42728_20120204_220817_inLine +BABEL_BP_104_42728_20120204_220817_outLine +BABEL_BP_104_42730_20120229_010941_inLine +BABEL_BP_104_42730_20120229_010941_outLine +BABEL_BP_104_44792_20120210_023955_inLine +BABEL_BP_104_46216_20120207_194728_inLine +BABEL_BP_104_46216_20120207_194728_outLine +BABEL_BP_104_46862_20120316_155735_inLine +BABEL_BP_104_46862_20120316_155735_outLine +BABEL_BP_104_47771_20120519_163449_inLine +BABEL_BP_104_48518_20120219_154144_inLine +BABEL_BP_104_48518_20120219_154144_outLine +BABEL_BP_104_49662_20120518_205502_inLine +BABEL_BP_104_52533_20120310_204257_inLine +BABEL_BP_104_52533_20120310_204257_outLine +BABEL_BP_104_54646_20120119_215025_inLine +BABEL_BP_104_54646_20120119_215025_outLine +BABEL_BP_104_55043_20120316_021531_inLine +BABEL_BP_104_55043_20120316_021531_outLine +BABEL_BP_104_56605_20120220_012855_inLine +BABEL_BP_104_56605_20120220_012855_outLine +BABEL_BP_104_58283_20111227_182227_inLine +BABEL_BP_104_58283_20111227_182227_outLine 
+BABEL_BP_104_59121_20120120_170101_inLine +BABEL_BP_104_59121_20120120_170101_outLine +BABEL_BP_104_60055_20120120_151813_inLine +BABEL_BP_104_60055_20120120_151813_outLine +BABEL_BP_104_60523_20120303_012610_inLine +BABEL_BP_104_60523_20120303_012610_outLine +BABEL_BP_104_61400_20120518_194526_inLine +BABEL_BP_104_61755_20120518_180255_inLine +BABEL_BP_104_61786_20120216_204511_inLine +BABEL_BP_104_61786_20120216_204511_outLine +BABEL_BP_104_64198_20120219_231453_inLine +BABEL_BP_104_64198_20120219_231453_outLine +BABEL_BP_104_65668_20120203_175644_inLine +BABEL_BP_104_65668_20120203_175644_outLine +BABEL_BP_104_66153_20120212_161723_inLine +BABEL_BP_104_66153_20120212_161724_outLine +BABEL_BP_104_66842_20120126_174251_inLine +BABEL_BP_104_66842_20120126_174251_outLine +BABEL_BP_104_66847_20120308_230422_inLine +BABEL_BP_104_66847_20120308_230422_outLine +BABEL_BP_104_68538_20120314_231228_inLine +BABEL_BP_104_68538_20120314_231228_outLine +BABEL_BP_104_69336_20120201_211015_inLine +BABEL_BP_104_69336_20120201_211015_outLine +BABEL_BP_104_69336_20120201_213613_inLine +BABEL_BP_104_69336_20120201_213613_outLine +BABEL_BP_104_69728_20120129_180746_inLine +BABEL_BP_104_69728_20120129_180746_outLine +BABEL_BP_104_71284_20111228_210355_inLine +BABEL_BP_104_71284_20111228_210355_outLine +BABEL_BP_104_71284_20111228_215349_inLine +BABEL_BP_104_71284_20111228_215349_outLine +BABEL_BP_104_71925_20120309_151315_inLine +BABEL_BP_104_71925_20120309_151315_outLine +BABEL_BP_104_75869_20111220_204852_inLine +BABEL_BP_104_75869_20111220_204852_outLine +BABEL_BP_104_77082_20120109_183551_inLine +BABEL_BP_104_77082_20120109_183551_outLine +BABEL_BP_104_77290_20120403_023516_inLine +BABEL_BP_104_77290_20120403_023516_outLine +BABEL_BP_104_77621_20120517_225556_inLine +BABEL_BP_104_77737_20120320_204452_inLine +BABEL_BP_104_77737_20120320_204452_outLine +BABEL_BP_104_78298_20120308_204105_inLine +BABEL_BP_104_78298_20120308_204105_outLine +BABEL_BP_104_80644_20120222_222458_inLine +BABEL_BP_104_80644_20120222_222458_outLine +BABEL_BP_104_83327_20120217_233846_inLine +BABEL_BP_104_83327_20120217_233846_outLine +BABEL_BP_104_83782_20120519_153147_inLine +BABEL_BP_104_84398_20120219_052212_inLine +BABEL_BP_104_84398_20120219_052212_outLine +BABEL_BP_104_85897_20120221_033320_inLine +BABEL_BP_104_85897_20120221_033320_outLine +BABEL_BP_104_86231_20120224_065736_inLine +BABEL_BP_104_86231_20120224_065736_outLine +BABEL_BP_104_86793_20120309_185403_inLine +BABEL_BP_104_86793_20120309_185403_outLine +BABEL_BP_104_86873_20120519_160538_inLine +BABEL_BP_104_87124_20120315_000929_inLine +BABEL_BP_104_87124_20120315_000929_outLine +BABEL_BP_104_87734_20120117_154033_inLine +BABEL_BP_104_87734_20120117_154033_outLine +BABEL_BP_104_89463_20111225_195251_inLine +BABEL_BP_104_89463_20111225_195251_outLine +BABEL_BP_104_89702_20120318_005220_inLine +BABEL_BP_104_89702_20120318_005220_outLine +BABEL_BP_104_89851_20120322_183302_inLine +BABEL_BP_104_89851_20120322_183302_outLine +BABEL_BP_104_89851_20120322_194407_inLine +BABEL_BP_104_89851_20120322_194407_outLine +BABEL_BP_104_90758_20120315_015433_inLine +BABEL_BP_104_90758_20120315_015433_outLine +BABEL_BP_104_91105_20120501_195037_inLine +BABEL_BP_104_92247_20120220_023207_inLine +BABEL_BP_104_92247_20120220_023207_outLine +BABEL_BP_104_92721_20120401_235515_inLine +BABEL_BP_104_92721_20120401_235515_outLine +BABEL_BP_104_92721_20120402_000651_inLine +BABEL_BP_104_92721_20120402_000651_outLine +BABEL_BP_104_93180_20111223_033642_inLine 
+BABEL_BP_104_93180_20111223_033642_outLine +BABEL_BP_104_93742_20120308_233140_inLine +BABEL_BP_104_93742_20120308_233140_outLine +BABEL_BP_104_93748_20120316_223342_inLine +BABEL_BP_104_93748_20120316_223342_outLine +BABEL_BP_104_94934_20120525_175309_inLine +BABEL_BP_104_96186_20120320_210010_inLine +BABEL_BP_104_96186_20120320_210010_outLine +BABEL_BP_104_96868_20120326_145653_inLine +BABEL_BP_104_96868_20120326_145653_outLine +BABEL_BP_104_97574_20120228_161829_inLine +BABEL_BP_104_98271_20120110_010959_inLine +BABEL_BP_104_98271_20120110_010959_outLine +BABEL_BP_104_98420_20120507_174842_inLine +BABEL_BP_104_99428_20120211_174655_inLine +BABEL_BP_104_99428_20120211_174655_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list new file mode 100644 index 00000000000..2cf59b81f00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list @@ -0,0 +1,70 @@ +BABEL_BP_104_11894_20120120_154648_inLine +BABEL_BP_104_11894_20120120_154648_outLine +BABEL_BP_104_12722_20120409_210032_inLine +BABEL_BP_104_12722_20120409_210033_outLine +BABEL_BP_104_16352_20120206_004350_inLine +BABEL_BP_104_16352_20120206_004350_outLine +BABEL_BP_104_20157_20120122_171556_inLine +BABEL_BP_104_20157_20120122_171556_outLine +BABEL_BP_104_21180_20120216_024537_inLine +BABEL_BP_104_21180_20120216_024537_outLine +BABEL_BP_104_24378_20120322_201121_inLine +BABEL_BP_104_24378_20120322_201121_outLine +BABEL_BP_104_27891_20120331_012612_inLine +BABEL_BP_104_27891_20120331_012612_outLine +BABEL_BP_104_28573_20120322_135901_inLine +BABEL_BP_104_28573_20120322_135901_outLine +BABEL_BP_104_32669_20120223_164026_inLine +BABEL_BP_104_32669_20120223_164026_outLine +BABEL_BP_104_34137_20120219_183642_inLine +BABEL_BP_104_34137_20120219_183642_outLine +BABEL_BP_104_35317_20120208_173659_inLine +BABEL_BP_104_35317_20120208_173659_outLine +BABEL_BP_104_36227_20120211_165128_inLine +BABEL_BP_104_36227_20120211_165128_outLine +BABEL_BP_104_36227_20120211_181406_inLine +BABEL_BP_104_36227_20120211_181406_outLine +BABEL_BP_104_39577_20120321_011346_inLine +BABEL_BP_104_39577_20120321_011346_outLine +BABEL_BP_104_39696_20120218_034224_inLine +BABEL_BP_104_39696_20120218_034224_outLine +BABEL_BP_104_42427_20120229_145052_inLine +BABEL_BP_104_42427_20120229_145052_outLine +BABEL_BP_104_48518_20120219_154144_inLine +BABEL_BP_104_48518_20120219_154144_outLine +BABEL_BP_104_52533_20120310_204257_inLine +BABEL_BP_104_52533_20120310_204257_outLine +BABEL_BP_104_54646_20120119_215025_inLine +BABEL_BP_104_54646_20120119_215025_outLine +BABEL_BP_104_66153_20120212_161723_inLine +BABEL_BP_104_66153_20120212_161724_outLine +BABEL_BP_104_69336_20120201_211015_inLine +BABEL_BP_104_69336_20120201_211015_outLine +BABEL_BP_104_69336_20120201_213613_inLine +BABEL_BP_104_69336_20120201_213613_outLine +BABEL_BP_104_75869_20111220_204852_inLine +BABEL_BP_104_75869_20111220_204852_outLine +BABEL_BP_104_77082_20120109_183551_inLine +BABEL_BP_104_77082_20120109_183551_outLine +BABEL_BP_104_78298_20120308_204105_inLine +BABEL_BP_104_78298_20120308_204105_outLine +BABEL_BP_104_85897_20120221_033320_inLine +BABEL_BP_104_85897_20120221_033320_outLine +BABEL_BP_104_86793_20120309_185403_inLine +BABEL_BP_104_86793_20120309_185403_outLine +BABEL_BP_104_87124_20120315_000929_inLine +BABEL_BP_104_87124_20120315_000929_outLine +BABEL_BP_104_89851_20120322_183302_inLine +BABEL_BP_104_89851_20120322_183302_outLine +BABEL_BP_104_89851_20120322_194407_inLine 
+BABEL_BP_104_89851_20120322_194407_outLine +BABEL_BP_104_92721_20120401_235515_inLine +BABEL_BP_104_92721_20120401_235515_outLine +BABEL_BP_104_92721_20120402_000651_inLine +BABEL_BP_104_92721_20120402_000651_outLine +BABEL_BP_104_93748_20120316_223342_inLine +BABEL_BP_104_93748_20120316_223342_outLine +BABEL_BP_104_96868_20120326_145653_inLine +BABEL_BP_104_96868_20120326_145653_outLine +BABEL_BP_104_99428_20120211_174655_inLine +BABEL_BP_104_99428_20120211_174655_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list new file mode 100644 index 00000000000..9aefcaef2bb --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list @@ -0,0 +1,512 @@ +BABEL_BP_104_03770_20120109_014606_inLine +BABEL_BP_104_03770_20120109_014606_outLine +BABEL_BP_104_08036_20111220_013826_inLine +BABEL_BP_104_08139_20120126_021604_inLine +BABEL_BP_104_10193_20120213_031930_inLine +BABEL_BP_104_10193_20120213_031930_outLine +BABEL_BP_104_10289_20120128_035330_inLine +BABEL_BP_104_10289_20120128_035330_outLine +BABEL_BP_104_10642_20120321_210945_outLine +BABEL_BP_104_10911_20111222_025120_inLine +BABEL_BP_104_10911_20111222_025120_outLine +BABEL_BP_104_11146_20120224_000248_inLine +BABEL_BP_104_11146_20120224_000248_outLine +BABEL_BP_104_11153_20120108_191820_inLine +BABEL_BP_104_11153_20120108_191820_outLine +BABEL_BP_104_11202_20120213_235334_inLine +BABEL_BP_104_11202_20120213_235334_outLine +BABEL_BP_104_11442_20120218_234445_inLine +BABEL_BP_104_11442_20120218_234445_outLine +BABEL_BP_104_11647_20120315_022645_inLine +BABEL_BP_104_11647_20120315_022645_outLine +BABEL_BP_104_12562_20120307_152654_inLine +BABEL_BP_104_12682_20120223_031401_inLine +BABEL_BP_104_13064_20120220_040256_inLine +BABEL_BP_104_13064_20120220_040256_outLine +BABEL_BP_104_13189_20120112_020041_inLine +BABEL_BP_104_13189_20120112_020041_outLine +BABEL_BP_104_13456_20120111_024843_outLine +BABEL_BP_104_13694_20120321_001123_outLine +BABEL_BP_104_13798_20120105_221125_inLine +BABEL_BP_104_13798_20120105_221125_outLine +BABEL_BP_104_13952_20120126_185217_inLine +BABEL_BP_104_13952_20120126_185217_outLine +BABEL_BP_104_14147_20120320_003436_inLine +BABEL_BP_104_14147_20120320_003436_outLine +BABEL_BP_104_14225_20120331_015956_inLine +BABEL_BP_104_14225_20120331_020908_inLine +BABEL_BP_104_14527_20120207_235446_inLine +BABEL_BP_104_14527_20120207_235446_outLine +BABEL_BP_104_14927_20111224_041309_inLine +BABEL_BP_104_14927_20111224_041309_outLine +BABEL_BP_104_15324_20120126_222036_inLine +BABEL_BP_104_15324_20120126_222036_outLine +BABEL_BP_104_15324_20120127_023323_inLine +BABEL_BP_104_15324_20120127_023323_outLine +BABEL_BP_104_15377_20120322_045329_inLine +BABEL_BP_104_15415_20120219_181352_inLine +BABEL_BP_104_15861_20120401_024411_inLine +BABEL_BP_104_15949_20120229_140434_inLine +BABEL_BP_104_15949_20120229_140434_outLine +BABEL_BP_104_16074_20120129_041107_inLine +BABEL_BP_104_16074_20120129_041107_outLine +BABEL_BP_104_16290_20120220_200234_inLine +BABEL_BP_104_16290_20120220_200234_outLine +BABEL_BP_104_16339_20120131_184255_inLine +BABEL_BP_104_16350_20120315_043233_outLine +BABEL_BP_104_16416_20120317_205531_inLine +BABEL_BP_104_16416_20120317_205531_outLine +BABEL_BP_104_16416_20120317_211129_inLine +BABEL_BP_104_16416_20120317_211129_outLine +BABEL_BP_104_16633_20120311_053635_inLine +BABEL_BP_104_16633_20120311_053635_outLine +BABEL_BP_104_17081_20120128_030343_inLine +BABEL_BP_104_17081_20120128_030343_outLine 
+BABEL_BP_104_17180_20120321_215255_inLine +BABEL_BP_104_17180_20120321_215255_outLine +BABEL_BP_104_17216_20120128_015245_inLine +BABEL_BP_104_17216_20120128_020324_inLine +BABEL_BP_104_17319_20111225_210159_inLine +BABEL_BP_104_17319_20111225_210159_outLine +BABEL_BP_104_17429_20120209_024521_inLine +BABEL_BP_104_17429_20120209_024521_outLine +BABEL_BP_104_17904_20120320_014817_inLine +BABEL_BP_104_17904_20120320_014817_outLine +BABEL_BP_104_18084_20111230_210850_outLine +BABEL_BP_104_18537_20120130_181101_inLine +BABEL_BP_104_18537_20120130_181101_outLine +BABEL_BP_104_18667_20120208_175014_inLine +BABEL_BP_104_18667_20120208_175014_outLine +BABEL_BP_104_19044_20120218_182247_outLine +BABEL_BP_104_19044_20120218_183017_outLine +BABEL_BP_104_19044_20120218_183849_outLine +BABEL_BP_104_19324_20120310_192849_inLine +BABEL_BP_104_19324_20120310_192849_outLine +BABEL_BP_104_19481_20120207_235626_inLine +BABEL_BP_104_19481_20120207_235626_outLine +BABEL_BP_104_20016_20120206_215156_inLine +BABEL_BP_104_20016_20120206_215156_outLine +BABEL_BP_104_21256_20120217_202248_inLine +BABEL_BP_104_21256_20120217_202248_outLine +BABEL_BP_104_21928_20120204_212612_inLine +BABEL_BP_104_21928_20120204_212612_outLine +BABEL_BP_104_21968_20120131_180237_inLine +BABEL_BP_104_21968_20120131_180237_outLine +BABEL_BP_104_22548_20120125_211519_inLine +BABEL_BP_104_22590_20120209_224232_inLine +BABEL_BP_104_22590_20120209_224232_outLine +BABEL_BP_104_23381_20120216_161115_outLine +BABEL_BP_104_24235_20120209_030431_outLine +BABEL_BP_104_24585_20120117_225722_inLine +BABEL_BP_104_24585_20120117_225722_outLine +BABEL_BP_104_24735_20120316_221529_inLine +BABEL_BP_104_24735_20120316_221529_outLine +BABEL_BP_104_24750_20120130_183131_inLine +BABEL_BP_104_24750_20120130_183131_outLine +BABEL_BP_104_24810_20120319_165838_outLine +BABEL_BP_104_25015_20120216_005135_inLine +BABEL_BP_104_25015_20120216_005135_outLine +BABEL_BP_104_25525_20120316_140847_outLine +BABEL_BP_104_25911_20111222_051549_inLine +BABEL_BP_104_25911_20111222_051549_outLine +BABEL_BP_104_26946_20120130_034221_outLine +BABEL_BP_104_27298_20111225_192028_inLine +BABEL_BP_104_27298_20111225_192028_outLine +BABEL_BP_104_28289_20120310_202856_inLine +BABEL_BP_104_28289_20120310_202856_outLine +BABEL_BP_104_28330_20120306_194033_inLine +BABEL_BP_104_28330_20120306_195756_inLine +BABEL_BP_104_28734_20120126_205422_inLine +BABEL_BP_104_28734_20120126_212950_inLine +BABEL_BP_104_29009_20120319_164025_outLine +BABEL_BP_104_29967_20120208_201355_inLine +BABEL_BP_104_29967_20120208_201355_outLine +BABEL_BP_104_30143_20111227_132440_inLine +BABEL_BP_104_30271_20120205_163755_inLine +BABEL_BP_104_30271_20120205_165111_inLine +BABEL_BP_104_30628_20120219_182744_inLine +BABEL_BP_104_30628_20120219_182744_outLine +BABEL_BP_104_30848_20120204_154057_inLine +BABEL_BP_104_30848_20120204_154058_outLine +BABEL_BP_104_31663_20120210_140419_inLine +BABEL_BP_104_31663_20120210_140419_outLine +BABEL_BP_104_31919_20120405_023221_inLine +BABEL_BP_104_31926_20120319_040036_outLine +BABEL_BP_104_32956_20120221_133851_inLine +BABEL_BP_104_32956_20120221_133851_outLine +BABEL_BP_104_33337_20120220_005047_inLine +BABEL_BP_104_33337_20120220_005047_outLine +BABEL_BP_104_33846_20120123_194027_inLine +BABEL_BP_104_34164_20120221_141502_inLine +BABEL_BP_104_34164_20120221_141502_outLine +BABEL_BP_104_34188_20120219_000455_inLine +BABEL_BP_104_34188_20120219_000455_outLine +BABEL_BP_104_34335_20111225_224055_outLine +BABEL_BP_104_34833_20120215_025837_inLine 
+BABEL_BP_104_34833_20120215_025837_outLine +BABEL_BP_104_34994_20120314_001810_outLine +BABEL_BP_104_34994_20120314_003701_outLine +BABEL_BP_104_35073_20120208_223917_outLine +BABEL_BP_104_35444_20120310_190608_inLine +BABEL_BP_104_35444_20120310_190608_outLine +BABEL_BP_104_35646_20120202_222418_inLine +BABEL_BP_104_35646_20120202_222418_outLine +BABEL_BP_104_35874_20120403_213324_inLine +BABEL_BP_104_35916_20120204_030147_inLine +BABEL_BP_104_35916_20120204_030147_outLine +BABEL_BP_104_35923_20120216_021137_inLine +BABEL_BP_104_35923_20120216_021137_outLine +BABEL_BP_104_36138_20120206_210519_inLine +BABEL_BP_104_36138_20120206_210519_outLine +BABEL_BP_104_36413_20120310_185758_inLine +BABEL_BP_104_36413_20120310_185758_outLine +BABEL_BP_104_36487_20120209_211827_inLine +BABEL_BP_104_36487_20120209_211827_outLine +BABEL_BP_104_37131_20120318_210220_inLine +BABEL_BP_104_37131_20120318_210220_outLine +BABEL_BP_104_37135_20120219_044437_inLine +BABEL_BP_104_37135_20120219_044437_outLine +BABEL_BP_104_37593_20120130_203434_inLine +BABEL_BP_104_37593_20120130_203434_outLine +BABEL_BP_104_38479_20120213_011154_inLine +BABEL_BP_104_38479_20120213_011154_outLine +BABEL_BP_104_38563_20120127_181357_outLine +BABEL_BP_104_39178_20120109_195710_inLine +BABEL_BP_104_39320_20120110_190913_inLine +BABEL_BP_104_39320_20120110_190913_outLine +BABEL_BP_104_39390_20120322_042714_outLine +BABEL_BP_104_39525_20120217_200400_inLine +BABEL_BP_104_39525_20120217_200400_outLine +BABEL_BP_104_39999_20120326_194721_inLine +BABEL_BP_104_39999_20120326_194721_outLine +BABEL_BP_104_40136_20120222_030818_inLine +BABEL_BP_104_40136_20120222_030823_outLine +BABEL_BP_104_40607_20120324_163524_inLine +BABEL_BP_104_40612_20120106_024347_inLine +BABEL_BP_104_40612_20120106_024347_outLine +BABEL_BP_104_40640_20120131_044455_outLine +BABEL_BP_104_41306_20120223_191213_inLine +BABEL_BP_104_41306_20120223_191213_outLine +BABEL_BP_104_41531_20120331_010320_inLine +BABEL_BP_104_41531_20120331_010320_outLine +BABEL_BP_104_42145_20120127_042217_inLine +BABEL_BP_104_42145_20120127_042217_outLine +BABEL_BP_104_42571_20120229_014427_inLine +BABEL_BP_104_42571_20120229_014427_outLine +BABEL_BP_104_42571_20120229_020000_inLine +BABEL_BP_104_42571_20120229_020000_outLine +BABEL_BP_104_42929_20120307_150902_inLine +BABEL_BP_104_42929_20120307_150902_outLine +BABEL_BP_104_43322_20120126_040725_inLine +BABEL_BP_104_43462_20120216_210005_inLine +BABEL_BP_104_43462_20120216_210005_outLine +BABEL_BP_104_43480_20120326_155717_inLine +BABEL_BP_104_43501_20120331_220724_outLine +BABEL_BP_104_43501_20120331_222326_outLine +BABEL_BP_104_43684_20120128_182736_outLine +BABEL_BP_104_43724_20120219_213737_inLine +BABEL_BP_104_43724_20120219_213737_outLine +BABEL_BP_104_43725_20120205_002936_inLine +BABEL_BP_104_43725_20120205_002936_outLine +BABEL_BP_104_43833_20120331_193735_outLine +BABEL_BP_104_44468_20120222_125222_inLine +BABEL_BP_104_44468_20120222_125222_outLine +BABEL_BP_104_44515_20120326_144709_inLine +BABEL_BP_104_44515_20120326_150551_inLine +BABEL_BP_104_44799_20120119_040419_inLine +BABEL_BP_104_44799_20120119_040419_outLine +BABEL_BP_104_45356_20120324_234702_outLine +BABEL_BP_104_45403_20111222_014909_outLine +BABEL_BP_104_45562_20120131_200753_inLine +BABEL_BP_104_45926_20120127_162212_inLine +BABEL_BP_104_45926_20120127_162212_outLine +BABEL_BP_104_45947_20120313_214251_inLine +BABEL_BP_104_46168_20120217_200729_inLine +BABEL_BP_104_46168_20120217_200729_outLine +BABEL_BP_104_46734_20120219_025954_outLine 
+BABEL_BP_104_46979_20120223_173811_inLine +BABEL_BP_104_46979_20120223_173811_outLine +BABEL_BP_104_47015_20120222_053105_inLine +BABEL_BP_104_47015_20120222_053105_outLine +BABEL_BP_104_47917_20120319_003035_inLine +BABEL_BP_104_47917_20120319_003035_outLine +BABEL_BP_104_48000_20120323_171146_inLine +BABEL_BP_104_48000_20120323_171146_outLine +BABEL_BP_104_48001_20120204_231603_inLine +BABEL_BP_104_48001_20120204_231603_outLine +BABEL_BP_104_48259_20120217_200412_inLine +BABEL_BP_104_48259_20120217_200412_outLine +BABEL_BP_104_48944_20120218_011825_inLine +BABEL_BP_104_48944_20120218_011825_outLine +BABEL_BP_104_48946_20120320_192250_inLine +BABEL_BP_104_48946_20120320_192250_outLine +BABEL_BP_104_49141_20120330_015342_inLine +BABEL_BP_104_49629_20120312_155816_outLine +BABEL_BP_104_50407_20120318_232348_inLine +BABEL_BP_104_50407_20120318_232348_outLine +BABEL_BP_104_50682_20120116_205741_inLine +BABEL_BP_104_50682_20120116_205741_outLine +BABEL_BP_104_50820_20120213_140300_inLine +BABEL_BP_104_50820_20120213_140300_outLine +BABEL_BP_104_51024_20120131_172745_inLine +BABEL_BP_104_51047_20120319_042347_outLine +BABEL_BP_104_51329_20120222_203129_inLine +BABEL_BP_104_51329_20120222_203129_outLine +BABEL_BP_104_51329_20120222_205332_inLine +BABEL_BP_104_51329_20120222_205332_outLine +BABEL_BP_104_51519_20120220_052247_inLine +BABEL_BP_104_51519_20120220_052247_outLine +BABEL_BP_104_51570_20120118_225333_inLine +BABEL_BP_104_51570_20120118_225333_outLine +BABEL_BP_104_51716_20120221_005215_inLine +BABEL_BP_104_51716_20120221_005215_outLine +BABEL_BP_104_52300_20120203_210256_inLine +BABEL_BP_104_52300_20120203_210256_outLine +BABEL_BP_104_52753_20120209_225916_inLine +BABEL_BP_104_52753_20120209_225916_outLine +BABEL_BP_104_52753_20120213_014050_inLine +BABEL_BP_104_52753_20120213_014050_outLine +BABEL_BP_104_52954_20120313_170902_inLine +BABEL_BP_104_52954_20120313_170902_outLine +BABEL_BP_104_53159_20120402_035901_inLine +BABEL_BP_104_53159_20120402_035901_outLine +BABEL_BP_104_53334_20120309_184805_inLine +BABEL_BP_104_53334_20120309_184805_outLine +BABEL_BP_104_53659_20120218_205643_inLine +BABEL_BP_104_53659_20120218_205643_outLine +BABEL_BP_104_53718_20120202_220720_outLine +BABEL_BP_104_54909_20120130_194003_inLine +BABEL_BP_104_54909_20120130_194003_outLine +BABEL_BP_104_55213_20120331_185824_outLine +BABEL_BP_104_55668_20120212_011829_inLine +BABEL_BP_104_55668_20120212_011829_outLine +BABEL_BP_104_56201_20120126_180227_outLine +BABEL_BP_104_56308_20120402_024809_outLine +BABEL_BP_104_56704_20120120_155806_inLine +BABEL_BP_104_56704_20120120_155806_outLine +BABEL_BP_104_56753_20120322_204356_outLine +BABEL_BP_104_56805_20120320_045112_inLine +BABEL_BP_104_56805_20120320_045112_outLine +BABEL_BP_104_57005_20120321_034143_inLine +BABEL_BP_104_57082_20120110_024829_inLine +BABEL_BP_104_57116_20120110_180036_inLine +BABEL_BP_104_57167_20111230_213737_outLine +BABEL_BP_104_57210_20120321_020212_inLine +BABEL_BP_104_57210_20120321_020212_outLine +BABEL_BP_104_57263_20120302_211404_inLine +BABEL_BP_104_57320_20120204_230109_inLine +BABEL_BP_104_57320_20120204_230109_outLine +BABEL_BP_104_57531_20120203_165801_inLine +BABEL_BP_104_57531_20120203_165801_outLine +BABEL_BP_104_57672_20120204_030206_outLine +BABEL_BP_104_58149_20120218_161613_outLine +BABEL_BP_104_58298_20120208_214852_inLine +BABEL_BP_104_58298_20120208_214852_outLine +BABEL_BP_104_58939_20120212_184855_inLine +BABEL_BP_104_58939_20120212_184855_outLine +BABEL_BP_104_58963_20120331_015840_inLine 
+BABEL_BP_104_58963_20120331_015840_outLine +BABEL_BP_104_59219_20120131_225115_outLine +BABEL_BP_104_59399_20120318_144751_inLine +BABEL_BP_104_59399_20120318_144752_outLine +BABEL_BP_104_59482_20120309_190927_inLine +BABEL_BP_104_59482_20120309_190927_outLine +BABEL_BP_104_59681_20120123_213306_inLine +BABEL_BP_104_59681_20120123_213306_outLine +BABEL_BP_104_60462_20120201_181707_inLine +BABEL_BP_104_60462_20120201_181707_outLine +BABEL_BP_104_60806_20120213_161652_outLine +BABEL_BP_104_61029_20120201_224200_outLine +BABEL_BP_104_61523_20120212_035522_inLine +BABEL_BP_104_61655_20120208_203143_inLine +BABEL_BP_104_61655_20120208_203143_outLine +BABEL_BP_104_61733_20120205_220251_outLine +BABEL_BP_104_61735_20120314_012744_inLine +BABEL_BP_104_61909_20120320_190739_inLine +BABEL_BP_104_61909_20120320_190739_outLine +BABEL_BP_104_62815_20120318_025812_outLine +BABEL_BP_104_62816_20120312_153937_outLine +BABEL_BP_104_63111_20120204_232445_outLine +BABEL_BP_104_63215_20120213_040737_inLine +BABEL_BP_104_63215_20120213_040737_outLine +BABEL_BP_104_63220_20120131_155658_inLine +BABEL_BP_104_63220_20120131_155658_outLine +BABEL_BP_104_63390_20120123_212718_outLine +BABEL_BP_104_63397_20120217_194928_inLine +BABEL_BP_104_63397_20120217_194928_outLine +BABEL_BP_104_63784_20120216_015608_inLine +BABEL_BP_104_63784_20120216_015608_outLine +BABEL_BP_104_63934_20120318_201706_inLine +BABEL_BP_104_63934_20120318_201706_outLine +BABEL_BP_104_64990_20120119_173958_inLine +BABEL_BP_104_64990_20120119_173958_outLine +BABEL_BP_104_65341_20120220_222356_inLine +BABEL_BP_104_65341_20120220_222356_outLine +BABEL_BP_104_65590_20120109_001414_inLine +BABEL_BP_104_65590_20120109_001414_outLine +BABEL_BP_104_65954_20120128_163139_inLine +BABEL_BP_104_65954_20120128_163139_outLine +BABEL_BP_104_65974_20120316_195524_inLine +BABEL_BP_104_65974_20120316_195524_outLine +BABEL_BP_104_66784_20111225_190506_outLine +BABEL_BP_104_66879_20120213_004555_inLine +BABEL_BP_104_66879_20120213_004555_outLine +BABEL_BP_104_67106_20120208_201829_inLine +BABEL_BP_104_67106_20120208_201829_outLine +BABEL_BP_104_67423_20120205_220658_outLine +BABEL_BP_104_67685_20120217_235729_inLine +BABEL_BP_104_67685_20120217_235729_outLine +BABEL_BP_104_67718_20120131_164436_inLine +BABEL_BP_104_67718_20120131_164436_outLine +BABEL_BP_104_68077_20120219_155535_outLine +BABEL_BP_104_68111_20120321_185146_outLine +BABEL_BP_104_68144_20120210_223106_outLine +BABEL_BP_104_68189_20120128_005011_inLine +BABEL_BP_104_68189_20120128_005011_outLine +BABEL_BP_104_68209_20120219_045221_inLine +BABEL_BP_104_68997_20120126_010839_inLine +BABEL_BP_104_70333_20120210_033437_outLine +BABEL_BP_104_70528_20120128_013553_inLine +BABEL_BP_104_70528_20120128_013553_outLine +BABEL_BP_104_70762_20120213_175054_outLine +BABEL_BP_104_70897_20120315_000410_inLine +BABEL_BP_104_70897_20120315_000410_outLine +BABEL_BP_104_70897_20120315_013535_inLine +BABEL_BP_104_70897_20120315_013535_outLine +BABEL_BP_104_71948_20120210_012347_inLine +BABEL_BP_104_71970_20120310_195048_inLine +BABEL_BP_104_72874_20120213_191257_inLine +BABEL_BP_104_72874_20120213_191257_outLine +BABEL_BP_104_72910_20120310_185203_outLine +BABEL_BP_104_73450_20120206_024342_inLine +BABEL_BP_104_73450_20120206_024342_outLine +BABEL_BP_104_73925_20120123_233630_inLine +BABEL_BP_104_73925_20120123_233630_outLine +BABEL_BP_104_74261_20120331_191708_outLine +BABEL_BP_104_74334_20111230_035012_inLine +BABEL_BP_104_74940_20120228_225523_inLine +BABEL_BP_104_74940_20120228_225523_outLine 
+BABEL_BP_104_75390_20120218_133736_inLine +BABEL_BP_104_75390_20120218_133736_outLine +BABEL_BP_104_75402_20120319_160944_inLine +BABEL_BP_104_76714_20120313_220017_inLine +BABEL_BP_104_76714_20120313_220017_outLine +BABEL_BP_104_76738_20120210_010510_inLine +BABEL_BP_104_77097_20120214_235954_inLine +BABEL_BP_104_77097_20120214_235954_outLine +BABEL_BP_104_77256_20120309_064948_inLine +BABEL_BP_104_77537_20120206_034628_outLine +BABEL_BP_104_77711_20120229_163050_inLine +BABEL_BP_104_77711_20120229_163050_outLine +BABEL_BP_104_77711_20120229_164115_inLine +BABEL_BP_104_77711_20120229_164115_outLine +BABEL_BP_104_78225_20120126_170942_outLine +BABEL_BP_104_78443_20120128_211331_inLine +BABEL_BP_104_78443_20120128_211331_outLine +BABEL_BP_104_79120_20120127_021912_inLine +BABEL_BP_104_79120_20120127_021912_outLine +BABEL_BP_104_79120_20120127_030132_inLine +BABEL_BP_104_79120_20120127_030132_outLine +BABEL_BP_104_79156_20120126_191440_outLine +BABEL_BP_104_79753_20120203_173233_inLine +BABEL_BP_104_79753_20120203_173233_outLine +BABEL_BP_104_80134_20120313_215613_inLine +BABEL_BP_104_80134_20120313_215613_outLine +BABEL_BP_104_80284_20120109_235306_inLine +BABEL_BP_104_80284_20120109_235306_outLine +BABEL_BP_104_80559_20120319_152020_outLine +BABEL_BP_104_80616_20120223_193040_inLine +BABEL_BP_104_80616_20120223_193040_outLine +BABEL_BP_104_80867_20120309_034536_inLine +BABEL_BP_104_80867_20120309_034536_outLine +BABEL_BP_104_80929_20120310_194854_inLine +BABEL_BP_104_80929_20120310_194854_outLine +BABEL_BP_104_81726_20120229_154500_inLine +BABEL_BP_104_81726_20120229_154500_outLine +BABEL_BP_104_81996_20120128_185859_outLine +BABEL_BP_104_82499_20120215_024134_inLine +BABEL_BP_104_82499_20120215_024134_outLine +BABEL_BP_104_82595_20120324_154901_outLine +BABEL_BP_104_82964_20120218_181351_outLine +BABEL_BP_104_83072_20120213_170201_inLine +BABEL_BP_104_83072_20120213_170201_outLine +BABEL_BP_104_83112_20120204_161112_inLine +BABEL_BP_104_83112_20120204_161112_outLine +BABEL_BP_104_83747_20120120_153904_outLine +BABEL_BP_104_83866_20120206_040504_inLine +BABEL_BP_104_83866_20120206_040505_outLine +BABEL_BP_104_84854_20120129_233819_inLine +BABEL_BP_104_84854_20120129_233819_outLine +BABEL_BP_104_84885_20120217_215436_inLine +BABEL_BP_104_84885_20120217_215436_outLine +BABEL_BP_104_84950_20120130_131546_inLine +BABEL_BP_104_84950_20120130_131546_outLine +BABEL_BP_104_85558_20120413_044033_inLine +BABEL_BP_104_86528_20120128_211228_inLine +BABEL_BP_104_86537_20120128_022125_inLine +BABEL_BP_104_86537_20120128_023523_inLine +BABEL_BP_104_87067_20120324_182930_inLine +BABEL_BP_104_87067_20120324_182930_outLine +BABEL_BP_104_87517_20120207_200619_inLine +BABEL_BP_104_87517_20120207_200619_outLine +BABEL_BP_104_88070_20120318_164350_outLine +BABEL_BP_104_88434_20120319_170128_inLine +BABEL_BP_104_88434_20120319_170128_outLine +BABEL_BP_104_88921_20120205_215225_inLine +BABEL_BP_104_88921_20120205_215225_outLine +BABEL_BP_104_89036_20120327_211455_inLine +BABEL_BP_104_89925_20120202_000208_inLine +BABEL_BP_104_89925_20120202_000208_outLine +BABEL_BP_104_89952_20120131_212850_inLine +BABEL_BP_104_89952_20120131_212850_outLine +BABEL_BP_104_90263_20120205_044035_inLine +BABEL_BP_104_90263_20120205_044035_outLine +BABEL_BP_104_90310_20120129_024342_outLine +BABEL_BP_104_91161_20120311_032449_inLine +BABEL_BP_104_91161_20120311_032449_outLine +BABEL_BP_104_92342_20120320_041334_inLine +BABEL_BP_104_92342_20120320_041334_outLine +BABEL_BP_104_92722_20120209_235113_outLine 
+BABEL_BP_104_92793_20120118_235358_inLine +BABEL_BP_104_93300_20120221_135558_inLine +BABEL_BP_104_93300_20120221_135558_outLine +BABEL_BP_104_93713_20120121_004435_inLine +BABEL_BP_104_93730_20120220_052912_outLine +BABEL_BP_104_93730_20120220_053327_outLine +BABEL_BP_104_93730_20120220_054726_outLine +BABEL_BP_104_93844_20120316_014157_inLine +BABEL_BP_104_93844_20120327_194612_inLine +BABEL_BP_104_94572_20120321_022026_inLine +BABEL_BP_104_94683_20120126_024342_inLine +BABEL_BP_104_94775_20120321_230436_inLine +BABEL_BP_104_94775_20120321_230436_outLine +BABEL_BP_104_94793_20120204_043218_inLine +BABEL_BP_104_94793_20120204_043218_outLine +BABEL_BP_104_95349_20111229_162101_inLine +BABEL_BP_104_95360_20120205_133312_inLine +BABEL_BP_104_95360_20120205_133312_outLine +BABEL_BP_104_95465_20120223_040653_inLine +BABEL_BP_104_95465_20120223_040653_outLine +BABEL_BP_104_95904_20120218_183758_inLine +BABEL_BP_104_95904_20120218_183758_outLine +BABEL_BP_104_96343_20120130_143444_outLine +BABEL_BP_104_96690_20120321_005155_inLine +BABEL_BP_104_96811_20120217_021933_inLine +BABEL_BP_104_96811_20120217_021933_outLine +BABEL_BP_104_96956_20120209_025537_inLine +BABEL_BP_104_96956_20120209_025537_outLine +BABEL_BP_104_97050_20120314_144713_outLine +BABEL_BP_104_97803_20120116_184019_inLine +BABEL_BP_104_97803_20120116_184019_outLine +BABEL_BP_104_97971_20120317_004835_inLine +BABEL_BP_104_97971_20120317_004835_outLine +BABEL_BP_104_98067_20120221_131601_inLine +BABEL_BP_104_98067_20120221_131601_outLine +BABEL_BP_104_98110_20120218_193615_outLine +BABEL_BP_104_98503_20120402_230340_inLine +BABEL_BP_104_98503_20120403_025554_inLine +BABEL_BP_104_98588_20120119_011655_inLine +BABEL_BP_104_98588_20120119_011655_outLine +BABEL_BP_104_98942_20120205_224026_outLine +BABEL_BP_104_99354_20120203_152733_inLine +BABEL_BP_104_99354_20120203_152733_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list new file mode 100644 index 00000000000..293419a111d --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list @@ -0,0 +1,131 @@ +BABEL_BP_104_08036_20111220_013826_inLine +BABEL_BP_104_08139_20120126_021604_inLine +BABEL_BP_104_11647_20120315_022645_inLine +BABEL_BP_104_11647_20120315_022645_outLine +BABEL_BP_104_13952_20120126_185217_inLine +BABEL_BP_104_13952_20120126_185217_outLine +BABEL_BP_104_14147_20120320_003436_inLine +BABEL_BP_104_14147_20120320_003436_outLine +BABEL_BP_104_14527_20120207_235446_inLine +BABEL_BP_104_14527_20120207_235446_outLine +BABEL_BP_104_15324_20120126_222036_inLine +BABEL_BP_104_15324_20120126_222036_outLine +BABEL_BP_104_15324_20120127_023323_inLine +BABEL_BP_104_15324_20120127_023323_outLine +BABEL_BP_104_15415_20120219_181352_inLine +BABEL_BP_104_15949_20120229_140434_inLine +BABEL_BP_104_15949_20120229_140434_outLine +BABEL_BP_104_16074_20120129_041107_inLine +BABEL_BP_104_16074_20120129_041107_outLine +BABEL_BP_104_16290_20120220_200234_inLine +BABEL_BP_104_16290_20120220_200234_outLine +BABEL_BP_104_16339_20120131_184255_inLine +BABEL_BP_104_17904_20120320_014817_inLine +BABEL_BP_104_17904_20120320_014817_outLine +BABEL_BP_104_18084_20111230_210850_outLine +BABEL_BP_104_19324_20120310_192849_inLine +BABEL_BP_104_19324_20120310_192849_outLine +BABEL_BP_104_21256_20120217_202248_inLine +BABEL_BP_104_21256_20120217_202248_outLine +BABEL_BP_104_23381_20120216_161115_outLine +BABEL_BP_104_24235_20120209_030431_outLine +BABEL_BP_104_24735_20120316_221529_inLine 
+BABEL_BP_104_24735_20120316_221529_outLine +BABEL_BP_104_26946_20120130_034221_outLine +BABEL_BP_104_28289_20120310_202856_inLine +BABEL_BP_104_28289_20120310_202856_outLine +BABEL_BP_104_28734_20120126_205422_inLine +BABEL_BP_104_28734_20120126_212950_inLine +BABEL_BP_104_30271_20120205_163755_inLine +BABEL_BP_104_30271_20120205_165111_inLine +BABEL_BP_104_30628_20120219_182744_inLine +BABEL_BP_104_30628_20120219_182744_outLine +BABEL_BP_104_34188_20120219_000455_inLine +BABEL_BP_104_34188_20120219_000455_outLine +BABEL_BP_104_35073_20120208_223917_outLine +BABEL_BP_104_35444_20120310_190608_inLine +BABEL_BP_104_35444_20120310_190608_outLine +BABEL_BP_104_36413_20120310_185758_inLine +BABEL_BP_104_36413_20120310_185758_outLine +BABEL_BP_104_38479_20120213_011154_inLine +BABEL_BP_104_38479_20120213_011154_outLine +BABEL_BP_104_39999_20120326_194721_inLine +BABEL_BP_104_39999_20120326_194721_outLine +BABEL_BP_104_41306_20120223_191213_inLine +BABEL_BP_104_41306_20120223_191213_outLine +BABEL_BP_104_42571_20120229_014427_inLine +BABEL_BP_104_42571_20120229_014427_outLine +BABEL_BP_104_42571_20120229_020000_inLine +BABEL_BP_104_42571_20120229_020000_outLine +BABEL_BP_104_43322_20120126_040725_inLine +BABEL_BP_104_43724_20120219_213737_inLine +BABEL_BP_104_43724_20120219_213737_outLine +BABEL_BP_104_45926_20120127_162212_inLine +BABEL_BP_104_45926_20120127_162212_outLine +BABEL_BP_104_46734_20120219_025954_outLine +BABEL_BP_104_48000_20120323_171146_inLine +BABEL_BP_104_48000_20120323_171146_outLine +BABEL_BP_104_48259_20120217_200412_inLine +BABEL_BP_104_48259_20120217_200412_outLine +BABEL_BP_104_48944_20120218_011825_inLine +BABEL_BP_104_48944_20120218_011825_outLine +BABEL_BP_104_48946_20120320_192250_inLine +BABEL_BP_104_48946_20120320_192250_outLine +BABEL_BP_104_50407_20120318_232348_inLine +BABEL_BP_104_50407_20120318_232348_outLine +BABEL_BP_104_51519_20120220_052247_inLine +BABEL_BP_104_51519_20120220_052247_outLine +BABEL_BP_104_51716_20120221_005215_inLine +BABEL_BP_104_51716_20120221_005215_outLine +BABEL_BP_104_52753_20120209_225916_inLine +BABEL_BP_104_52753_20120209_225916_outLine +BABEL_BP_104_52753_20120213_014050_inLine +BABEL_BP_104_52753_20120213_014050_outLine +BABEL_BP_104_56805_20120320_045112_inLine +BABEL_BP_104_56805_20120320_045112_outLine +BABEL_BP_104_57210_20120321_020212_inLine +BABEL_BP_104_57210_20120321_020212_outLine +BABEL_BP_104_57672_20120204_030206_outLine +BABEL_BP_104_59219_20120131_225115_outLine +BABEL_BP_104_60806_20120213_161652_outLine +BABEL_BP_104_63397_20120217_194928_inLine +BABEL_BP_104_63397_20120217_194928_outLine +BABEL_BP_104_63934_20120318_201706_inLine +BABEL_BP_104_63934_20120318_201706_outLine +BABEL_BP_104_65590_20120109_001414_inLine +BABEL_BP_104_65590_20120109_001414_outLine +BABEL_BP_104_66784_20111225_190506_outLine +BABEL_BP_104_67685_20120217_235729_inLine +BABEL_BP_104_67685_20120217_235729_outLine +BABEL_BP_104_68189_20120128_005011_inLine +BABEL_BP_104_68189_20120128_005011_outLine +BABEL_BP_104_68209_20120219_045221_inLine +BABEL_BP_104_68997_20120126_010839_inLine +BABEL_BP_104_70762_20120213_175054_outLine +BABEL_BP_104_70897_20120315_000410_inLine +BABEL_BP_104_70897_20120315_000410_outLine +BABEL_BP_104_70897_20120315_013535_inLine +BABEL_BP_104_70897_20120315_013535_outLine +BABEL_BP_104_71948_20120210_012347_inLine +BABEL_BP_104_73925_20120123_233630_inLine +BABEL_BP_104_73925_20120123_233630_outLine +BABEL_BP_104_76738_20120210_010510_inLine +BABEL_BP_104_77097_20120214_235954_inLine 
+BABEL_BP_104_77097_20120214_235954_outLine +BABEL_BP_104_80929_20120310_194854_inLine +BABEL_BP_104_80929_20120310_194854_outLine +BABEL_BP_104_81996_20120128_185859_outLine +BABEL_BP_104_87067_20120324_182930_inLine +BABEL_BP_104_87067_20120324_182930_outLine +BABEL_BP_104_92342_20120320_041334_inLine +BABEL_BP_104_92342_20120320_041334_outLine +BABEL_BP_104_92793_20120118_235358_inLine +BABEL_BP_104_94683_20120126_024342_inLine +BABEL_BP_104_94775_20120321_230436_inLine +BABEL_BP_104_94775_20120321_230436_outLine +BABEL_BP_104_95349_20111229_162101_inLine +BABEL_BP_104_95360_20120205_133312_inLine +BABEL_BP_104_95360_20120205_133312_outLine +BABEL_BP_104_95904_20120218_183758_inLine +BABEL_BP_104_95904_20120218_183758_outLine +BABEL_BP_104_96343_20120130_143444_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/training.list b/egs/babel/s5d/conf/lists/104-pashto/training.list new file mode 100644 index 00000000000..deb9bc55dfe --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/training.list @@ -0,0 +1,1026 @@ +BABEL_BP_104_01820_20120313_212614_inLine +BABEL_BP_104_01820_20120313_212614_outLine +BABEL_BP_104_02362_20120229_213454_inLine +BABEL_BP_104_03770_20120109_014606_inLine +BABEL_BP_104_03770_20120109_014606_outLine +BABEL_BP_104_04074_20120318_203458_outLine +BABEL_BP_104_05545_20120126_034408_inLine +BABEL_BP_104_05545_20120126_034408_outLine +BABEL_BP_104_08036_20111220_013826_inLine +BABEL_BP_104_08139_20120126_021604_inLine +BABEL_BP_104_10193_20120213_031930_inLine +BABEL_BP_104_10193_20120213_031930_outLine +BABEL_BP_104_10289_20120128_035330_inLine +BABEL_BP_104_10289_20120128_035330_outLine +BABEL_BP_104_10642_20120321_210945_outLine +BABEL_BP_104_10668_20120311_014815_inLine +BABEL_BP_104_10668_20120311_014815_outLine +BABEL_BP_104_10911_20111222_025120_inLine +BABEL_BP_104_10911_20111222_025120_outLine +BABEL_BP_104_11145_20120321_154029_inLine +BABEL_BP_104_11145_20120321_154029_outLine +BABEL_BP_104_11146_20120224_000248_inLine +BABEL_BP_104_11146_20120224_000248_outLine +BABEL_BP_104_11153_20120108_191820_inLine +BABEL_BP_104_11153_20120108_191820_outLine +BABEL_BP_104_11202_20120213_235334_inLine +BABEL_BP_104_11202_20120213_235334_outLine +BABEL_BP_104_11388_20120202_224148_inLine +BABEL_BP_104_11388_20120202_224148_outLine +BABEL_BP_104_11442_20120218_234445_inLine +BABEL_BP_104_11442_20120218_234445_outLine +BABEL_BP_104_11626_20120316_193802_inLine +BABEL_BP_104_11626_20120316_193802_outLine +BABEL_BP_104_11647_20120315_022645_inLine +BABEL_BP_104_11647_20120315_022645_outLine +BABEL_BP_104_12171_20120212_154823_inLine +BABEL_BP_104_12171_20120212_154823_outLine +BABEL_BP_104_12474_20120309_193318_inLine +BABEL_BP_104_12474_20120309_193318_outLine +BABEL_BP_104_12494_20120213_180757_inLine +BABEL_BP_104_12494_20120213_180757_outLine +BABEL_BP_104_12562_20120307_152654_inLine +BABEL_BP_104_12682_20120223_031401_inLine +BABEL_BP_104_12843_20120202_221656_inLine +BABEL_BP_104_12843_20120202_221656_outLine +BABEL_BP_104_12946_20120224_013645_inLine +BABEL_BP_104_12946_20120224_013645_outLine +BABEL_BP_104_13064_20120220_040256_inLine +BABEL_BP_104_13064_20120220_040256_outLine +BABEL_BP_104_13157_20120207_204725_inLine +BABEL_BP_104_13157_20120207_204725_outLine +BABEL_BP_104_13189_20120112_020041_inLine +BABEL_BP_104_13189_20120112_020041_outLine +BABEL_BP_104_13354_20120121_164912_inLine +BABEL_BP_104_13354_20120121_164912_outLine +BABEL_BP_104_13456_20120111_024843_outLine +BABEL_BP_104_13546_20120327_004548_outLine 
+BABEL_BP_104_13580_20120222_195120_inLine +BABEL_BP_104_13580_20120222_195120_outLine +BABEL_BP_104_13615_20120314_233732_inLine +BABEL_BP_104_13615_20120314_233732_outLine +BABEL_BP_104_13694_20120321_001123_outLine +BABEL_BP_104_13771_20120316_004856_inLine +BABEL_BP_104_13771_20120316_004856_outLine +BABEL_BP_104_13798_20120105_221125_inLine +BABEL_BP_104_13798_20120105_221125_outLine +BABEL_BP_104_13952_20120126_185217_inLine +BABEL_BP_104_13952_20120126_185217_outLine +BABEL_BP_104_14147_20120320_003436_inLine +BABEL_BP_104_14147_20120320_003436_outLine +BABEL_BP_104_14225_20120331_015956_inLine +BABEL_BP_104_14225_20120331_020908_inLine +BABEL_BP_104_14527_20120207_235446_inLine +BABEL_BP_104_14527_20120207_235446_outLine +BABEL_BP_104_14927_20111224_041309_inLine +BABEL_BP_104_14927_20111224_041309_outLine +BABEL_BP_104_14984_20120205_195333_inLine +BABEL_BP_104_14984_20120205_195333_outLine +BABEL_BP_104_15176_20120316_181716_outLine +BABEL_BP_104_15324_20120126_222036_inLine +BABEL_BP_104_15324_20120126_222036_outLine +BABEL_BP_104_15324_20120127_023323_inLine +BABEL_BP_104_15324_20120127_023323_outLine +BABEL_BP_104_15377_20120322_045329_inLine +BABEL_BP_104_15415_20120219_181352_inLine +BABEL_BP_104_15552_20120304_160459_inLine +BABEL_BP_104_15600_20111230_233908_inLine +BABEL_BP_104_15600_20111230_233908_outLine +BABEL_BP_104_15600_20111230_234837_inLine +BABEL_BP_104_15600_20111230_234837_outLine +BABEL_BP_104_15630_20120402_033748_inLine +BABEL_BP_104_15749_20120131_005221_inLine +BABEL_BP_104_15749_20120131_005221_outLine +BABEL_BP_104_15803_20120117_155821_inLine +BABEL_BP_104_15803_20120117_155821_outLine +BABEL_BP_104_15861_20120401_024411_inLine +BABEL_BP_104_15949_20120229_140434_inLine +BABEL_BP_104_15949_20120229_140434_outLine +BABEL_BP_104_16074_20120129_041107_inLine +BABEL_BP_104_16074_20120129_041107_outLine +BABEL_BP_104_16290_20120220_200234_inLine +BABEL_BP_104_16290_20120220_200234_outLine +BABEL_BP_104_16339_20120131_184255_inLine +BABEL_BP_104_16350_20120315_043233_outLine +BABEL_BP_104_16416_20120317_205531_inLine +BABEL_BP_104_16416_20120317_205531_outLine +BABEL_BP_104_16416_20120317_211129_inLine +BABEL_BP_104_16416_20120317_211129_outLine +BABEL_BP_104_16633_20120311_053635_inLine +BABEL_BP_104_16633_20120311_053635_outLine +BABEL_BP_104_17081_20120128_030343_inLine +BABEL_BP_104_17081_20120128_030343_outLine +BABEL_BP_104_17180_20120321_215255_inLine +BABEL_BP_104_17180_20120321_215255_outLine +BABEL_BP_104_17216_20120128_015245_inLine +BABEL_BP_104_17216_20120128_020324_inLine +BABEL_BP_104_17319_20111225_210159_inLine +BABEL_BP_104_17319_20111225_210159_outLine +BABEL_BP_104_17410_20120129_211432_inLine +BABEL_BP_104_17410_20120129_211432_outLine +BABEL_BP_104_17429_20120209_024521_inLine +BABEL_BP_104_17429_20120209_024521_outLine +BABEL_BP_104_17450_20120331_021646_inLine +BABEL_BP_104_17612_20120205_043931_outLine +BABEL_BP_104_17783_20120205_045923_inLine +BABEL_BP_104_17783_20120205_045923_outLine +BABEL_BP_104_17904_20120320_014817_inLine +BABEL_BP_104_17904_20120320_014817_outLine +BABEL_BP_104_17930_20120321_161410_outLine +BABEL_BP_104_18084_20111230_210850_outLine +BABEL_BP_104_18537_20120130_181101_inLine +BABEL_BP_104_18537_20120130_181101_outLine +BABEL_BP_104_18616_20120126_040622_inLine +BABEL_BP_104_18616_20120126_040622_outLine +BABEL_BP_104_18667_20120208_175014_inLine +BABEL_BP_104_18667_20120208_175014_outLine +BABEL_BP_104_18861_20120218_221303_inLine +BABEL_BP_104_19044_20120218_182247_outLine 
+BABEL_BP_104_19044_20120218_183017_outLine +BABEL_BP_104_19044_20120218_183849_outLine +BABEL_BP_104_19137_20120119_001516_inLine +BABEL_BP_104_19207_20111224_044525_inLine +BABEL_BP_104_19207_20111224_044525_outLine +BABEL_BP_104_19324_20120310_192849_inLine +BABEL_BP_104_19324_20120310_192849_outLine +BABEL_BP_104_19346_20120205_182121_outLine +BABEL_BP_104_19481_20120207_235626_inLine +BABEL_BP_104_19481_20120207_235626_outLine +BABEL_BP_104_19494_20120219_214920_inLine +BABEL_BP_104_19494_20120219_214920_outLine +BABEL_BP_104_19759_20111223_184346_outLine +BABEL_BP_104_20016_20120206_215156_inLine +BABEL_BP_104_20016_20120206_215156_outLine +BABEL_BP_104_20114_20120324_213414_inLine +BABEL_BP_104_20171_20120318_012849_inLine +BABEL_BP_104_20171_20120318_012849_outLine +BABEL_BP_104_20171_20120318_014226_inLine +BABEL_BP_104_20171_20120318_014226_outLine +BABEL_BP_104_20219_20120221_223942_inLine +BABEL_BP_104_20219_20120221_223942_outLine +BABEL_BP_104_20386_20120226_160551_inLine +BABEL_BP_104_20386_20120226_160551_outLine +BABEL_BP_104_20557_20120402_215807_inLine +BABEL_BP_104_20738_20120129_182528_inLine +BABEL_BP_104_20738_20120129_182528_outLine +BABEL_BP_104_21041_20120309_143920_inLine +BABEL_BP_104_21041_20120309_143920_outLine +BABEL_BP_104_21061_20120205_192140_inLine +BABEL_BP_104_21061_20120205_192140_outLine +BABEL_BP_104_21224_20120322_040006_inLine +BABEL_BP_104_21224_20120322_040006_outLine +BABEL_BP_104_21256_20120217_202248_inLine +BABEL_BP_104_21256_20120217_202248_outLine +BABEL_BP_104_21489_20120213_163025_inLine +BABEL_BP_104_21489_20120213_163025_outLine +BABEL_BP_104_21928_20120204_212612_inLine +BABEL_BP_104_21928_20120204_212612_outLine +BABEL_BP_104_21968_20120131_180237_inLine +BABEL_BP_104_21968_20120131_180237_outLine +BABEL_BP_104_22548_20120125_211519_inLine +BABEL_BP_104_22590_20120209_224232_inLine +BABEL_BP_104_22590_20120209_224232_outLine +BABEL_BP_104_23322_20120204_173810_inLine +BABEL_BP_104_23322_20120204_173810_outLine +BABEL_BP_104_23370_20120216_013240_inLine +BABEL_BP_104_23370_20120216_013240_outLine +BABEL_BP_104_23381_20120216_161115_outLine +BABEL_BP_104_23705_20120219_011051_inLine +BABEL_BP_104_23705_20120219_011051_outLine +BABEL_BP_104_24235_20120209_030431_outLine +BABEL_BP_104_24585_20120117_225722_inLine +BABEL_BP_104_24585_20120117_225722_outLine +BABEL_BP_104_24735_20120316_221529_inLine +BABEL_BP_104_24735_20120316_221529_outLine +BABEL_BP_104_24750_20120130_183131_inLine +BABEL_BP_104_24750_20120130_183131_outLine +BABEL_BP_104_24810_20120319_165838_outLine +BABEL_BP_104_25015_20120216_005135_inLine +BABEL_BP_104_25015_20120216_005135_outLine +BABEL_BP_104_25525_20120316_140847_outLine +BABEL_BP_104_25911_20111222_051549_inLine +BABEL_BP_104_25911_20111222_051549_outLine +BABEL_BP_104_26475_20120309_222554_inLine +BABEL_BP_104_26654_20120130_035807_inLine +BABEL_BP_104_26654_20120130_035807_outLine +BABEL_BP_104_26801_20120401_022159_inLine +BABEL_BP_104_26946_20120130_034221_outLine +BABEL_BP_104_27184_20120505_152626_outLine +BABEL_BP_104_27267_20120325_002713_outLine +BABEL_BP_104_27298_20111225_192028_inLine +BABEL_BP_104_27298_20111225_192028_outLine +BABEL_BP_104_27374_20120318_190552_outLine +BABEL_BP_104_28066_20120318_173932_inLine +BABEL_BP_104_28066_20120318_173932_outLine +BABEL_BP_104_28281_20120111_045749_inLine +BABEL_BP_104_28281_20120111_045749_outLine +BABEL_BP_104_28289_20120310_202856_inLine +BABEL_BP_104_28289_20120310_202856_outLine +BABEL_BP_104_28330_20120306_194033_inLine 
+BABEL_BP_104_28330_20120306_195756_inLine +BABEL_BP_104_28734_20120126_205422_inLine +BABEL_BP_104_28734_20120126_212950_inLine +BABEL_BP_104_29009_20120319_164025_outLine +BABEL_BP_104_29103_20120127_183035_inLine +BABEL_BP_104_29103_20120127_183035_outLine +BABEL_BP_104_29757_20120208_191006_inLine +BABEL_BP_104_29757_20120208_191006_outLine +BABEL_BP_104_29809_20120209_044252_inLine +BABEL_BP_104_29809_20120209_044252_outLine +BABEL_BP_104_29967_20120208_201355_inLine +BABEL_BP_104_29967_20120208_201355_outLine +BABEL_BP_104_30031_20120319_035012_inLine +BABEL_BP_104_30031_20120319_035012_outLine +BABEL_BP_104_30040_20120313_211534_inLine +BABEL_BP_104_30040_20120313_211534_outLine +BABEL_BP_104_30040_20120313_212609_inLine +BABEL_BP_104_30040_20120313_212609_outLine +BABEL_BP_104_30143_20111227_132440_inLine +BABEL_BP_104_30197_20120213_160025_inLine +BABEL_BP_104_30197_20120213_160025_outLine +BABEL_BP_104_30271_20120205_163755_inLine +BABEL_BP_104_30271_20120205_165111_inLine +BABEL_BP_104_30620_20120321_151904_inLine +BABEL_BP_104_30620_20120321_151904_outLine +BABEL_BP_104_30628_20120219_182744_inLine +BABEL_BP_104_30628_20120219_182744_outLine +BABEL_BP_104_30711_20120219_175435_outLine +BABEL_BP_104_30847_20120128_045058_inLine +BABEL_BP_104_30847_20120128_050033_inLine +BABEL_BP_104_30848_20120204_154057_inLine +BABEL_BP_104_30848_20120204_154058_outLine +BABEL_BP_104_31046_20120203_161436_inLine +BABEL_BP_104_31095_20120210_132537_inLine +BABEL_BP_104_31215_20120228_003446_inLine +BABEL_BP_104_31215_20120228_003446_outLine +BABEL_BP_104_31215_20120228_032743_inLine +BABEL_BP_104_31215_20120228_032743_outLine +BABEL_BP_104_31663_20120210_140419_inLine +BABEL_BP_104_31663_20120210_140419_outLine +BABEL_BP_104_31919_20120405_023221_inLine +BABEL_BP_104_31926_20120319_040036_outLine +BABEL_BP_104_32703_20120110_212645_outLine +BABEL_BP_104_32703_20120110_212646_inLine +BABEL_BP_104_32956_20120221_133851_inLine +BABEL_BP_104_32956_20120221_133851_outLine +BABEL_BP_104_33223_20120319_050332_inLine +BABEL_BP_104_33272_20120318_233319_outLine +BABEL_BP_104_33337_20120220_005047_inLine +BABEL_BP_104_33337_20120220_005047_outLine +BABEL_BP_104_33510_20120324_185136_outLine +BABEL_BP_104_33518_20120218_204645_inLine +BABEL_BP_104_33518_20120218_204645_outLine +BABEL_BP_104_33846_20120123_194027_inLine +BABEL_BP_104_34037_20120318_232512_inLine +BABEL_BP_104_34037_20120318_235541_inLine +BABEL_BP_104_34164_20120221_141502_inLine +BABEL_BP_104_34164_20120221_141502_outLine +BABEL_BP_104_34188_20120219_000455_inLine +BABEL_BP_104_34188_20120219_000455_outLine +BABEL_BP_104_34335_20111225_224055_outLine +BABEL_BP_104_34540_20120314_153124_inLine +BABEL_BP_104_34540_20120314_153124_outLine +BABEL_BP_104_34708_20120125_173011_inLine +BABEL_BP_104_34708_20120125_173011_outLine +BABEL_BP_104_34714_20120313_142435_inLine +BABEL_BP_104_34714_20120313_142435_outLine +BABEL_BP_104_34833_20120215_025837_inLine +BABEL_BP_104_34833_20120215_025837_outLine +BABEL_BP_104_34994_20120314_001810_outLine +BABEL_BP_104_34994_20120314_003701_outLine +BABEL_BP_104_35073_20120208_223917_outLine +BABEL_BP_104_35152_20111229_025446_inLine +BABEL_BP_104_35152_20111229_025446_outLine +BABEL_BP_104_35241_20120314_052346_inLine +BABEL_BP_104_35241_20120314_052346_outLine +BABEL_BP_104_35444_20120310_190608_inLine +BABEL_BP_104_35444_20120310_190608_outLine +BABEL_BP_104_35544_20120131_174538_inLine +BABEL_BP_104_35544_20120131_174538_outLine +BABEL_BP_104_35646_20120202_222418_inLine 
+BABEL_BP_104_35646_20120202_222418_outLine +BABEL_BP_104_35874_20120403_213324_inLine +BABEL_BP_104_35916_20120204_030147_inLine +BABEL_BP_104_35916_20120204_030147_outLine +BABEL_BP_104_35923_20120216_021137_inLine +BABEL_BP_104_35923_20120216_021137_outLine +BABEL_BP_104_35983_20120324_152856_outLine +BABEL_BP_104_36017_20120123_222703_outLine +BABEL_BP_104_36138_20120206_210519_inLine +BABEL_BP_104_36138_20120206_210519_outLine +BABEL_BP_104_36413_20120310_185758_inLine +BABEL_BP_104_36413_20120310_185758_outLine +BABEL_BP_104_36487_20120209_211827_inLine +BABEL_BP_104_36487_20120209_211827_outLine +BABEL_BP_104_37131_20120318_210220_inLine +BABEL_BP_104_37131_20120318_210220_outLine +BABEL_BP_104_37135_20120219_044437_inLine +BABEL_BP_104_37135_20120219_044437_outLine +BABEL_BP_104_37357_20120321_212732_inLine +BABEL_BP_104_37357_20120321_212732_outLine +BABEL_BP_104_37551_20120201_032910_inLine +BABEL_BP_104_37551_20120201_032910_outLine +BABEL_BP_104_37593_20120130_203434_inLine +BABEL_BP_104_37593_20120130_203434_outLine +BABEL_BP_104_37687_20120316_175600_outLine +BABEL_BP_104_37731_20120213_034923_inLine +BABEL_BP_104_37731_20120213_034923_outLine +BABEL_BP_104_37733_20120207_040916_inLine +BABEL_BP_104_37733_20120207_040936_outLine +BABEL_BP_104_38163_20120109_022356_inLine +BABEL_BP_104_38163_20120109_022356_outLine +BABEL_BP_104_38223_20120129_195918_inLine +BABEL_BP_104_38223_20120129_195918_outLine +BABEL_BP_104_38255_20120312_231219_inLine +BABEL_BP_104_38255_20120312_231819_inLine +BABEL_BP_104_38255_20120322_142237_inLine +BABEL_BP_104_38479_20120213_011154_inLine +BABEL_BP_104_38479_20120213_011154_outLine +BABEL_BP_104_38563_20120127_181357_outLine +BABEL_BP_104_38685_20120205_205815_inLine +BABEL_BP_104_38685_20120205_205815_outLine +BABEL_BP_104_38902_20120331_152704_inLine +BABEL_BP_104_39119_20120203_183149_inLine +BABEL_BP_104_39119_20120203_183149_outLine +BABEL_BP_104_39178_20120109_195710_inLine +BABEL_BP_104_39320_20120110_190913_inLine +BABEL_BP_104_39320_20120110_190913_outLine +BABEL_BP_104_39390_20120322_042714_outLine +BABEL_BP_104_39525_20120217_200400_inLine +BABEL_BP_104_39525_20120217_200400_outLine +BABEL_BP_104_39999_20120326_194721_inLine +BABEL_BP_104_39999_20120326_194721_outLine +BABEL_BP_104_40136_20120222_030818_inLine +BABEL_BP_104_40136_20120222_030823_outLine +BABEL_BP_104_40585_20120309_200652_outLine +BABEL_BP_104_40607_20120324_163524_inLine +BABEL_BP_104_40612_20120106_024347_inLine +BABEL_BP_104_40612_20120106_024347_outLine +BABEL_BP_104_40640_20120131_044455_outLine +BABEL_BP_104_40701_20120209_031300_inLine +BABEL_BP_104_40701_20120209_031301_outLine +BABEL_BP_104_40866_20120119_030533_inLine +BABEL_BP_104_40866_20120119_030533_outLine +BABEL_BP_104_40889_20120227_180714_inLine +BABEL_BP_104_40889_20120227_180714_outLine +BABEL_BP_104_41306_20120223_191213_inLine +BABEL_BP_104_41306_20120223_191213_outLine +BABEL_BP_104_41498_20120309_223111_outLine +BABEL_BP_104_41531_20120331_010320_inLine +BABEL_BP_104_41531_20120331_010320_outLine +BABEL_BP_104_41610_20111225_214331_inLine +BABEL_BP_104_41610_20111225_214331_outLine +BABEL_BP_104_41871_20120310_200016_inLine +BABEL_BP_104_42006_20120304_162643_inLine +BABEL_BP_104_42006_20120304_162643_outLine +BABEL_BP_104_42145_20120127_042217_inLine +BABEL_BP_104_42145_20120127_042217_outLine +BABEL_BP_104_42571_20120229_014427_inLine +BABEL_BP_104_42571_20120229_014427_outLine +BABEL_BP_104_42571_20120229_020000_inLine +BABEL_BP_104_42571_20120229_020000_outLine 
+BABEL_BP_104_42836_20120331_181552_outLine +BABEL_BP_104_42929_20120307_150902_inLine +BABEL_BP_104_42929_20120307_150902_outLine +BABEL_BP_104_42952_20120318_233729_inLine +BABEL_BP_104_43036_20120128_024308_inLine +BABEL_BP_104_43036_20120128_025047_inLine +BABEL_BP_104_43036_20120128_025047_outLine +BABEL_BP_104_43036_20120128_030158_inLine +BABEL_BP_104_43036_20120128_030158_outLine +BABEL_BP_104_43051_20120207_002833_inLine +BABEL_BP_104_43267_20120229_211432_inLine +BABEL_BP_104_43267_20120229_211432_outLine +BABEL_BP_104_43322_20120126_040725_inLine +BABEL_BP_104_43462_20120216_210005_inLine +BABEL_BP_104_43462_20120216_210005_outLine +BABEL_BP_104_43480_20120326_155717_inLine +BABEL_BP_104_43501_20120331_220724_outLine +BABEL_BP_104_43501_20120331_222326_outLine +BABEL_BP_104_43571_20120203_040537_inLine +BABEL_BP_104_43571_20120203_040537_outLine +BABEL_BP_104_43684_20120128_182736_outLine +BABEL_BP_104_43714_20120219_132220_inLine +BABEL_BP_104_43714_20120219_132220_outLine +BABEL_BP_104_43724_20120219_213737_inLine +BABEL_BP_104_43724_20120219_213737_outLine +BABEL_BP_104_43725_20120205_002936_inLine +BABEL_BP_104_43725_20120205_002936_outLine +BABEL_BP_104_43833_20120331_193735_outLine +BABEL_BP_104_43974_20120320_213041_inLine +BABEL_BP_104_43974_20120320_215224_inLine +BABEL_BP_104_44103_20120218_005711_inLine +BABEL_BP_104_44103_20120218_005711_outLine +BABEL_BP_104_44141_20120209_171547_inLine +BABEL_BP_104_44141_20120209_171547_outLine +BABEL_BP_104_44267_20120404_010500_inLine +BABEL_BP_104_44278_20120318_152209_inLine +BABEL_BP_104_44278_20120318_152209_outLine +BABEL_BP_104_44468_20120222_125222_inLine +BABEL_BP_104_44468_20120222_125222_outLine +BABEL_BP_104_44515_20120326_144709_inLine +BABEL_BP_104_44515_20120326_150551_inLine +BABEL_BP_104_44799_20120119_040419_inLine +BABEL_BP_104_44799_20120119_040419_outLine +BABEL_BP_104_44976_20120128_211450_inLine +BABEL_BP_104_44976_20120128_211450_outLine +BABEL_BP_104_45064_20120218_205233_inLine +BABEL_BP_104_45064_20120218_205233_outLine +BABEL_BP_104_45188_20120121_023218_outLine +BABEL_BP_104_45356_20120324_234702_outLine +BABEL_BP_104_45403_20111222_014909_outLine +BABEL_BP_104_45562_20120131_200753_inLine +BABEL_BP_104_45823_20120103_201816_inLine +BABEL_BP_104_45842_20120210_164857_inLine +BABEL_BP_104_45842_20120210_164857_outLine +BABEL_BP_104_45926_20120127_162212_inLine +BABEL_BP_104_45926_20120127_162212_outLine +BABEL_BP_104_45947_20120313_214251_inLine +BABEL_BP_104_46004_20120223_160156_inLine +BABEL_BP_104_46004_20120223_160156_outLine +BABEL_BP_104_46168_20120217_200729_inLine +BABEL_BP_104_46168_20120217_200729_outLine +BABEL_BP_104_46319_20120210_225923_outLine +BABEL_BP_104_46361_20120126_004615_outLine +BABEL_BP_104_46455_20120218_222247_inLine +BABEL_BP_104_46455_20120218_222247_outLine +BABEL_BP_104_46734_20120219_025954_outLine +BABEL_BP_104_46827_20120210_134310_inLine +BABEL_BP_104_46827_20120210_134310_outLine +BABEL_BP_104_46900_20120110_181315_inLine +BABEL_BP_104_46900_20120110_181315_outLine +BABEL_BP_104_46979_20120223_173811_inLine +BABEL_BP_104_46979_20120223_173811_outLine +BABEL_BP_104_47015_20120222_053105_inLine +BABEL_BP_104_47015_20120222_053105_outLine +BABEL_BP_104_47177_20120127_223720_outLine +BABEL_BP_104_47424_20120120_233633_inLine +BABEL_BP_104_47424_20120120_233633_outLine +BABEL_BP_104_47836_20120331_183954_outLine +BABEL_BP_104_47916_20120401_014343_inLine +BABEL_BP_104_47917_20120319_003035_inLine +BABEL_BP_104_47917_20120319_003035_outLine 
+BABEL_BP_104_48000_20120323_171146_inLine +BABEL_BP_104_48000_20120323_171146_outLine +BABEL_BP_104_48001_20120204_231603_inLine +BABEL_BP_104_48001_20120204_231603_outLine +BABEL_BP_104_48259_20120217_200412_inLine +BABEL_BP_104_48259_20120217_200412_outLine +BABEL_BP_104_48365_20120212_043935_inLine +BABEL_BP_104_48365_20120212_043935_outLine +BABEL_BP_104_48416_20120218_203541_inLine +BABEL_BP_104_48416_20120218_203542_outLine +BABEL_BP_104_48834_20111221_032658_inLine +BABEL_BP_104_48834_20111221_032658_outLine +BABEL_BP_104_48944_20120218_011825_inLine +BABEL_BP_104_48944_20120218_011825_outLine +BABEL_BP_104_48946_20120320_192250_inLine +BABEL_BP_104_48946_20120320_192250_outLine +BABEL_BP_104_49141_20120330_015342_inLine +BABEL_BP_104_49629_20120312_155816_outLine +BABEL_BP_104_50030_20120404_005406_inLine +BABEL_BP_104_50407_20120318_232348_inLine +BABEL_BP_104_50407_20120318_232348_outLine +BABEL_BP_104_50523_20120314_033747_inLine +BABEL_BP_104_50523_20120314_033747_outLine +BABEL_BP_104_50523_20120314_231004_inLine +BABEL_BP_104_50523_20120314_231004_outLine +BABEL_BP_104_50583_20120404_000547_inLine +BABEL_BP_104_50682_20120116_205741_inLine +BABEL_BP_104_50682_20120116_205741_outLine +BABEL_BP_104_50820_20120213_140300_inLine +BABEL_BP_104_50820_20120213_140300_outLine +BABEL_BP_104_50940_20120309_160847_inLine +BABEL_BP_104_50940_20120322_132036_inLine +BABEL_BP_104_51024_20120131_172745_inLine +BABEL_BP_104_51047_20120319_042347_outLine +BABEL_BP_104_51079_20120316_150756_outLine +BABEL_BP_104_51329_20120222_203129_inLine +BABEL_BP_104_51329_20120222_203129_outLine +BABEL_BP_104_51329_20120222_205332_inLine +BABEL_BP_104_51329_20120222_205332_outLine +BABEL_BP_104_51388_20120221_175113_inLine +BABEL_BP_104_51519_20120220_052247_inLine +BABEL_BP_104_51519_20120220_052247_outLine +BABEL_BP_104_51570_20120118_225333_inLine +BABEL_BP_104_51570_20120118_225333_outLine +BABEL_BP_104_51716_20120221_005215_inLine +BABEL_BP_104_51716_20120221_005215_outLine +BABEL_BP_104_52067_20120313_210602_inLine +BABEL_BP_104_52067_20120313_210602_outLine +BABEL_BP_104_52116_20120316_225019_inLine +BABEL_BP_104_52116_20120316_225020_outLine +BABEL_BP_104_52300_20120203_210256_inLine +BABEL_BP_104_52300_20120203_210256_outLine +BABEL_BP_104_52359_20120328_212912_inLine +BABEL_BP_104_52753_20120209_225916_inLine +BABEL_BP_104_52753_20120209_225916_outLine +BABEL_BP_104_52753_20120213_014050_inLine +BABEL_BP_104_52753_20120213_014050_outLine +BABEL_BP_104_52954_20120313_170902_inLine +BABEL_BP_104_52954_20120313_170902_outLine +BABEL_BP_104_53159_20120402_035901_inLine +BABEL_BP_104_53159_20120402_035901_outLine +BABEL_BP_104_53262_20120311_192937_inLine +BABEL_BP_104_53334_20120309_184805_inLine +BABEL_BP_104_53334_20120309_184805_outLine +BABEL_BP_104_53346_20120205_222257_inLine +BABEL_BP_104_53659_20120218_205643_inLine +BABEL_BP_104_53659_20120218_205643_outLine +BABEL_BP_104_53718_20120202_220720_outLine +BABEL_BP_104_53820_20120327_182222_inLine +BABEL_BP_104_53820_20120327_182222_outLine +BABEL_BP_104_54263_20120114_032041_inLine +BABEL_BP_104_54417_20120119_045736_inLine +BABEL_BP_104_54417_20120119_045736_outLine +BABEL_BP_104_54780_20120403_231516_inLine +BABEL_BP_104_54780_20120403_232436_inLine +BABEL_BP_104_54909_20120130_194003_inLine +BABEL_BP_104_54909_20120130_194003_outLine +BABEL_BP_104_54975_20120111_002324_inLine +BABEL_BP_104_54975_20120111_002324_outLine +BABEL_BP_104_55131_20111225_220753_outLine +BABEL_BP_104_55213_20120331_185824_outLine 
+BABEL_BP_104_55316_20111221_024834_inLine +BABEL_BP_104_55382_20120318_154619_inLine +BABEL_BP_104_55544_20120108_200418_inLine +BABEL_BP_104_55544_20120108_200418_outLine +BABEL_BP_104_55668_20120212_011829_inLine +BABEL_BP_104_55668_20120212_011829_outLine +BABEL_BP_104_55855_20111220_211829_outLine +BABEL_BP_104_56119_20120216_183711_inLine +BABEL_BP_104_56119_20120216_183711_outLine +BABEL_BP_104_56201_20120126_180227_outLine +BABEL_BP_104_56308_20120402_024809_outLine +BABEL_BP_104_56704_20120120_155806_inLine +BABEL_BP_104_56704_20120120_155806_outLine +BABEL_BP_104_56753_20120322_204356_outLine +BABEL_BP_104_56805_20120320_045112_inLine +BABEL_BP_104_56805_20120320_045112_outLine +BABEL_BP_104_57005_20120321_034143_inLine +BABEL_BP_104_57082_20120110_024829_inLine +BABEL_BP_104_57116_20120110_180036_inLine +BABEL_BP_104_57167_20111230_213737_outLine +BABEL_BP_104_57210_20120321_020212_inLine +BABEL_BP_104_57210_20120321_020212_outLine +BABEL_BP_104_57263_20120302_211404_inLine +BABEL_BP_104_57320_20120204_230109_inLine +BABEL_BP_104_57320_20120204_230109_outLine +BABEL_BP_104_57492_20120316_185552_inLine +BABEL_BP_104_57492_20120316_185552_outLine +BABEL_BP_104_57531_20120203_165801_inLine +BABEL_BP_104_57531_20120203_165801_outLine +BABEL_BP_104_57618_20120203_144717_inLine +BABEL_BP_104_57618_20120203_144717_outLine +BABEL_BP_104_57672_20120204_030206_outLine +BABEL_BP_104_58041_20120129_165617_inLine +BABEL_BP_104_58041_20120129_165617_outLine +BABEL_BP_104_58089_20120111_210636_inLine +BABEL_BP_104_58089_20120111_210636_outLine +BABEL_BP_104_58094_20120211_202938_outLine +BABEL_BP_104_58149_20120218_161613_outLine +BABEL_BP_104_58188_20120124_150608_inLine +BABEL_BP_104_58188_20120124_150608_outLine +BABEL_BP_104_58298_20120208_214852_inLine +BABEL_BP_104_58298_20120208_214852_outLine +BABEL_BP_104_58807_20120327_175726_outLine +BABEL_BP_104_58939_20120212_184855_inLine +BABEL_BP_104_58939_20120212_184855_outLine +BABEL_BP_104_58963_20120331_015840_inLine +BABEL_BP_104_58963_20120331_015840_outLine +BABEL_BP_104_59158_20120212_005248_inLine +BABEL_BP_104_59158_20120212_005248_outLine +BABEL_BP_104_59183_20120312_190106_inLine +BABEL_BP_104_59219_20120131_225115_outLine +BABEL_BP_104_59399_20120318_144751_inLine +BABEL_BP_104_59399_20120318_144752_outLine +BABEL_BP_104_59482_20120309_190927_inLine +BABEL_BP_104_59482_20120309_190927_outLine +BABEL_BP_104_59681_20120123_213306_inLine +BABEL_BP_104_59681_20120123_213306_outLine +BABEL_BP_104_59835_20120212_162802_inLine +BABEL_BP_104_59835_20120212_162802_outLine +BABEL_BP_104_60462_20120201_181707_inLine +BABEL_BP_104_60462_20120201_181707_outLine +BABEL_BP_104_60737_20120208_204130_inLine +BABEL_BP_104_60737_20120208_204130_outLine +BABEL_BP_104_60806_20120213_161652_outLine +BABEL_BP_104_60921_20120220_050615_inLine +BABEL_BP_104_60921_20120220_050615_outLine +BABEL_BP_104_61029_20120201_224200_outLine +BABEL_BP_104_61166_20120220_033838_inLine +BABEL_BP_104_61166_20120220_034717_inLine +BABEL_BP_104_61327_20120326_140350_inLine +BABEL_BP_104_61327_20120326_140350_outLine +BABEL_BP_104_61523_20120212_035522_inLine +BABEL_BP_104_61606_20120131_174533_inLine +BABEL_BP_104_61655_20120208_203143_inLine +BABEL_BP_104_61655_20120208_203143_outLine +BABEL_BP_104_61733_20120205_220251_outLine +BABEL_BP_104_61735_20120314_012744_inLine +BABEL_BP_104_61909_20120320_190739_inLine +BABEL_BP_104_61909_20120320_190739_outLine +BABEL_BP_104_62182_20111226_205547_inLine +BABEL_BP_104_62182_20111226_205547_outLine 
+BABEL_BP_104_62388_20120204_031740_inLine +BABEL_BP_104_62388_20120204_031740_outLine +BABEL_BP_104_62815_20120318_025812_outLine +BABEL_BP_104_62816_20120312_153937_outLine +BABEL_BP_104_62978_20120318_211036_inLine +BABEL_BP_104_62978_20120318_211036_outLine +BABEL_BP_104_63111_20120204_232445_outLine +BABEL_BP_104_63215_20120213_040737_inLine +BABEL_BP_104_63215_20120213_040737_outLine +BABEL_BP_104_63220_20120131_155658_inLine +BABEL_BP_104_63220_20120131_155658_outLine +BABEL_BP_104_63390_20120123_212718_outLine +BABEL_BP_104_63397_20120217_194928_inLine +BABEL_BP_104_63397_20120217_194928_outLine +BABEL_BP_104_63603_20120128_213000_outLine +BABEL_BP_104_63784_20120216_015608_inLine +BABEL_BP_104_63784_20120216_015608_outLine +BABEL_BP_104_63929_20120319_155419_inLine +BABEL_BP_104_63929_20120319_155419_outLine +BABEL_BP_104_63934_20120318_201706_inLine +BABEL_BP_104_63934_20120318_201706_outLine +BABEL_BP_104_64055_20120111_034236_outLine +BABEL_BP_104_64297_20120205_031234_inLine +BABEL_BP_104_64297_20120205_031234_outLine +BABEL_BP_104_64646_20120319_163845_outLine +BABEL_BP_104_64695_20120128_014256_inLine +BABEL_BP_104_64695_20120128_014256_outLine +BABEL_BP_104_64820_20120111_032311_inLine +BABEL_BP_104_64820_20120111_032311_outLine +BABEL_BP_104_64905_20120206_221140_inLine +BABEL_BP_104_64905_20120206_221140_outLine +BABEL_BP_104_64990_20120119_173958_inLine +BABEL_BP_104_64990_20120119_173958_outLine +BABEL_BP_104_65211_20120119_015405_inLine +BABEL_BP_104_65211_20120119_015405_outLine +BABEL_BP_104_65341_20120220_222356_inLine +BABEL_BP_104_65341_20120220_222356_outLine +BABEL_BP_104_65357_20120309_190849_inLine +BABEL_BP_104_65357_20120309_190849_outLine +BABEL_BP_104_65590_20120109_001414_inLine +BABEL_BP_104_65590_20120109_001414_outLine +BABEL_BP_104_65741_20120218_010022_inLine +BABEL_BP_104_65788_20120131_172922_outLine +BABEL_BP_104_65954_20120128_163139_inLine +BABEL_BP_104_65954_20120128_163139_outLine +BABEL_BP_104_65974_20120316_195524_inLine +BABEL_BP_104_65974_20120316_195524_outLine +BABEL_BP_104_66351_20120317_181035_inLine +BABEL_BP_104_66351_20120317_181035_outLine +BABEL_BP_104_66643_20120316_004947_inLine +BABEL_BP_104_66643_20120316_004947_outLine +BABEL_BP_104_66784_20111225_190506_outLine +BABEL_BP_104_66879_20120213_004555_inLine +BABEL_BP_104_66879_20120213_004555_outLine +BABEL_BP_104_67106_20120208_201829_inLine +BABEL_BP_104_67106_20120208_201829_outLine +BABEL_BP_104_67374_20120210_034059_inLine +BABEL_BP_104_67374_20120210_034100_outLine +BABEL_BP_104_67423_20120205_220658_outLine +BABEL_BP_104_67534_20120204_181436_inLine +BABEL_BP_104_67534_20120204_181436_outLine +BABEL_BP_104_67655_20120218_035728_outLine +BABEL_BP_104_67684_20120316_135144_inLine +BABEL_BP_104_67684_20120316_135144_outLine +BABEL_BP_104_67685_20120217_235729_inLine +BABEL_BP_104_67685_20120217_235729_outLine +BABEL_BP_104_67718_20120131_164436_inLine +BABEL_BP_104_67718_20120131_164436_outLine +BABEL_BP_104_67928_20120109_174230_inLine +BABEL_BP_104_67928_20120109_174230_outLine +BABEL_BP_104_68077_20120219_155535_outLine +BABEL_BP_104_68111_20120321_185146_outLine +BABEL_BP_104_68144_20120210_223106_outLine +BABEL_BP_104_68189_20120128_005011_inLine +BABEL_BP_104_68189_20120128_005011_outLine +BABEL_BP_104_68209_20120219_045221_inLine +BABEL_BP_104_68926_20120229_145934_inLine +BABEL_BP_104_68926_20120229_145934_outLine +BABEL_BP_104_68997_20120126_010839_inLine +BABEL_BP_104_69127_20120402_221743_outLine +BABEL_BP_104_69398_20111219_215754_inLine 
+BABEL_BP_104_69638_20120205_022624_inLine +BABEL_BP_104_69638_20120205_022624_outLine +BABEL_BP_104_69656_20120129_050158_inLine +BABEL_BP_104_69656_20120129_050158_outLine +BABEL_BP_104_69656_20120129_051238_inLine +BABEL_BP_104_69656_20120129_051238_outLine +BABEL_BP_104_69771_20120220_034015_inLine +BABEL_BP_104_69771_20120220_034015_outLine +BABEL_BP_104_70207_20120209_001133_inLine +BABEL_BP_104_70207_20120209_001133_outLine +BABEL_BP_104_70333_20120210_033437_outLine +BABEL_BP_104_70528_20120128_013553_inLine +BABEL_BP_104_70528_20120128_013553_outLine +BABEL_BP_104_70762_20120213_175054_outLine +BABEL_BP_104_70858_20120204_012205_inLine +BABEL_BP_104_70897_20120315_000410_inLine +BABEL_BP_104_70897_20120315_000410_outLine +BABEL_BP_104_70897_20120315_013535_inLine +BABEL_BP_104_70897_20120315_013535_outLine +BABEL_BP_104_71204_20120315_040441_inLine +BABEL_BP_104_71324_20111220_215105_outLine +BABEL_BP_104_71786_20120219_212052_outLine +BABEL_BP_104_71948_20120210_012347_inLine +BABEL_BP_104_71970_20120310_195048_inLine +BABEL_BP_104_72179_20120129_175206_inLine +BABEL_BP_104_72179_20120129_175206_outLine +BABEL_BP_104_72480_20120211_223904_inLine +BABEL_BP_104_72480_20120211_224426_inLine +BABEL_BP_104_72693_20120209_005646_inLine +BABEL_BP_104_72693_20120209_005646_outLine +BABEL_BP_104_72709_20120209_034548_inLine +BABEL_BP_104_72709_20120209_034548_outLine +BABEL_BP_104_72874_20120213_191257_inLine +BABEL_BP_104_72874_20120213_191257_outLine +BABEL_BP_104_72910_20120310_185203_outLine +BABEL_BP_104_73188_20120128_003921_inLine +BABEL_BP_104_73188_20120128_003921_outLine +BABEL_BP_104_73199_20120119_195108_outLine +BABEL_BP_104_73403_20120320_183508_outLine +BABEL_BP_104_73403_20120320_184757_outLine +BABEL_BP_104_73450_20120206_024342_inLine +BABEL_BP_104_73450_20120206_024342_outLine +BABEL_BP_104_73607_20120203_163328_inLine +BABEL_BP_104_73607_20120203_163328_outLine +BABEL_BP_104_73925_20120123_233630_inLine +BABEL_BP_104_73925_20120123_233630_outLine +BABEL_BP_104_74261_20120331_191708_outLine +BABEL_BP_104_74334_20111230_035012_inLine +BABEL_BP_104_74940_20120228_225523_inLine +BABEL_BP_104_74940_20120228_225523_outLine +BABEL_BP_104_75390_20120218_133736_inLine +BABEL_BP_104_75390_20120218_133736_outLine +BABEL_BP_104_75402_20120319_160944_inLine +BABEL_BP_104_75724_20120207_172820_outLine +BABEL_BP_104_75822_20120205_214035_inLine +BABEL_BP_104_75895_20120206_024214_inLine +BABEL_BP_104_75895_20120206_024214_outLine +BABEL_BP_104_76375_20120226_014726_inLine +BABEL_BP_104_76375_20120226_014726_outLine +BABEL_BP_104_76573_20120213_150121_inLine +BABEL_BP_104_76573_20120213_150121_outLine +BABEL_BP_104_76714_20120313_220017_inLine +BABEL_BP_104_76714_20120313_220017_outLine +BABEL_BP_104_76738_20120210_010510_inLine +BABEL_BP_104_76742_20111215_203118_outLine +BABEL_BP_104_76832_20120210_030141_outLine +BABEL_BP_104_77097_20120214_235954_inLine +BABEL_BP_104_77097_20120214_235954_outLine +BABEL_BP_104_77256_20120309_064948_inLine +BABEL_BP_104_77294_20120318_224422_inLine +BABEL_BP_104_77294_20120318_224422_outLine +BABEL_BP_104_77537_20120206_034628_outLine +BABEL_BP_104_77693_20111228_014255_outLine +BABEL_BP_104_77711_20120229_163050_inLine +BABEL_BP_104_77711_20120229_163050_outLine +BABEL_BP_104_77711_20120229_164115_inLine +BABEL_BP_104_77711_20120229_164115_outLine +BABEL_BP_104_78225_20120126_170942_outLine +BABEL_BP_104_78254_20120209_222912_inLine +BABEL_BP_104_78254_20120209_222912_outLine +BABEL_BP_104_78254_20120209_234516_inLine 
+BABEL_BP_104_78254_20120209_234516_outLine +BABEL_BP_104_78367_20120105_012610_inLine +BABEL_BP_104_78367_20120105_012610_outLine +BABEL_BP_104_78443_20120128_211331_inLine +BABEL_BP_104_78443_20120128_211331_outLine +BABEL_BP_104_78452_20120316_005121_inLine +BABEL_BP_104_78452_20120316_005121_outLine +BABEL_BP_104_78452_20120316_005946_inLine +BABEL_BP_104_78452_20120316_005946_outLine +BABEL_BP_104_78462_20120112_181459_inLine +BABEL_BP_104_78737_20120316_173217_inLine +BABEL_BP_104_78737_20120316_173217_outLine +BABEL_BP_104_78978_20120322_041159_inLine +BABEL_BP_104_78978_20120322_042345_inLine +BABEL_BP_104_79030_20120222_170416_inLine +BABEL_BP_104_79030_20120222_170416_outLine +BABEL_BP_104_79030_20120222_211653_inLine +BABEL_BP_104_79030_20120222_211653_outLine +BABEL_BP_104_79120_20120127_021912_inLine +BABEL_BP_104_79120_20120127_021912_outLine +BABEL_BP_104_79120_20120127_030132_inLine +BABEL_BP_104_79120_20120127_030132_outLine +BABEL_BP_104_79127_20120127_171446_outLine +BABEL_BP_104_79156_20120126_191440_outLine +BABEL_BP_104_79185_20120126_025253_inLine +BABEL_BP_104_79185_20120126_025253_outLine +BABEL_BP_104_79191_20120125_210322_inLine +BABEL_BP_104_79191_20120125_210322_outLine +BABEL_BP_104_79244_20111230_180239_inLine +BABEL_BP_104_79378_20120302_011529_outLine +BABEL_BP_104_79387_20120104_201110_inLine +BABEL_BP_104_79387_20120104_201110_outLine +BABEL_BP_104_79679_20120215_053807_inLine +BABEL_BP_104_79679_20120215_053807_outLine +BABEL_BP_104_79753_20120203_173233_inLine +BABEL_BP_104_79753_20120203_173233_outLine +BABEL_BP_104_79888_20120318_024215_outLine +BABEL_BP_104_80105_20120205_233041_inLine +BABEL_BP_104_80105_20120205_233041_outLine +BABEL_BP_104_80134_20120313_215613_inLine +BABEL_BP_104_80134_20120313_215613_outLine +BABEL_BP_104_80226_20120210_182546_inLine +BABEL_BP_104_80226_20120210_182546_outLine +BABEL_BP_104_80284_20120109_235306_inLine +BABEL_BP_104_80284_20120109_235306_outLine +BABEL_BP_104_80424_20120207_221904_inLine +BABEL_BP_104_80424_20120207_221904_outLine +BABEL_BP_104_80559_20120319_152020_outLine +BABEL_BP_104_80616_20120223_193040_inLine +BABEL_BP_104_80616_20120223_193040_outLine +BABEL_BP_104_80679_20120331_033903_outLine +BABEL_BP_104_80815_20120322_001246_outLine +BABEL_BP_104_80867_20120309_034536_inLine +BABEL_BP_104_80867_20120309_034536_outLine +BABEL_BP_104_80929_20120310_194854_inLine +BABEL_BP_104_80929_20120310_194854_outLine +BABEL_BP_104_81726_20120229_154500_inLine +BABEL_BP_104_81726_20120229_154500_outLine +BABEL_BP_104_81773_20120404_000845_outLine +BABEL_BP_104_81923_20120128_004752_inLine +BABEL_BP_104_81923_20120128_004752_outLine +BABEL_BP_104_81996_20120128_185859_outLine +BABEL_BP_104_82068_20120320_233307_inLine +BABEL_BP_104_82068_20120320_234626_inLine +BABEL_BP_104_82149_20120112_163113_inLine +BABEL_BP_104_82499_20120215_024134_inLine +BABEL_BP_104_82499_20120215_024134_outLine +BABEL_BP_104_82526_20120201_124800_inLine +BABEL_BP_104_82526_20120201_124800_outLine +BABEL_BP_104_82583_20120211_041829_outLine +BABEL_BP_104_82595_20120324_154901_outLine +BABEL_BP_104_82677_20120206_173830_outLine +BABEL_BP_104_82838_20120313_152742_inLine +BABEL_BP_104_82838_20120313_152742_outLine +BABEL_BP_104_82838_20120313_154639_inLine +BABEL_BP_104_82838_20120313_154639_outLine +BABEL_BP_104_82849_20120212_185110_inLine +BABEL_BP_104_82849_20120212_185110_outLine +BABEL_BP_104_82964_20120218_181351_outLine +BABEL_BP_104_83050_20120114_231129_inLine +BABEL_BP_104_83050_20120114_231129_outLine 
+BABEL_BP_104_83072_20120213_170201_inLine +BABEL_BP_104_83072_20120213_170201_outLine +BABEL_BP_104_83112_20120204_161112_inLine +BABEL_BP_104_83112_20120204_161112_outLine +BABEL_BP_104_83747_20120120_153904_outLine +BABEL_BP_104_83835_20120321_145755_inLine +BABEL_BP_104_83835_20120321_145755_outLine +BABEL_BP_104_83866_20120206_040504_inLine +BABEL_BP_104_83866_20120206_040505_outLine +BABEL_BP_104_83941_20120119_030904_inLine +BABEL_BP_104_83941_20120119_030904_outLine +BABEL_BP_104_84132_20120312_054349_outLine +BABEL_BP_104_84315_20120318_184410_outLine +BABEL_BP_104_84360_20111228_033339_inLine +BABEL_BP_104_84360_20111228_033339_outLine +BABEL_BP_104_84854_20120129_233819_inLine +BABEL_BP_104_84854_20120129_233819_outLine +BABEL_BP_104_84885_20120217_215436_inLine +BABEL_BP_104_84885_20120217_215436_outLine +BABEL_BP_104_84950_20120130_131546_inLine +BABEL_BP_104_84950_20120130_131546_outLine +BABEL_BP_104_84985_20120319_172452_outLine +BABEL_BP_104_84985_20120319_173047_outLine +BABEL_BP_104_85147_20120129_180533_inLine +BABEL_BP_104_85147_20120129_180533_outLine +BABEL_BP_104_85272_20120127_032845_inLine +BABEL_BP_104_85272_20120127_032845_outLine +BABEL_BP_104_85388_20120128_190259_inLine +BABEL_BP_104_85388_20120128_190259_outLine +BABEL_BP_104_85558_20120413_044033_inLine +BABEL_BP_104_85579_20120205_170917_inLine +BABEL_BP_104_85579_20120205_170917_outLine +BABEL_BP_104_85597_20120320_231227_inLine +BABEL_BP_104_86528_20120128_211228_inLine +BABEL_BP_104_86537_20120128_022125_inLine +BABEL_BP_104_86537_20120128_023523_inLine +BABEL_BP_104_87032_20120111_203623_inLine +BABEL_BP_104_87032_20120111_203623_outLine +BABEL_BP_104_87067_20120324_182930_inLine +BABEL_BP_104_87067_20120324_182930_outLine +BABEL_BP_104_87422_20120212_021635_outLine +BABEL_BP_104_87453_20120131_210831_inLine +BABEL_BP_104_87453_20120131_210831_outLine +BABEL_BP_104_87517_20120207_200619_inLine +BABEL_BP_104_87517_20120207_200619_outLine +BABEL_BP_104_87970_20120221_172638_inLine +BABEL_BP_104_87970_20120221_172638_outLine +BABEL_BP_104_88006_20120207_214550_inLine +BABEL_BP_104_88006_20120207_214550_outLine +BABEL_BP_104_88070_20120318_164350_outLine +BABEL_BP_104_88434_20120319_170128_inLine +BABEL_BP_104_88434_20120319_170128_outLine +BABEL_BP_104_88604_20120111_001257_inLine +BABEL_BP_104_88604_20120111_001257_outLine +BABEL_BP_104_88921_20120205_215225_inLine +BABEL_BP_104_88921_20120205_215225_outLine +BABEL_BP_104_89036_20120327_211455_inLine +BABEL_BP_104_89053_20120129_232038_inLine +BABEL_BP_104_89053_20120129_232038_outLine +BABEL_BP_104_89402_20120205_045136_outLine +BABEL_BP_104_89925_20120202_000208_inLine +BABEL_BP_104_89925_20120202_000208_outLine +BABEL_BP_104_89952_20120131_212850_inLine +BABEL_BP_104_89952_20120131_212850_outLine +BABEL_BP_104_90022_20120207_051223_inLine +BABEL_BP_104_90022_20120207_051223_outLine +BABEL_BP_104_90263_20120205_044035_inLine +BABEL_BP_104_90263_20120205_044035_outLine +BABEL_BP_104_90310_20120129_024342_outLine +BABEL_BP_104_91161_20120311_032449_inLine +BABEL_BP_104_91161_20120311_032449_outLine +BABEL_BP_104_91495_20120210_163107_inLine +BABEL_BP_104_91495_20120210_163107_outLine +BABEL_BP_104_91875_20120210_004013_inLine +BABEL_BP_104_91875_20120210_004013_outLine +BABEL_BP_104_91880_20120226_221957_inLine +BABEL_BP_104_91880_20120226_221957_outLine +BABEL_BP_104_92000_20120206_011350_inLine +BABEL_BP_104_92000_20120206_011350_outLine +BABEL_BP_104_92310_20120206_033517_inLine +BABEL_BP_104_92310_20120206_033517_outLine 
+BABEL_BP_104_92342_20120320_041334_inLine +BABEL_BP_104_92342_20120320_041334_outLine +BABEL_BP_104_92636_20120128_193247_inLine +BABEL_BP_104_92636_20120128_193247_outLine +BABEL_BP_104_92679_20111226_171331_outLine +BABEL_BP_104_92722_20120209_235113_outLine +BABEL_BP_104_92793_20120118_235358_inLine +BABEL_BP_104_93129_20120218_130813_inLine +BABEL_BP_104_93129_20120218_130813_outLine +BABEL_BP_104_93227_20120216_190245_inLine +BABEL_BP_104_93227_20120216_190245_outLine +BABEL_BP_104_93300_20120221_135558_inLine +BABEL_BP_104_93300_20120221_135558_outLine +BABEL_BP_104_93358_20120321_002737_inLine +BABEL_BP_104_93358_20120321_003427_inLine +BABEL_BP_104_93713_20120121_004435_inLine +BABEL_BP_104_93730_20120220_052912_outLine +BABEL_BP_104_93730_20120220_054726_outLine +BABEL_BP_104_93844_20120316_014157_inLine +BABEL_BP_104_93844_20120327_194612_inLine +BABEL_BP_104_93976_20120206_181449_outLine +BABEL_BP_104_94051_20120309_174814_outLine +BABEL_BP_104_94533_20120128_020431_inLine +BABEL_BP_104_94533_20120128_020431_outLine +BABEL_BP_104_94572_20120321_022026_inLine +BABEL_BP_104_94683_20120126_024342_inLine +BABEL_BP_104_94775_20120321_230436_inLine +BABEL_BP_104_94775_20120321_230436_outLine +BABEL_BP_104_94793_20120204_043218_inLine +BABEL_BP_104_94793_20120204_043218_outLine +BABEL_BP_104_94951_20120110_231948_inLine +BABEL_BP_104_94951_20120110_231948_outLine +BABEL_BP_104_95202_20120309_185925_inLine +BABEL_BP_104_95202_20120309_185925_outLine +BABEL_BP_104_95349_20111229_162101_inLine +BABEL_BP_104_95360_20120205_133312_inLine +BABEL_BP_104_95360_20120205_133312_outLine +BABEL_BP_104_95465_20120223_040653_inLine +BABEL_BP_104_95465_20120223_040653_outLine +BABEL_BP_104_95904_20120218_183758_inLine +BABEL_BP_104_95904_20120218_183758_outLine +BABEL_BP_104_96343_20120130_143444_outLine +BABEL_BP_104_96621_20120127_235745_inLine +BABEL_BP_104_96621_20120127_235745_outLine +BABEL_BP_104_96690_20120321_005155_inLine +BABEL_BP_104_96811_20120217_021933_inLine +BABEL_BP_104_96811_20120217_021933_outLine +BABEL_BP_104_96956_20120209_025537_inLine +BABEL_BP_104_96956_20120209_025537_outLine +BABEL_BP_104_97050_20120314_144713_outLine +BABEL_BP_104_97258_20120129_060817_inLine +BABEL_BP_104_97258_20120129_060817_outLine +BABEL_BP_104_97335_20120131_013929_inLine +BABEL_BP_104_97335_20120131_013929_outLine +BABEL_BP_104_97492_20120117_173450_inLine +BABEL_BP_104_97492_20120117_173450_outLine +BABEL_BP_104_97803_20120116_184019_inLine +BABEL_BP_104_97803_20120116_184019_outLine +BABEL_BP_104_97971_20120317_004835_inLine +BABEL_BP_104_97971_20120317_004835_outLine +BABEL_BP_104_98067_20120221_131601_inLine +BABEL_BP_104_98067_20120221_131601_outLine +BABEL_BP_104_98110_20120218_193615_outLine +BABEL_BP_104_98331_20120223_014233_inLine +BABEL_BP_104_98446_20120312_135630_inLine +BABEL_BP_104_98503_20120402_230340_inLine +BABEL_BP_104_98503_20120403_025554_inLine +BABEL_BP_104_98588_20120119_011655_inLine +BABEL_BP_104_98588_20120119_011655_outLine +BABEL_BP_104_98942_20120205_224026_outLine +BABEL_BP_104_98987_20120220_184452_inLine +BABEL_BP_104_98987_20120220_184452_outLine +BABEL_BP_104_98993_20120516_040504_inLine +BABEL_BP_104_98993_20120516_040504_outLine +BABEL_BP_104_99093_20120212_062850_inLine +BABEL_BP_104_99093_20120212_062850_outLine +BABEL_BP_104_99354_20120203_152733_inLine +BABEL_BP_104_99354_20120203_152733_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/dev.list b/egs/babel/s5d/conf/lists/105-turkish/dev.list new file mode 100644 index 00000000000..405c3a7662b --- 
/dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/dev.list @@ -0,0 +1,127 @@ +BABEL_BP_105_11521_20120602_034839_inLine +BABEL_BP_105_11521_20120602_034839_outLine +BABEL_BP_105_12844_20120208_220114_inLine +BABEL_BP_105_12844_20120208_220114_outLine +BABEL_BP_105_12963_20120122_062911_inLine +BABEL_BP_105_12963_20120122_062911_outLine +BABEL_BP_105_13795_20120125_230526_inLine +BABEL_BP_105_13795_20120125_230526_outLine +BABEL_BP_105_13795_20120125_232747_inLine +BABEL_BP_105_13795_20120125_232747_outLine +BABEL_BP_105_15146_20120106_223718_inLine +BABEL_BP_105_15146_20120106_223719_outLine +BABEL_BP_105_15916_20120201_072825_inLine +BABEL_BP_105_15916_20120201_072825_outLine +BABEL_BP_105_16185_20120609_224507_inLine +BABEL_BP_105_16185_20120609_224507_outLine +BABEL_BP_105_19861_20120530_035456_inLine +BABEL_BP_105_19861_20120530_035456_outLine +BABEL_BP_105_20213_20120123_011920_inLine +BABEL_BP_105_20213_20120123_011920_outLine +BABEL_BP_105_21541_20120518_012528_inLine +BABEL_BP_105_22973_20120502_204152_inLine +BABEL_BP_105_22973_20120502_204152_outLine +BABEL_BP_105_26275_20120620_014345_inLine +BABEL_BP_105_26275_20120620_014345_outLine +BABEL_BP_105_29545_20120621_041202_inLine +BABEL_BP_105_29545_20120621_041203_outLine +BABEL_BP_105_31256_20120531_015506_inLine +BABEL_BP_105_31256_20120531_015506_outLine +BABEL_BP_105_31345_20120515_214849_inLine +BABEL_BP_105_31345_20120515_214849_outLine +BABEL_BP_105_32236_20120516_221311_inLine +BABEL_BP_105_32236_20120516_221311_outLine +BABEL_BP_105_35175_20120125_082450_inLine +BABEL_BP_105_35175_20120125_082450_outLine +BABEL_BP_105_39774_20120623_021020_inLine +BABEL_BP_105_39774_20120623_021020_outLine +BABEL_BP_105_39774_20120623_021946_inLine +BABEL_BP_105_39774_20120623_021946_outLine +BABEL_BP_105_39963_20120209_083935_inLine +BABEL_BP_105_39963_20120209_083935_outLine +BABEL_BP_105_40477_20120208_010255_inLine +BABEL_BP_105_40477_20120208_010256_outLine +BABEL_BP_105_40759_20120316_014011_inLine +BABEL_BP_105_40759_20120316_014011_outLine +BABEL_BP_105_42212_20120706_194059_inLine +BABEL_BP_105_42212_20120706_194059_outLine +BABEL_BP_105_42229_20120115_063922_inLine +BABEL_BP_105_42229_20120115_063922_outLine +BABEL_BP_105_44023_20120530_220359_inLine +BABEL_BP_105_44023_20120530_220359_outLine +BABEL_BP_105_44117_20120621_032955_inLine +BABEL_BP_105_44117_20120621_032956_outLine +BABEL_BP_105_48536_20120208_212737_inLine +BABEL_BP_105_48536_20120208_212737_outLine +BABEL_BP_105_49192_20120206_012605_inLine +BABEL_BP_105_49192_20120206_012605_outLine +BABEL_BP_105_54339_20120125_230415_inLine +BABEL_BP_105_54339_20120125_230415_outLine +BABEL_BP_105_55786_20120205_051854_inLine +BABEL_BP_105_55786_20120205_051854_outLine +BABEL_BP_105_55823_20120512_202135_inLine +BABEL_BP_105_55823_20120512_202135_outLine +BABEL_BP_105_56342_20120127_023015_inLine +BABEL_BP_105_56342_20120127_023015_outLine +BABEL_BP_105_60064_20120606_000812_inLine +BABEL_BP_105_60064_20120606_000812_outLine +BABEL_BP_105_60881_20120207_064233_inLine +BABEL_BP_105_60881_20120207_064233_outLine +BABEL_BP_105_66330_20120209_005003_inLine +BABEL_BP_105_66330_20120209_005003_outLine +BABEL_BP_105_66441_20120207_050412_inLine +BABEL_BP_105_66441_20120207_050412_outLine +BABEL_BP_105_66790_20120128_220452_inLine +BABEL_BP_105_66790_20120128_220452_outLine +BABEL_BP_105_66883_20120207_051718_inLine +BABEL_BP_105_66883_20120207_051718_outLine +BABEL_BP_105_67555_20120207_212802_inLine +BABEL_BP_105_67555_20120207_212802_outLine 
+BABEL_BP_105_67733_20120207_234950_inLine +BABEL_BP_105_67733_20120207_234950_outLine +BABEL_BP_105_69052_20120124_062415_inLine +BABEL_BP_105_69052_20120124_062415_outLine +BABEL_BP_105_75151_20120602_061054_inLine +BABEL_BP_105_75151_20120602_061054_outLine +BABEL_BP_105_76372_20120709_015738_inLine +BABEL_BP_105_76372_20120709_015738_outLine +BABEL_BP_105_76716_20120606_195423_inLine +BABEL_BP_105_76763_20120107_022524_inLine +BABEL_BP_105_76763_20120107_022524_outLine +BABEL_BP_105_78487_20120318_080534_inLine +BABEL_BP_105_80856_20120205_231607_inLine +BABEL_BP_105_80856_20120205_231607_outLine +BABEL_BP_105_84394_20120426_185010_inLine +BABEL_BP_105_84394_20120426_185010_outLine +BABEL_BP_105_84608_20120423_050353_inLine +BABEL_BP_105_84608_20120423_050353_outLine +BABEL_BP_105_87077_20120516_000252_inLine +BABEL_BP_105_87077_20120516_000252_outLine +BABEL_BP_105_87806_20120201_235442_inLine +BABEL_BP_105_87806_20120201_235442_outLine +BABEL_BP_105_88385_20120430_004520_inLine +BABEL_BP_105_88385_20120430_004520_outLine +BABEL_BP_105_90393_20120125_034434_inLine +BABEL_BP_105_90393_20120125_034434_outLine +BABEL_BP_105_91136_20120422_062317_inLine +BABEL_BP_105_91136_20120422_062317_outLine +BABEL_BP_105_91330_20120209_002721_inLine +BABEL_BP_105_91330_20120209_002721_outLine +BABEL_BP_105_91865_20120528_230057_inLine +BABEL_BP_105_91865_20120528_230057_outLine +BABEL_BP_105_92386_20120517_234302_inLine +BABEL_BP_105_92386_20120517_234302_outLine +BABEL_BP_105_92591_20120129_012358_inLine +BABEL_BP_105_92591_20120129_012358_outLine +BABEL_BP_105_93192_20120128_005138_inLine +BABEL_BP_105_93192_20120128_005138_outLine +BABEL_BP_105_93454_20120128_235224_inLine +BABEL_BP_105_93454_20120128_235224_outLine +BABEL_BP_105_93509_20120127_075513_inLine +BABEL_BP_105_93509_20120127_075513_outLine +BABEL_BP_105_95350_20120127_234045_inLine +BABEL_BP_105_95350_20120127_234045_outLine +BABEL_BP_105_95952_20120512_225006_inLine +BABEL_BP_105_95952_20120512_225006_outLine +BABEL_BP_105_95952_20120512_230254_inLine +BABEL_BP_105_95952_20120512_230254_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/eval.list b/egs/babel/s5d/conf/lists/105-turkish/eval.list new file mode 100644 index 00000000000..47736cf7f28 --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/eval.list @@ -0,0 +1,194 @@ +BABEL_BP_105_11158_20120609_061134_inLine +BABEL_BP_105_11158_20120609_061134_outLine +BABEL_BP_105_11478_20120128_081119_inLine +BABEL_BP_105_11478_20120128_081119_outLine +BABEL_BP_105_12535_20120528_235510_inLine +BABEL_BP_105_12535_20120528_235510_outLine +BABEL_BP_105_12667_20120502_025008_inLine +BABEL_BP_105_12667_20120502_025008_outLine +BABEL_BP_105_15859_20120313_033308_inLine +BABEL_BP_105_15859_20120313_033308_outLine +BABEL_BP_105_19153_20120125_060542_inLine +BABEL_BP_105_19153_20120125_060542_outLine +BABEL_BP_105_20332_20120615_235730_inLine +BABEL_BP_105_20332_20120615_235730_outLine +BABEL_BP_105_22229_20120106_234925_inLine +BABEL_BP_105_22229_20120106_234925_outLine +BABEL_BP_105_22229_20120107_000755_inLine +BABEL_BP_105_22229_20120107_000755_outLine +BABEL_BP_105_22566_20120621_011722_inLine +BABEL_BP_105_22566_20120621_011722_outLine +BABEL_BP_105_22696_20120529_224618_inLine +BABEL_BP_105_22696_20120529_224618_outLine +BABEL_BP_105_23714_20120531_230422_inLine +BABEL_BP_105_23714_20120531_230422_outLine +BABEL_BP_105_24642_20120525_033813_inLine +BABEL_BP_105_24642_20120525_033814_outLine +BABEL_BP_105_24661_20120615_203702_inLine +BABEL_BP_105_24661_20120615_203702_outLine 
+BABEL_BP_105_27178_20120816_063733_inLine +BABEL_BP_105_27178_20120816_063733_outLine +BABEL_BP_105_27645_20120501_182457_inLine +BABEL_BP_105_27645_20120501_182457_outLine +BABEL_BP_105_27825_20120205_013051_inLine +BABEL_BP_105_27825_20120205_013051_outLine +BABEL_BP_105_27916_20120530_234813_inLine +BABEL_BP_105_27916_20120530_234813_outLine +BABEL_BP_105_28768_20120531_033622_inLine +BABEL_BP_105_28768_20120531_033622_outLine +BABEL_BP_105_28768_20120531_035033_inLine +BABEL_BP_105_28768_20120531_035033_outLine +BABEL_BP_105_29512_20120129_020437_inLine +BABEL_BP_105_29512_20120129_020437_outLine +BABEL_BP_105_30227_20120519_234337_inLine +BABEL_BP_105_30227_20120519_234337_outLine +BABEL_BP_105_31393_20120814_054655_inLine +BABEL_BP_105_31393_20120814_054658_outLine +BABEL_BP_105_33969_20120430_013648_inLine +BABEL_BP_105_33969_20120430_013648_outLine +BABEL_BP_105_34370_20120209_233721_inLine +BABEL_BP_105_34370_20120209_233721_outLine +BABEL_BP_105_38464_20120531_202824_inLine +BABEL_BP_105_38464_20120531_202824_outLine +BABEL_BP_105_38985_20120123_064936_inLine +BABEL_BP_105_38985_20120123_064936_outLine +BABEL_BP_105_40385_20120626_182511_inLine +BABEL_BP_105_40385_20120626_182511_outLine +BABEL_BP_105_40439_20120603_221429_inLine +BABEL_BP_105_40439_20120603_221429_outLine +BABEL_BP_105_41513_20120127_091800_inLine +BABEL_BP_105_41513_20120127_091800_outLine +BABEL_BP_105_41541_20120610_220640_inLine +BABEL_BP_105_41989_20120828_232255_inLine +BABEL_BP_105_41989_20120828_232255_outLine +BABEL_BP_105_42749_20120504_192522_inLine +BABEL_BP_105_42749_20120504_192522_outLine +BABEL_BP_105_42768_20120517_203439_inLine +BABEL_BP_105_42768_20120517_203439_outLine +BABEL_BP_105_42768_20120517_204350_inLine +BABEL_BP_105_42768_20120517_204350_outLine +BABEL_BP_105_44038_20120628_032429_inLine +BABEL_BP_105_44038_20120628_032429_outLine +BABEL_BP_105_45106_20120106_231201_inLine +BABEL_BP_105_45106_20120106_231201_outLine +BABEL_BP_105_45145_20120207_231842_inLine +BABEL_BP_105_45145_20120207_231842_outLine +BABEL_BP_105_45677_20120527_022244_inLine +BABEL_BP_105_45677_20120527_022244_outLine +BABEL_BP_105_45786_20120518_034117_inLine +BABEL_BP_105_45786_20120518_034118_outLine +BABEL_BP_105_45893_20120131_060048_inLine +BABEL_BP_105_45893_20120131_060048_outLine +BABEL_BP_105_46427_20120208_230929_inLine +BABEL_BP_105_46427_20120208_230929_outLine +BABEL_BP_105_46813_20120521_040045_inLine +BABEL_BP_105_46813_20120521_040046_outLine +BABEL_BP_105_47263_20120603_001729_inLine +BABEL_BP_105_47263_20120603_001729_outLine +BABEL_BP_105_48191_20120616_010543_inLine +BABEL_BP_105_48191_20120616_010543_outLine +BABEL_BP_105_49714_20120529_004423_inLine +BABEL_BP_105_49714_20120529_004423_outLine +BABEL_BP_105_50915_20120606_030647_inLine +BABEL_BP_105_50915_20120606_030647_outLine +BABEL_BP_105_51042_20120609_053754_inLine +BABEL_BP_105_51042_20120609_053754_outLine +BABEL_BP_105_51374_20120808_021113_inLine +BABEL_BP_105_51374_20120808_021113_outLine +BABEL_BP_105_55450_20120201_022826_inLine +BABEL_BP_105_55450_20120201_022826_outLine +BABEL_BP_105_55777_20120529_060606_inLine +BABEL_BP_105_55777_20120529_060606_outLine +BABEL_BP_105_55777_20120529_065353_inLine +BABEL_BP_105_55777_20120529_065353_outLine +BABEL_BP_105_56812_20120601_070152_inLine +BABEL_BP_105_56812_20120601_070152_outLine +BABEL_BP_105_60848_20120627_050640_inLine +BABEL_BP_105_60848_20120627_050643_outLine +BABEL_BP_105_62160_20120815_073641_inLine +BABEL_BP_105_62160_20120815_073641_outLine 
+BABEL_BP_105_62177_20120206_010509_inLine +BABEL_BP_105_62177_20120206_010509_outLine +BABEL_BP_105_63459_20120316_010003_inLine +BABEL_BP_105_63491_20120131_020702_inLine +BABEL_BP_105_63491_20120131_020702_outLine +BABEL_BP_105_65601_20120130_233749_inLine +BABEL_BP_105_65601_20120130_233749_outLine +BABEL_BP_105_65732_20120210_054155_inLine +BABEL_BP_105_65732_20120210_054155_outLine +BABEL_BP_105_66188_20120611_222651_inLine +BABEL_BP_105_66188_20120611_222651_outLine +BABEL_BP_105_68671_20120607_065759_inLine +BABEL_BP_105_68671_20120607_065759_outLine +BABEL_BP_105_69145_20120607_070422_inLine +BABEL_BP_105_69145_20120607_070422_outLine +BABEL_BP_105_69275_20120607_085559_inLine +BABEL_BP_105_69275_20120607_085559_outLine +BABEL_BP_105_70077_20120615_070304_inLine +BABEL_BP_105_70077_20120615_070304_outLine +BABEL_BP_105_71654_20120129_031219_inLine +BABEL_BP_105_71654_20120129_031219_outLine +BABEL_BP_105_72011_20120708_195954_inLine +BABEL_BP_105_72011_20120708_195954_outLine +BABEL_BP_105_72011_20120708_201001_inLine +BABEL_BP_105_72011_20120708_201001_outLine +BABEL_BP_105_73562_20120206_084510_inLine +BABEL_BP_105_73562_20120206_084510_outLine +BABEL_BP_105_73757_20120206_093159_inLine +BABEL_BP_105_73757_20120206_093159_outLine +BABEL_BP_105_74295_20120122_020359_inLine +BABEL_BP_105_74295_20120122_020359_outLine +BABEL_BP_105_74607_20120208_041443_inLine +BABEL_BP_105_74607_20120208_041443_outLine +BABEL_BP_105_75020_20120808_014405_inLine +BABEL_BP_105_75020_20120808_014405_outLine +BABEL_BP_105_77771_20120529_022050_inLine +BABEL_BP_105_77771_20120529_022050_outLine +BABEL_BP_105_78245_20120815_044319_inLine +BABEL_BP_105_78245_20120815_044319_outLine +BABEL_BP_105_78728_20120210_014021_inLine +BABEL_BP_105_78728_20120210_014021_outLine +BABEL_BP_105_80174_20120606_185602_inLine +BABEL_BP_105_80174_20120606_185602_outLine +BABEL_BP_105_80535_20120611_065341_inLine +BABEL_BP_105_80535_20120611_065341_outLine +BABEL_BP_105_81944_20120531_010546_inLine +BABEL_BP_105_81944_20120531_010546_outLine +BABEL_BP_105_81996_20120208_060259_inLine +BABEL_BP_105_81996_20120208_060259_outLine +BABEL_BP_105_83012_20120529_010427_inLine +BABEL_BP_105_83012_20120529_010427_outLine +BABEL_BP_105_83053_20120121_030631_inLine +BABEL_BP_105_83053_20120121_030631_outLine +BABEL_BP_105_84700_20120530_041137_inLine +BABEL_BP_105_84700_20120530_041137_outLine +BABEL_BP_105_84865_20120619_034124_inLine +BABEL_BP_105_84865_20120619_034124_outLine +BABEL_BP_105_86305_20120201_230055_inLine +BABEL_BP_105_86305_20120201_230055_outLine +BABEL_BP_105_86998_20120613_030245_inLine +BABEL_BP_105_86998_20120613_030245_outLine +BABEL_BP_105_87885_20120709_012121_inLine +BABEL_BP_105_87885_20120709_012121_outLine +BABEL_BP_105_88245_20120430_200721_inLine +BABEL_BP_105_88245_20120430_200721_outLine +BABEL_BP_105_88982_20120128_051748_inLine +BABEL_BP_105_88982_20120128_051748_outLine +BABEL_BP_105_90180_20120611_232400_inLine +BABEL_BP_105_90180_20120611_232400_outLine +BABEL_BP_105_90313_20120128_001531_inLine +BABEL_BP_105_90313_20120128_001531_outLine +BABEL_BP_105_92308_20120616_231053_inLine +BABEL_BP_105_92308_20120616_231053_outLine +BABEL_BP_105_92328_20120611_062634_inLine +BABEL_BP_105_92328_20120611_062634_outLine +BABEL_BP_105_92820_20120521_005626_inLine +BABEL_BP_105_92820_20120521_005626_outLine +BABEL_BP_105_92852_20120221_033327_inLine +BABEL_BP_105_92852_20120221_033327_outLine +BABEL_BP_105_93151_20120208_021412_inLine +BABEL_BP_105_93151_20120208_021412_outLine 
+BABEL_BP_105_95861_20120202_000341_inLine +BABEL_BP_105_95861_20120202_000341_outLine +BABEL_BP_105_99929_20120603_000106_inLine +BABEL_BP_105_99929_20120603_000106_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list new file mode 100644 index 00000000000..87d6e0f050b --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list @@ -0,0 +1,65 @@ +BABEL_BP_105_11478_20120128_081119_inLine +BABEL_BP_105_11478_20120128_081119_outLine +BABEL_BP_105_12667_20120502_025008_inLine +BABEL_BP_105_12667_20120502_025008_outLine +BABEL_BP_105_15859_20120313_033308_inLine +BABEL_BP_105_15859_20120313_033308_outLine +BABEL_BP_105_22566_20120621_011722_inLine +BABEL_BP_105_22566_20120621_011722_outLine +BABEL_BP_105_27645_20120501_182457_inLine +BABEL_BP_105_27645_20120501_182457_outLine +BABEL_BP_105_33969_20120430_013648_inLine +BABEL_BP_105_33969_20120430_013648_outLine +BABEL_BP_105_38985_20120123_064936_inLine +BABEL_BP_105_38985_20120123_064936_outLine +BABEL_BP_105_41989_20120828_232255_inLine +BABEL_BP_105_41989_20120828_232255_outLine +BABEL_BP_105_42749_20120504_192522_inLine +BABEL_BP_105_42749_20120504_192522_outLine +BABEL_BP_105_42768_20120517_203439_inLine +BABEL_BP_105_42768_20120517_203439_outLine +BABEL_BP_105_42768_20120517_204350_inLine +BABEL_BP_105_42768_20120517_204350_outLine +BABEL_BP_105_45106_20120106_231201_inLine +BABEL_BP_105_45106_20120106_231201_outLine +BABEL_BP_105_45677_20120527_022244_inLine +BABEL_BP_105_45677_20120527_022244_outLine +BABEL_BP_105_46427_20120208_230929_inLine +BABEL_BP_105_46427_20120208_230929_outLine +BABEL_BP_105_46813_20120521_040045_inLine +BABEL_BP_105_46813_20120521_040046_outLine +BABEL_BP_105_47263_20120603_001729_inLine +BABEL_BP_105_47263_20120603_001729_outLine +BABEL_BP_105_50915_20120606_030647_inLine +BABEL_BP_105_50915_20120606_030647_outLine +BABEL_BP_105_51374_20120808_021113_inLine +BABEL_BP_105_51374_20120808_021113_outLine +BABEL_BP_105_60848_20120627_050640_inLine +BABEL_BP_105_60848_20120627_050643_outLine +BABEL_BP_105_63459_20120316_010003_inLine +BABEL_BP_105_63491_20120131_020702_inLine +BABEL_BP_105_63491_20120131_020702_outLine +BABEL_BP_105_65601_20120130_233749_inLine +BABEL_BP_105_65601_20120130_233749_outLine +BABEL_BP_105_65732_20120210_054155_inLine +BABEL_BP_105_65732_20120210_054155_outLine +BABEL_BP_105_72011_20120708_195954_inLine +BABEL_BP_105_72011_20120708_195954_outLine +BABEL_BP_105_72011_20120708_201001_inLine +BABEL_BP_105_72011_20120708_201001_outLine +BABEL_BP_105_74295_20120122_020359_inLine +BABEL_BP_105_74295_20120122_020359_outLine +BABEL_BP_105_78245_20120815_044319_inLine +BABEL_BP_105_78245_20120815_044319_outLine +BABEL_BP_105_80174_20120606_185602_inLine +BABEL_BP_105_80174_20120606_185602_outLine +BABEL_BP_105_81944_20120531_010546_inLine +BABEL_BP_105_81944_20120531_010546_outLine +BABEL_BP_105_83053_20120121_030631_inLine +BABEL_BP_105_83053_20120121_030631_outLine +BABEL_BP_105_84700_20120530_041137_inLine +BABEL_BP_105_84700_20120530_041137_outLine +BABEL_BP_105_87885_20120709_012121_inLine +BABEL_BP_105_87885_20120709_012121_outLine +BABEL_BP_105_88982_20120128_051748_inLine +BABEL_BP_105_88982_20120128_051748_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list new file mode 100644 index 00000000000..6d810bffecc --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list @@ -0,0 +1,993 @@ 
+BABEL_BP_105_10160_20120107_220423_inLine +BABEL_BP_105_10160_20120107_220423_outLine +BABEL_BP_105_10211_20120602_185303_inLine +BABEL_BP_105_10211_20120602_185303_outLine +BABEL_BP_105_10467_20120520_004721_inLine +BABEL_BP_105_10467_20120520_004721_outLine +BABEL_BP_105_10973_20120604_181602_inLine +BABEL_BP_105_10973_20120604_181602_outLine +BABEL_BP_105_11022_20120126_221846_inLine +BABEL_BP_105_11022_20120126_221846_outLine +BABEL_BP_105_11152_20120608_002410_inLine +BABEL_BP_105_11152_20120608_002410_outLine +BABEL_BP_105_11371_20120110_001148_inLine +BABEL_BP_105_11371_20120110_001148_outLine +BABEL_BP_105_11422_20120110_233241_inLine +BABEL_BP_105_11422_20120110_233241_outLine +BABEL_BP_105_11627_20120209_232308_inLine +BABEL_BP_105_11868_20120518_025856_inLine +BABEL_BP_105_11868_20120518_025856_outLine +BABEL_BP_105_11982_20120520_192511_outLine +BABEL_BP_105_12003_20120220_085129_inLine +BABEL_BP_105_12003_20120220_085131_outLine +BABEL_BP_105_12120_20120621_024039_inLine +BABEL_BP_105_12120_20120621_024039_outLine +BABEL_BP_105_12439_20120520_215211_inLine +BABEL_BP_105_12439_20120520_215211_outLine +BABEL_BP_105_12643_20120628_010121_inLine +BABEL_BP_105_13065_20120208_032637_inLine +BABEL_BP_105_13065_20120208_032637_outLine +BABEL_BP_105_13118_20120130_042038_outLine +BABEL_BP_105_13389_20120530_002622_inLine +BABEL_BP_105_13389_20120530_002622_outLine +BABEL_BP_105_13530_20120604_015841_inLine +BABEL_BP_105_13530_20120604_015841_outLine +BABEL_BP_105_13660_20120314_062650_inLine +BABEL_BP_105_13660_20120314_062651_outLine +BABEL_BP_105_13702_20120512_204855_inLine +BABEL_BP_105_13702_20120512_204855_outLine +BABEL_BP_105_13913_20120121_005810_inLine +BABEL_BP_105_13913_20120121_005810_outLine +BABEL_BP_105_14054_20120205_012603_inLine +BABEL_BP_105_14054_20120205_012603_outLine +BABEL_BP_105_14707_20120121_003857_inLine +BABEL_BP_105_14707_20120121_003857_outLine +BABEL_BP_105_14891_20120107_224233_inLine +BABEL_BP_105_14891_20120107_224233_outLine +BABEL_BP_105_14936_20120528_215659_inLine +BABEL_BP_105_14936_20120528_215659_outLine +BABEL_BP_105_14997_20120314_212654_inLine +BABEL_BP_105_14997_20120314_212654_outLine +BABEL_BP_105_15022_20120204_043515_inLine +BABEL_BP_105_15022_20120204_043515_outLine +BABEL_BP_105_16066_20120205_105046_inLine +BABEL_BP_105_16066_20120205_105046_outLine +BABEL_BP_105_16257_20120709_025101_inLine +BABEL_BP_105_16257_20120709_025101_outLine +BABEL_BP_105_16346_20120122_031133_inLine +BABEL_BP_105_16346_20120122_031133_outLine +BABEL_BP_105_16617_20120315_024321_inLine +BABEL_BP_105_16617_20120315_024321_outLine +BABEL_BP_105_16646_20120209_075016_inLine +BABEL_BP_105_16646_20120209_075016_outLine +BABEL_BP_105_16855_20120210_062956_inLine +BABEL_BP_105_16855_20120210_062956_outLine +BABEL_BP_105_16875_20120626_033717_inLine +BABEL_BP_105_16875_20120626_033718_outLine +BABEL_BP_105_16883_20120121_060732_inLine +BABEL_BP_105_16883_20120121_060732_outLine +BABEL_BP_105_17013_20120314_031626_inLine +BABEL_BP_105_17013_20120314_031626_outLine +BABEL_BP_105_17018_20120421_182457_outLine +BABEL_BP_105_17511_20120128_212023_inLine +BABEL_BP_105_17511_20120128_212023_outLine +BABEL_BP_105_17606_20120530_230042_inLine +BABEL_BP_105_17606_20120530_230042_outLine +BABEL_BP_105_17933_20120130_062220_inLine +BABEL_BP_105_17948_20120120_073631_inLine +BABEL_BP_105_17948_20120120_073631_outLine +BABEL_BP_105_18209_20120129_215151_inLine +BABEL_BP_105_18209_20120129_215151_outLine +BABEL_BP_105_18234_20120220_051332_inLine 
+BABEL_BP_105_18672_20120131_015941_inLine +BABEL_BP_105_18672_20120131_015941_outLine +BABEL_BP_105_18701_20120127_035425_inLine +BABEL_BP_105_18701_20120127_035425_outLine +BABEL_BP_105_18716_20120218_070145_inLine +BABEL_BP_105_18802_20120620_222614_inLine +BABEL_BP_105_18802_20120620_222614_outLine +BABEL_BP_105_19248_20120504_193537_inLine +BABEL_BP_105_19248_20120504_193537_outLine +BABEL_BP_105_19404_20120829_192145_inLine +BABEL_BP_105_19404_20120829_192145_outLine +BABEL_BP_105_19479_20120527_195818_inLine +BABEL_BP_105_19479_20120527_195818_outLine +BABEL_BP_105_19479_20120527_200936_inLine +BABEL_BP_105_19479_20120527_200936_outLine +BABEL_BP_105_19731_20120519_190911_inLine +BABEL_BP_105_19731_20120519_190911_outLine +BABEL_BP_105_20320_20120207_211206_inLine +BABEL_BP_105_20320_20120207_211206_outLine +BABEL_BP_105_20347_20120504_231529_inLine +BABEL_BP_105_20347_20120504_232320_inLine +BABEL_BP_105_20462_20120605_192730_inLine +BABEL_BP_105_20462_20120605_192730_outLine +BABEL_BP_105_20471_20120125_013916_inLine +BABEL_BP_105_20471_20120125_013916_outLine +BABEL_BP_105_20471_20120125_015348_inLine +BABEL_BP_105_20471_20120125_015348_outLine +BABEL_BP_105_20483_20120202_013100_inLine +BABEL_BP_105_20483_20120202_013100_outLine +BABEL_BP_105_20518_20120202_070149_inLine +BABEL_BP_105_20518_20120202_070149_outLine +BABEL_BP_105_20590_20120106_021113_inLine +BABEL_BP_105_20590_20120106_021113_outLine +BABEL_BP_105_20591_20120126_045259_inLine +BABEL_BP_105_20591_20120126_045259_outLine +BABEL_BP_105_21258_20120528_002304_inLine +BABEL_BP_105_21258_20120528_002304_outLine +BABEL_BP_105_21367_20120120_050000_inLine +BABEL_BP_105_21367_20120120_050000_outLine +BABEL_BP_105_21370_20120605_185740_inLine +BABEL_BP_105_21370_20120605_185740_outLine +BABEL_BP_105_21430_20120129_024859_outLine +BABEL_BP_105_21518_20120118_195555_inLine +BABEL_BP_105_21518_20120118_195555_outLine +BABEL_BP_105_21714_20120518_223459_inLine +BABEL_BP_105_21714_20120518_223459_outLine +BABEL_BP_105_21782_20120130_003418_outLine +BABEL_BP_105_21946_20120504_035038_inLine +BABEL_BP_105_21946_20120504_035039_outLine +BABEL_BP_105_22179_20120206_023628_inLine +BABEL_BP_105_22179_20120206_023628_outLine +BABEL_BP_105_22272_20120430_191440_inLine +BABEL_BP_105_22272_20120430_191440_outLine +BABEL_BP_105_22408_20120131_202129_inLine +BABEL_BP_105_22408_20120131_202129_outLine +BABEL_BP_105_22408_20120131_210558_inLine +BABEL_BP_105_22408_20120131_210558_outLine +BABEL_BP_105_22509_20120429_020025_inLine +BABEL_BP_105_22509_20120429_020025_outLine +BABEL_BP_105_22898_20120129_040904_inLine +BABEL_BP_105_22898_20120129_040904_outLine +BABEL_BP_105_22903_20120204_205250_inLine +BABEL_BP_105_22903_20120204_205250_outLine +BABEL_BP_105_22910_20120208_013659_inLine +BABEL_BP_105_22910_20120208_013659_outLine +BABEL_BP_105_23167_20120520_193822_inLine +BABEL_BP_105_23167_20120520_193822_outLine +BABEL_BP_105_23502_20120129_223353_inLine +BABEL_BP_105_23502_20120129_223353_outLine +BABEL_BP_105_23571_20120131_040441_inLine +BABEL_BP_105_23571_20120131_040441_outLine +BABEL_BP_105_23629_20120503_212942_inLine +BABEL_BP_105_23629_20120503_212942_outLine +BABEL_BP_105_23930_20120127_051732_outLine +BABEL_BP_105_24094_20120203_230434_inLine +BABEL_BP_105_24094_20120203_230434_outLine +BABEL_BP_105_24420_20120122_053229_inLine +BABEL_BP_105_24420_20120122_053229_outLine +BABEL_BP_105_24589_20120530_180625_inLine +BABEL_BP_105_24589_20120530_180625_outLine +BABEL_BP_105_24608_20120111_023000_inLine 
+BABEL_BP_105_24608_20120111_023000_outLine +BABEL_BP_105_24638_20120120_040215_inLine +BABEL_BP_105_24638_20120120_040215_outLine +BABEL_BP_105_25035_20120221_014614_inLine +BABEL_BP_105_25035_20120221_014614_outLine +BABEL_BP_105_25106_20120129_003957_inLine +BABEL_BP_105_25106_20120129_003957_outLine +BABEL_BP_105_25236_20120209_002129_inLine +BABEL_BP_105_25236_20120209_002129_outLine +BABEL_BP_105_25278_20120208_203010_inLine +BABEL_BP_105_25278_20120208_203010_outLine +BABEL_BP_105_25315_20120516_232406_inLine +BABEL_BP_105_25315_20120516_232406_outLine +BABEL_BP_105_25735_20120520_030401_inLine +BABEL_BP_105_25735_20120520_030401_outLine +BABEL_BP_105_25934_20120105_020031_inLine +BABEL_BP_105_25992_20120120_012613_inLine +BABEL_BP_105_25992_20120120_012613_outLine +BABEL_BP_105_26164_20120627_210408_inLine +BABEL_BP_105_26164_20120627_210408_outLine +BABEL_BP_105_26350_20120113_221856_inLine +BABEL_BP_105_26350_20120113_221856_outLine +BABEL_BP_105_26598_20120124_055700_inLine +BABEL_BP_105_26598_20120124_055700_outLine +BABEL_BP_105_26644_20120517_212756_inLine +BABEL_BP_105_26644_20120517_212756_outLine +BABEL_BP_105_26684_20120125_030410_inLine +BABEL_BP_105_26684_20120125_030410_outLine +BABEL_BP_105_27349_20120129_233743_inLine +BABEL_BP_105_27349_20120129_233743_outLine +BABEL_BP_105_27605_20120129_073539_inLine +BABEL_BP_105_27605_20120129_073539_outLine +BABEL_BP_105_27724_20120130_023439_inLine +BABEL_BP_105_27724_20120130_023439_outLine +BABEL_BP_105_28107_20120221_061758_outLine +BABEL_BP_105_28204_20120130_031505_inLine +BABEL_BP_105_28204_20120130_031505_outLine +BABEL_BP_105_28889_20120204_200150_outLine +BABEL_BP_105_29133_20120220_042138_inLine +BABEL_BP_105_29168_20120131_214316_inLine +BABEL_BP_105_29168_20120131_214316_outLine +BABEL_BP_105_29259_20120607_190658_inLine +BABEL_BP_105_29259_20120607_190658_outLine +BABEL_BP_105_29276_20120209_054912_inLine +BABEL_BP_105_29276_20120209_054912_outLine +BABEL_BP_105_29290_20120130_044642_inLine +BABEL_BP_105_29302_20120128_044018_outLine +BABEL_BP_105_29335_20120125_090733_inLine +BABEL_BP_105_29335_20120125_090733_outLine +BABEL_BP_105_29407_20120531_013323_inLine +BABEL_BP_105_29407_20120531_013323_outLine +BABEL_BP_105_29421_20120127_235240_inLine +BABEL_BP_105_29421_20120127_235240_outLine +BABEL_BP_105_29444_20120204_050434_inLine +BABEL_BP_105_29444_20120204_050434_outLine +BABEL_BP_105_29771_20120430_234735_inLine +BABEL_BP_105_29771_20120430_234735_outLine +BABEL_BP_105_29988_20120120_075802_inLine +BABEL_BP_105_29988_20120120_075802_outLine +BABEL_BP_105_30168_20120209_192615_inLine +BABEL_BP_105_30168_20120209_192615_outLine +BABEL_BP_105_30554_20120126_022601_inLine +BABEL_BP_105_30554_20120126_022601_outLine +BABEL_BP_105_31281_20120130_004325_inLine +BABEL_BP_105_31281_20120130_004325_outLine +BABEL_BP_105_31460_20120603_224411_inLine +BABEL_BP_105_31460_20120603_224411_outLine +BABEL_BP_105_31917_20120202_083328_inLine +BABEL_BP_105_31917_20120202_083328_outLine +BABEL_BP_105_32120_20120627_232416_inLine +BABEL_BP_105_32120_20120627_232416_outLine +BABEL_BP_105_32263_20120125_003247_inLine +BABEL_BP_105_32263_20120125_003247_outLine +BABEL_BP_105_32295_20120201_060053_inLine +BABEL_BP_105_32334_20120126_064227_inLine +BABEL_BP_105_32334_20120126_064227_outLine +BABEL_BP_105_32642_20120518_185259_outLine +BABEL_BP_105_32663_20120709_040652_inLine +BABEL_BP_105_32663_20120709_040652_outLine +BABEL_BP_105_32710_20120320_040408_inLine +BABEL_BP_105_32710_20120320_040408_outLine 
+BABEL_BP_105_32818_20120530_032934_inLine +BABEL_BP_105_32818_20120530_032935_outLine +BABEL_BP_105_33671_20120314_060721_inLine +BABEL_BP_105_33671_20120314_060721_outLine +BABEL_BP_105_34169_20120209_195657_outLine +BABEL_BP_105_34194_20120206_104021_inLine +BABEL_BP_105_34194_20120206_104021_outLine +BABEL_BP_105_34235_20120206_051248_inLine +BABEL_BP_105_34248_20120628_013714_inLine +BABEL_BP_105_34248_20120628_013714_outLine +BABEL_BP_105_34480_20120605_033447_inLine +BABEL_BP_105_34480_20120605_033447_outLine +BABEL_BP_105_34498_20120127_071326_inLine +BABEL_BP_105_34498_20120127_071326_outLine +BABEL_BP_105_34590_20120829_000220_inLine +BABEL_BP_105_34590_20120829_000220_outLine +BABEL_BP_105_35006_20120118_204903_inLine +BABEL_BP_105_35006_20120118_204903_outLine +BABEL_BP_105_35011_20120314_000129_inLine +BABEL_BP_105_35229_20120621_203612_inLine +BABEL_BP_105_35229_20120621_203612_outLine +BABEL_BP_105_35324_20120117_204415_inLine +BABEL_BP_105_35324_20120117_204415_outLine +BABEL_BP_105_35329_20120203_051310_inLine +BABEL_BP_105_35329_20120203_051310_outLine +BABEL_BP_105_35357_20120530_040330_inLine +BABEL_BP_105_35357_20120530_040330_outLine +BABEL_BP_105_35576_20120530_184018_inLine +BABEL_BP_105_35576_20120530_184018_outLine +BABEL_BP_105_36276_20120519_000042_inLine +BABEL_BP_105_36276_20120519_000042_outLine +BABEL_BP_105_36360_20120121_024157_inLine +BABEL_BP_105_36360_20120121_024157_outLine +BABEL_BP_105_36383_20120126_014553_inLine +BABEL_BP_105_36561_20120125_091214_inLine +BABEL_BP_105_36561_20120125_091214_outLine +BABEL_BP_105_36711_20120817_211133_inLine +BABEL_BP_105_36711_20120817_211133_outLine +BABEL_BP_105_36722_20120420_012709_inLine +BABEL_BP_105_36722_20120420_012709_outLine +BABEL_BP_105_36975_20120119_201922_inLine +BABEL_BP_105_36975_20120119_201922_outLine +BABEL_BP_105_37094_20120111_013332_inLine +BABEL_BP_105_37094_20120111_013332_outLine +BABEL_BP_105_37110_20120113_201333_inLine +BABEL_BP_105_37110_20120113_201333_outLine +BABEL_BP_105_37111_20120504_215437_inLine +BABEL_BP_105_37111_20120504_215437_outLine +BABEL_BP_105_37260_20120314_015840_inLine +BABEL_BP_105_37260_20120314_015840_outLine +BABEL_BP_105_37444_20120518_221718_inLine +BABEL_BP_105_37444_20120518_221718_outLine +BABEL_BP_105_37461_20120530_010739_inLine +BABEL_BP_105_37461_20120530_010739_outLine +BABEL_BP_105_38108_20120129_001503_inLine +BABEL_BP_105_38108_20120129_001503_outLine +BABEL_BP_105_38640_20120208_010027_inLine +BABEL_BP_105_38640_20120208_010027_outLine +BABEL_BP_105_39066_20120206_073804_inLine +BABEL_BP_105_39066_20120206_073804_outLine +BABEL_BP_105_39114_20120516_035141_inLine +BABEL_BP_105_39114_20120516_035141_outLine +BABEL_BP_105_39384_20120525_200159_outLine +BABEL_BP_105_39384_20120525_200904_outLine +BABEL_BP_105_39915_20120527_221155_inLine +BABEL_BP_105_39915_20120527_221155_outLine +BABEL_BP_105_39997_20120202_204531_inLine +BABEL_BP_105_39997_20120202_204531_outLine +BABEL_BP_105_40002_20120202_061416_inLine +BABEL_BP_105_40002_20120202_061416_outLine +BABEL_BP_105_40040_20120125_211630_inLine +BABEL_BP_105_40040_20120125_211630_outLine +BABEL_BP_105_40046_20120110_013037_inLine +BABEL_BP_105_40046_20120110_013037_outLine +BABEL_BP_105_40084_20120127_075326_inLine +BABEL_BP_105_40084_20120127_075326_outLine +BABEL_BP_105_40123_20120527_021542_inLine +BABEL_BP_105_40123_20120527_021542_outLine +BABEL_BP_105_40346_20120109_223712_inLine +BABEL_BP_105_40346_20120109_223712_outLine +BABEL_BP_105_40510_20120128_063431_inLine 
+BABEL_BP_105_40510_20120128_063431_outLine +BABEL_BP_105_40980_20120208_102244_outLine +BABEL_BP_105_41170_20120110_004951_inLine +BABEL_BP_105_41170_20120110_004951_outLine +BABEL_BP_105_41456_20120316_021539_inLine +BABEL_BP_105_41456_20120316_021539_outLine +BABEL_BP_105_41540_20120121_064850_inLine +BABEL_BP_105_41540_20120121_064850_outLine +BABEL_BP_105_41561_20120708_205430_inLine +BABEL_BP_105_41561_20120708_205430_outLine +BABEL_BP_105_41661_20120206_073351_inLine +BABEL_BP_105_41661_20120206_073351_outLine +BABEL_BP_105_41797_20120208_054959_inLine +BABEL_BP_105_41797_20120208_054959_outLine +BABEL_BP_105_42145_20120210_004555_inLine +BABEL_BP_105_42145_20120210_004555_outLine +BABEL_BP_105_42309_20120530_225817_inLine +BABEL_BP_105_42309_20120530_225817_outLine +BABEL_BP_105_42471_20120210_064751_outLine +BABEL_BP_105_42651_20120208_003002_inLine +BABEL_BP_105_42651_20120208_003002_outLine +BABEL_BP_105_42788_20120520_202049_outLine +BABEL_BP_105_42853_20120105_232804_inLine +BABEL_BP_105_42853_20120105_232804_outLine +BABEL_BP_105_43017_20120814_005806_inLine +BABEL_BP_105_43017_20120814_005806_outLine +BABEL_BP_105_43277_20120122_050352_inLine +BABEL_BP_105_43277_20120122_050352_outLine +BABEL_BP_105_43317_20120516_181202_inLine +BABEL_BP_105_43317_20120516_181202_outLine +BABEL_BP_105_43383_20120814_060445_inLine +BABEL_BP_105_43383_20120814_060445_outLine +BABEL_BP_105_43425_20120520_223154_inLine +BABEL_BP_105_43425_20120520_223154_outLine +BABEL_BP_105_43425_20120520_224822_inLine +BABEL_BP_105_43425_20120520_224822_outLine +BABEL_BP_105_43426_20120127_054206_inLine +BABEL_BP_105_43426_20120127_054206_outLine +BABEL_BP_105_43991_20120201_043008_inLine +BABEL_BP_105_43991_20120201_043008_outLine +BABEL_BP_105_44129_20120203_031411_inLine +BABEL_BP_105_44209_20120130_072808_inLine +BABEL_BP_105_44209_20120130_072808_outLine +BABEL_BP_105_44500_20120531_224758_inLine +BABEL_BP_105_44500_20120531_224758_outLine +BABEL_BP_105_44568_20120315_215919_inLine +BABEL_BP_105_44568_20120315_215919_outLine +BABEL_BP_105_44756_20120125_222756_inLine +BABEL_BP_105_44829_20120816_071805_inLine +BABEL_BP_105_44829_20120816_071805_outLine +BABEL_BP_105_44836_20120208_085036_inLine +BABEL_BP_105_44836_20120208_085036_outLine +BABEL_BP_105_45214_20120209_223827_inLine +BABEL_BP_105_45214_20120209_223827_outLine +BABEL_BP_105_45227_20120208_205329_inLine +BABEL_BP_105_45227_20120208_205329_outLine +BABEL_BP_105_45511_20120601_001634_inLine +BABEL_BP_105_45511_20120601_001634_outLine +BABEL_BP_105_45512_20120208_063419_inLine +BABEL_BP_105_45512_20120208_063419_outLine +BABEL_BP_105_45655_20120206_065331_inLine +BABEL_BP_105_45655_20120206_065331_outLine +BABEL_BP_105_45681_20120209_002338_inLine +BABEL_BP_105_45681_20120209_002338_outLine +BABEL_BP_105_45929_20120208_051244_inLine +BABEL_BP_105_45929_20120208_051244_outLine +BABEL_BP_105_45931_20120816_011738_inLine +BABEL_BP_105_45931_20120816_011738_outLine +BABEL_BP_105_46603_20120430_193144_inLine +BABEL_BP_105_46603_20120430_193144_outLine +BABEL_BP_105_46977_20120210_043052_inLine +BABEL_BP_105_46977_20120210_043052_outLine +BABEL_BP_105_47037_20120118_025150_inLine +BABEL_BP_105_47037_20120118_025150_outLine +BABEL_BP_105_47128_20120206_014647_inLine +BABEL_BP_105_47128_20120206_014647_outLine +BABEL_BP_105_47429_20120512_193242_inLine +BABEL_BP_105_47429_20120512_193242_outLine +BABEL_BP_105_47433_20120124_032650_inLine +BABEL_BP_105_47433_20120124_032650_outLine +BABEL_BP_105_47566_20120210_004031_outLine 
+BABEL_BP_105_47625_20120210_031653_outLine +BABEL_BP_105_47646_20120130_220546_inLine +BABEL_BP_105_47646_20120130_220546_outLine +BABEL_BP_105_47733_20120124_050736_inLine +BABEL_BP_105_47733_20120124_050736_outLine +BABEL_BP_105_47794_20120517_013537_inLine +BABEL_BP_105_47794_20120517_013537_outLine +BABEL_BP_105_47794_20120517_014505_inLine +BABEL_BP_105_47794_20120517_014505_outLine +BABEL_BP_105_47821_20120430_182844_outLine +BABEL_BP_105_47823_20120209_005455_inLine +BABEL_BP_105_47823_20120209_005455_outLine +BABEL_BP_105_47845_20120604_014840_inLine +BABEL_BP_105_48061_20120201_084109_inLine +BABEL_BP_105_48061_20120201_084109_outLine +BABEL_BP_105_48247_20120814_194116_inLine +BABEL_BP_105_48247_20120814_194116_outLine +BABEL_BP_105_48281_20120527_205037_inLine +BABEL_BP_105_48281_20120527_205037_outLine +BABEL_BP_105_48281_20120527_210249_inLine +BABEL_BP_105_48281_20120527_210249_outLine +BABEL_BP_105_48317_20120201_220534_inLine +BABEL_BP_105_48410_20120816_072736_inLine +BABEL_BP_105_48410_20120816_072736_outLine +BABEL_BP_105_48418_20120517_235210_inLine +BABEL_BP_105_48418_20120517_235210_outLine +BABEL_BP_105_48491_20120814_025137_inLine +BABEL_BP_105_48491_20120814_025137_outLine +BABEL_BP_105_48559_20120120_085039_inLine +BABEL_BP_105_48559_20120120_085039_outLine +BABEL_BP_105_48976_20120209_021529_inLine +BABEL_BP_105_48976_20120209_021529_outLine +BABEL_BP_105_49186_20120627_224343_inLine +BABEL_BP_105_49186_20120627_224343_outLine +BABEL_BP_105_49239_20120121_234750_outLine +BABEL_BP_105_49541_20120205_233637_inLine +BABEL_BP_105_49541_20120205_233637_outLine +BABEL_BP_105_49624_20120129_090754_inLine +BABEL_BP_105_49624_20120129_090754_outLine +BABEL_BP_105_49689_20120429_224801_inLine +BABEL_BP_105_49689_20120429_224801_outLine +BABEL_BP_105_50028_20120628_020702_inLine +BABEL_BP_105_50028_20120628_020702_outLine +BABEL_BP_105_50141_20120516_230234_inLine +BABEL_BP_105_50141_20120516_230234_outLine +BABEL_BP_105_50201_20120314_220751_inLine +BABEL_BP_105_50201_20120314_220751_outLine +BABEL_BP_105_50416_20120120_030634_inLine +BABEL_BP_105_50416_20120120_030634_outLine +BABEL_BP_105_50416_20120120_032209_inLine +BABEL_BP_105_50416_20120120_032209_outLine +BABEL_BP_105_50641_20120519_213400_inLine +BABEL_BP_105_50641_20120519_213400_outLine +BABEL_BP_105_50752_20120530_202359_inLine +BABEL_BP_105_50752_20120530_202359_outLine +BABEL_BP_105_50798_20120814_222755_inLine +BABEL_BP_105_50798_20120814_222755_outLine +BABEL_BP_105_50932_20120131_024519_outLine +BABEL_BP_105_51052_20120125_203253_inLine +BABEL_BP_105_51052_20120125_203253_outLine +BABEL_BP_105_51149_20120517_022710_inLine +BABEL_BP_105_51149_20120517_022710_outLine +BABEL_BP_105_51448_20120512_221822_inLine +BABEL_BP_105_51448_20120512_221822_outLine +BABEL_BP_105_51521_20120528_232651_inLine +BABEL_BP_105_51521_20120528_232651_outLine +BABEL_BP_105_51569_20120113_191836_inLine +BABEL_BP_105_51569_20120113_191836_outLine +BABEL_BP_105_52219_20120122_061548_inLine +BABEL_BP_105_52219_20120122_061548_outLine +BABEL_BP_105_52335_20120602_042319_inLine +BABEL_BP_105_52335_20120602_042320_outLine +BABEL_BP_105_52602_20120130_010143_inLine +BABEL_BP_105_52602_20120130_010143_outLine +BABEL_BP_105_52642_20120120_062951_inLine +BABEL_BP_105_52642_20120120_062951_outLine +BABEL_BP_105_52900_20120207_074729_inLine +BABEL_BP_105_53179_20120126_014504_inLine +BABEL_BP_105_53179_20120126_014504_outLine +BABEL_BP_105_53181_20120209_221434_inLine +BABEL_BP_105_53181_20120209_221434_outLine 
+BABEL_BP_105_53352_20120313_025305_inLine +BABEL_BP_105_53352_20120313_025305_outLine +BABEL_BP_105_53653_20120601_203737_inLine +BABEL_BP_105_53653_20120601_203737_outLine +BABEL_BP_105_53653_20120601_205017_inLine +BABEL_BP_105_53653_20120601_205017_outLine +BABEL_BP_105_53994_20120501_063357_inLine +BABEL_BP_105_53994_20120501_063357_outLine +BABEL_BP_105_54285_20120501_170645_inLine +BABEL_BP_105_54285_20120501_170645_outLine +BABEL_BP_105_54621_20120604_220824_inLine +BABEL_BP_105_54621_20120604_220824_outLine +BABEL_BP_105_55355_20120602_030100_inLine +BABEL_BP_105_55355_20120602_030100_outLine +BABEL_BP_105_55399_20120207_220014_inLine +BABEL_BP_105_55399_20120207_220014_outLine +BABEL_BP_105_55470_20120515_231335_inLine +BABEL_BP_105_55470_20120515_231335_outLine +BABEL_BP_105_55820_20120120_200536_inLine +BABEL_BP_105_55820_20120120_200536_outLine +BABEL_BP_105_55838_20120519_183551_outLine +BABEL_BP_105_55838_20120519_184228_outLine +BABEL_BP_105_56039_20120207_012118_inLine +BABEL_BP_105_56039_20120207_012118_outLine +BABEL_BP_105_57148_20120208_234937_inLine +BABEL_BP_105_57148_20120208_234937_outLine +BABEL_BP_105_57454_20120123_082347_inLine +BABEL_BP_105_57454_20120123_082347_outLine +BABEL_BP_105_57457_20120203_040430_inLine +BABEL_BP_105_57457_20120203_040430_outLine +BABEL_BP_105_57619_20120530_212910_inLine +BABEL_BP_105_57619_20120530_212910_outLine +BABEL_BP_105_57629_20120109_193726_inLine +BABEL_BP_105_57629_20120109_193726_outLine +BABEL_BP_105_57637_20120207_092849_outLine +BABEL_BP_105_58108_20120516_200608_inLine +BABEL_BP_105_58108_20120516_200608_outLine +BABEL_BP_105_58108_20120516_215546_inLine +BABEL_BP_105_58108_20120516_215546_outLine +BABEL_BP_105_58192_20120530_040251_inLine +BABEL_BP_105_58192_20120530_040252_outLine +BABEL_BP_105_58232_20120221_044134_inLine +BABEL_BP_105_58232_20120221_044134_outLine +BABEL_BP_105_58357_20120602_203200_inLine +BABEL_BP_105_58357_20120602_203200_outLine +BABEL_BP_105_58413_20120220_081844_inLine +BABEL_BP_105_58413_20120220_081902_outLine +BABEL_BP_105_58536_20120207_101252_inLine +BABEL_BP_105_58536_20120207_101252_outLine +BABEL_BP_105_59169_20120126_071441_inLine +BABEL_BP_105_59169_20120126_071441_outLine +BABEL_BP_105_59175_20120814_014729_inLine +BABEL_BP_105_59175_20120814_014729_outLine +BABEL_BP_105_59454_20120210_050748_inLine +BABEL_BP_105_59454_20120210_050748_outLine +BABEL_BP_105_59924_20120520_193636_outLine +BABEL_BP_105_59925_20120531_025444_inLine +BABEL_BP_105_59925_20120531_025444_outLine +BABEL_BP_105_60106_20120206_012558_inLine +BABEL_BP_105_60106_20120206_012558_outLine +BABEL_BP_105_60193_20120208_022615_inLine +BABEL_BP_105_60193_20120208_022615_outLine +BABEL_BP_105_60605_20120121_024426_inLine +BABEL_BP_105_60605_20120121_024426_outLine +BABEL_BP_105_60826_20120127_052753_inLine +BABEL_BP_105_60826_20120127_052753_outLine +BABEL_BP_105_60842_20120207_082938_inLine +BABEL_BP_105_60842_20120207_082938_outLine +BABEL_BP_105_60995_20120708_212511_inLine +BABEL_BP_105_60995_20120708_212511_outLine +BABEL_BP_105_61119_20120120_011733_inLine +BABEL_BP_105_61119_20120120_011733_outLine +BABEL_BP_105_61750_20120430_182721_inLine +BABEL_BP_105_61750_20120430_182721_outLine +BABEL_BP_105_61762_20120208_192030_inLine +BABEL_BP_105_61762_20120208_192030_outLine +BABEL_BP_105_61906_20120125_055530_inLine +BABEL_BP_105_61906_20120125_055530_outLine +BABEL_BP_105_61936_20120626_050803_inLine +BABEL_BP_105_61936_20120626_050804_outLine +BABEL_BP_105_61988_20120207_042437_inLine 
+BABEL_BP_105_61988_20120207_042437_outLine +BABEL_BP_105_62277_20120504_191914_inLine +BABEL_BP_105_62277_20120504_191914_outLine +BABEL_BP_105_62286_20120206_001738_inLine +BABEL_BP_105_62286_20120206_001739_outLine +BABEL_BP_105_62589_20120208_070910_inLine +BABEL_BP_105_62589_20120208_070910_outLine +BABEL_BP_105_63116_20120210_011436_inLine +BABEL_BP_105_63116_20120210_011436_outLine +BABEL_BP_105_63233_20120209_025744_inLine +BABEL_BP_105_63339_20120113_014223_inLine +BABEL_BP_105_63339_20120113_014223_outLine +BABEL_BP_105_63352_20120529_062238_inLine +BABEL_BP_105_63352_20120529_062238_outLine +BABEL_BP_105_63369_20120504_000600_inLine +BABEL_BP_105_63369_20120504_000600_outLine +BABEL_BP_105_64404_20120109_210230_inLine +BABEL_BP_105_64404_20120109_210230_outLine +BABEL_BP_105_64661_20120206_064757_inLine +BABEL_BP_105_64724_20120502_185902_inLine +BABEL_BP_105_64724_20120502_185902_outLine +BABEL_BP_105_64889_20120430_041923_inLine +BABEL_BP_105_64889_20120430_041923_outLine +BABEL_BP_105_65069_20120205_053459_inLine +BABEL_BP_105_65069_20120205_053459_outLine +BABEL_BP_105_65248_20120106_003446_inLine +BABEL_BP_105_65248_20120106_003446_outLine +BABEL_BP_105_65580_20120107_231525_inLine +BABEL_BP_105_65580_20120107_231525_outLine +BABEL_BP_105_65629_20120206_013549_inLine +BABEL_BP_105_65629_20120206_013549_outLine +BABEL_BP_105_65783_20120206_225414_inLine +BABEL_BP_105_65783_20120206_225414_outLine +BABEL_BP_105_65923_20120207_201411_inLine +BABEL_BP_105_65923_20120207_201411_outLine +BABEL_BP_105_66419_20120521_015830_inLine +BABEL_BP_105_66419_20120521_015830_outLine +BABEL_BP_105_66451_20120208_202426_inLine +BABEL_BP_105_66451_20120208_202426_outLine +BABEL_BP_105_66798_20120208_003832_inLine +BABEL_BP_105_66798_20120208_003832_outLine +BABEL_BP_105_66839_20120520_203654_inLine +BABEL_BP_105_66839_20120520_203655_outLine +BABEL_BP_105_67150_20120619_230543_inLine +BABEL_BP_105_67150_20120619_230543_outLine +BABEL_BP_105_67227_20120518_213954_inLine +BABEL_BP_105_67227_20120518_213954_outLine +BABEL_BP_105_67304_20120206_233053_inLine +BABEL_BP_105_67304_20120206_233053_outLine +BABEL_BP_105_67628_20120122_014514_inLine +BABEL_BP_105_67628_20120122_014514_outLine +BABEL_BP_105_67798_20120207_222749_inLine +BABEL_BP_105_67798_20120207_222749_outLine +BABEL_BP_105_67916_20120624_041235_inLine +BABEL_BP_105_67916_20120624_041236_outLine +BABEL_BP_105_67916_20120624_042035_inLine +BABEL_BP_105_67916_20120624_042036_outLine +BABEL_BP_105_68129_20120129_010002_inLine +BABEL_BP_105_68276_20120530_043559_inLine +BABEL_BP_105_68276_20120530_043600_outLine +BABEL_BP_105_68295_20120125_213909_inLine +BABEL_BP_105_68295_20120125_213909_outLine +BABEL_BP_105_68362_20120516_233958_inLine +BABEL_BP_105_68362_20120516_233958_outLine +BABEL_BP_105_68545_20120130_195611_inLine +BABEL_BP_105_68545_20120130_195611_outLine +BABEL_BP_105_68767_20120208_195338_inLine +BABEL_BP_105_68767_20120208_195338_outLine +BABEL_BP_105_68861_20120206_042909_inLine +BABEL_BP_105_68861_20120206_042909_outLine +BABEL_BP_105_69137_20120205_012455_inLine +BABEL_BP_105_69137_20120205_012455_outLine +BABEL_BP_105_69368_20120209_025044_inLine +BABEL_BP_105_69368_20120209_025044_outLine +BABEL_BP_105_69548_20120206_002506_inLine +BABEL_BP_105_69548_20120206_002506_outLine +BABEL_BP_105_69621_20120208_050816_inLine +BABEL_BP_105_69621_20120208_050816_outLine +BABEL_BP_105_69764_20120209_041231_inLine +BABEL_BP_105_69764_20120209_041231_outLine +BABEL_BP_105_70243_20120130_002646_inLine 
+BABEL_BP_105_70243_20120130_002646_outLine +BABEL_BP_105_70285_20120520_195703_inLine +BABEL_BP_105_70285_20120520_195703_outLine +BABEL_BP_105_70511_20120129_071513_inLine +BABEL_BP_105_70511_20120129_071513_outLine +BABEL_BP_105_70548_20120209_030934_inLine +BABEL_BP_105_70548_20120209_030934_outLine +BABEL_BP_105_70615_20120112_204508_inLine +BABEL_BP_105_70615_20120112_204508_outLine +BABEL_BP_105_70680_20120109_201712_inLine +BABEL_BP_105_70906_20120521_022727_inLine +BABEL_BP_105_70906_20120521_022727_outLine +BABEL_BP_105_70975_20120527_224548_inLine +BABEL_BP_105_70975_20120527_224548_outLine +BABEL_BP_105_71178_20120124_044039_inLine +BABEL_BP_105_71178_20120124_044039_outLine +BABEL_BP_105_71739_20120422_024509_inLine +BABEL_BP_105_71739_20120422_024509_outLine +BABEL_BP_105_71741_20120314_230737_inLine +BABEL_BP_105_71741_20120314_230737_outLine +BABEL_BP_105_72119_20120202_041158_inLine +BABEL_BP_105_72119_20120202_041158_outLine +BABEL_BP_105_72141_20120125_085836_inLine +BABEL_BP_105_72141_20120125_085836_outLine +BABEL_BP_105_72297_20120602_030633_inLine +BABEL_BP_105_72297_20120602_030633_outLine +BABEL_BP_105_72330_20120520_201127_outLine +BABEL_BP_105_72330_20120520_201604_outLine +BABEL_BP_105_72718_20120525_180835_inLine +BABEL_BP_105_72718_20120525_180835_outLine +BABEL_BP_105_72746_20120205_020507_inLine +BABEL_BP_105_72746_20120205_020507_outLine +BABEL_BP_105_72879_20120125_032216_inLine +BABEL_BP_105_72879_20120125_032216_outLine +BABEL_BP_105_73051_20120817_204309_inLine +BABEL_BP_105_73051_20120817_204309_outLine +BABEL_BP_105_73059_20120520_222710_inLine +BABEL_BP_105_73059_20120520_222710_outLine +BABEL_BP_105_73072_20120607_013513_inLine +BABEL_BP_105_73072_20120607_013513_outLine +BABEL_BP_105_73452_20120527_020050_inLine +BABEL_BP_105_73452_20120527_020050_outLine +BABEL_BP_105_73542_20120118_000641_inLine +BABEL_BP_105_73542_20120118_000641_outLine +BABEL_BP_105_73752_20120520_212014_inLine +BABEL_BP_105_73752_20120520_212014_outLine +BABEL_BP_105_73761_20120119_040339_inLine +BABEL_BP_105_73761_20120119_040339_outLine +BABEL_BP_105_73780_20120430_230832_inLine +BABEL_BP_105_73780_20120430_230832_outLine +BABEL_BP_105_73944_20120207_022618_inLine +BABEL_BP_105_73944_20120207_022618_outLine +BABEL_BP_105_74012_20120207_031751_inLine +BABEL_BP_105_74012_20120207_031751_outLine +BABEL_BP_105_74571_20120709_032825_inLine +BABEL_BP_105_74588_20120208_231518_inLine +BABEL_BP_105_74588_20120208_231518_outLine +BABEL_BP_105_74709_20120123_195039_inLine +BABEL_BP_105_74709_20120123_195039_outLine +BABEL_BP_105_75248_20120621_004722_inLine +BABEL_BP_105_75248_20120621_004722_outLine +BABEL_BP_105_75333_20120517_033420_inLine +BABEL_BP_105_75333_20120517_033420_outLine +BABEL_BP_105_75354_20120520_012303_inLine +BABEL_BP_105_75354_20120520_012303_outLine +BABEL_BP_105_75498_20120123_090316_inLine +BABEL_BP_105_75498_20120123_090316_outLine +BABEL_BP_105_75680_20120627_220907_inLine +BABEL_BP_105_75680_20120627_220907_outLine +BABEL_BP_105_75799_20120121_081211_inLine +BABEL_BP_105_75799_20120121_081211_outLine +BABEL_BP_105_75845_20120126_093251_inLine +BABEL_BP_105_75845_20120126_093251_outLine +BABEL_BP_105_75990_20120210_003258_inLine +BABEL_BP_105_76252_20120708_232625_inLine +BABEL_BP_105_76252_20120708_232625_outLine +BABEL_BP_105_76320_20120520_214841_outLine +BABEL_BP_105_76451_20120520_012516_inLine +BABEL_BP_105_76451_20120520_012516_outLine +BABEL_BP_105_76691_20120501_060535_inLine +BABEL_BP_105_76691_20120501_060535_outLine 
+BABEL_BP_105_76925_20120207_051003_inLine +BABEL_BP_105_76925_20120207_051003_outLine +BABEL_BP_105_77137_20120120_003356_inLine +BABEL_BP_105_77137_20120120_003356_outLine +BABEL_BP_105_77244_20120530_230026_inLine +BABEL_BP_105_77244_20120530_230026_outLine +BABEL_BP_105_77342_20120126_053532_inLine +BABEL_BP_105_77348_20120109_231904_inLine +BABEL_BP_105_77348_20120109_231904_outLine +BABEL_BP_105_77483_20120126_061820_outLine +BABEL_BP_105_77487_20120131_083433_inLine +BABEL_BP_105_77487_20120131_083433_outLine +BABEL_BP_105_77584_20120119_043252_inLine +BABEL_BP_105_77584_20120119_043252_outLine +BABEL_BP_105_77802_20120120_034318_inLine +BABEL_BP_105_77802_20120120_034318_outLine +BABEL_BP_105_77811_20120619_203214_inLine +BABEL_BP_105_77811_20120619_203214_outLine +BABEL_BP_105_77965_20120110_195959_inLine +BABEL_BP_105_77965_20120110_195959_outLine +BABEL_BP_105_79284_20120520_000955_inLine +BABEL_BP_105_79284_20120520_000955_outLine +BABEL_BP_105_79293_20120313_050558_inLine +BABEL_BP_105_79412_20120814_020731_inLine +BABEL_BP_105_79412_20120814_020731_outLine +BABEL_BP_105_79456_20120315_065631_outLine +BABEL_BP_105_79495_20120107_211221_inLine +BABEL_BP_105_79495_20120107_211221_outLine +BABEL_BP_105_79619_20120204_034427_outLine +BABEL_BP_105_79899_20120519_004730_inLine +BABEL_BP_105_79899_20120519_004730_outLine +BABEL_BP_105_80008_20120206_073118_inLine +BABEL_BP_105_80008_20120206_073118_outLine +BABEL_BP_105_80028_20120620_230841_inLine +BABEL_BP_105_80028_20120620_230841_outLine +BABEL_BP_105_80247_20120501_021202_inLine +BABEL_BP_105_80247_20120501_021202_outLine +BABEL_BP_105_80290_20120502_211538_inLine +BABEL_BP_105_80290_20120502_211538_outLine +BABEL_BP_105_80838_20120130_015756_inLine +BABEL_BP_105_80838_20120130_015756_outLine +BABEL_BP_105_80874_20120209_070233_inLine +BABEL_BP_105_80874_20120209_070233_outLine +BABEL_BP_105_80953_20120126_025448_inLine +BABEL_BP_105_80953_20120126_025448_outLine +BABEL_BP_105_81015_20120122_211324_inLine +BABEL_BP_105_81015_20120122_211324_outLine +BABEL_BP_105_81056_20120220_045306_inLine +BABEL_BP_105_81056_20120220_045306_outLine +BABEL_BP_105_81084_20120125_044727_outLine +BABEL_BP_105_81096_20120205_000909_inLine +BABEL_BP_105_81096_20120205_000909_outLine +BABEL_BP_105_81244_20120120_234254_inLine +BABEL_BP_105_81244_20120120_234254_outLine +BABEL_BP_105_81261_20120206_044337_inLine +BABEL_BP_105_81261_20120206_044337_outLine +BABEL_BP_105_81321_20120127_073458_inLine +BABEL_BP_105_81321_20120127_073458_outLine +BABEL_BP_105_81583_20120206_035506_inLine +BABEL_BP_105_81583_20120206_035506_outLine +BABEL_BP_105_81587_20120530_022705_inLine +BABEL_BP_105_81587_20120530_022705_outLine +BABEL_BP_105_81717_20120121_075007_inLine +BABEL_BP_105_81717_20120121_075007_outLine +BABEL_BP_105_81799_20120122_044223_inLine +BABEL_BP_105_81799_20120122_044223_outLine +BABEL_BP_105_82006_20120119_235812_inLine +BABEL_BP_105_82006_20120119_235812_outLine +BABEL_BP_105_82103_20120207_074556_outLine +BABEL_BP_105_82443_20120623_012845_inLine +BABEL_BP_105_82443_20120623_012845_outLine +BABEL_BP_105_82591_20120201_222003_outLine +BABEL_BP_105_82766_20120130_011639_inLine +BABEL_BP_105_82766_20120130_011639_outLine +BABEL_BP_105_82880_20120708_225241_inLine +BABEL_BP_105_82880_20120708_225241_outLine +BABEL_BP_105_83256_20120424_212011_inLine +BABEL_BP_105_83256_20120424_212011_outLine +BABEL_BP_105_83529_20120520_020225_inLine +BABEL_BP_105_83529_20120520_020225_outLine +BABEL_BP_105_83531_20120202_033247_inLine 
+BABEL_BP_105_83531_20120202_033247_outLine +BABEL_BP_105_83700_20120205_032346_inLine +BABEL_BP_105_83700_20120205_032346_outLine +BABEL_BP_105_83702_20120122_070851_inLine +BABEL_BP_105_83702_20120122_070851_outLine +BABEL_BP_105_83713_20120123_051739_inLine +BABEL_BP_105_83713_20120123_051739_outLine +BABEL_BP_105_84171_20120520_204934_outLine +BABEL_BP_105_84916_20120209_213013_outLine +BABEL_BP_105_84943_20120208_061546_inLine +BABEL_BP_105_84943_20120208_061546_outLine +BABEL_BP_105_85031_20120205_234855_inLine +BABEL_BP_105_85031_20120205_234855_outLine +BABEL_BP_105_85083_20120502_172834_inLine +BABEL_BP_105_85083_20120502_172834_outLine +BABEL_BP_105_85222_20120623_191629_inLine +BABEL_BP_105_85222_20120623_191629_outLine +BABEL_BP_105_85883_20120130_035046_inLine +BABEL_BP_105_85883_20120130_035046_outLine +BABEL_BP_105_85941_20120122_072454_inLine +BABEL_BP_105_85941_20120122_072454_outLine +BABEL_BP_105_85948_20120429_220916_inLine +BABEL_BP_105_85948_20120429_220916_outLine +BABEL_BP_105_86004_20120121_235617_inLine +BABEL_BP_105_86004_20120121_235617_outLine +BABEL_BP_105_86014_20120130_071042_inLine +BABEL_BP_105_86014_20120130_071042_outLine +BABEL_BP_105_86259_20120130_021439_inLine +BABEL_BP_105_86259_20120130_021439_outLine +BABEL_BP_105_86801_20120531_045324_outLine +BABEL_BP_105_87107_20120606_210147_inLine +BABEL_BP_105_87107_20120606_210147_outLine +BABEL_BP_105_87850_20120122_034948_inLine +BABEL_BP_105_87850_20120122_034948_outLine +BABEL_BP_105_87857_20120602_232747_inLine +BABEL_BP_105_87857_20120602_232747_outLine +BABEL_BP_105_87862_20120119_190443_inLine +BABEL_BP_105_87862_20120119_190443_outLine +BABEL_BP_105_88243_20120126_081939_inLine +BABEL_BP_105_88243_20120126_081939_outLine +BABEL_BP_105_88253_20120521_025324_inLine +BABEL_BP_105_88253_20120521_025324_outLine +BABEL_BP_105_88294_20120123_071701_inLine +BABEL_BP_105_88294_20120123_071701_outLine +BABEL_BP_105_88383_20120205_064745_inLine +BABEL_BP_105_88383_20120205_064745_outLine +BABEL_BP_105_88506_20120315_203433_inLine +BABEL_BP_105_88506_20120315_203433_outLine +BABEL_BP_105_88932_20120209_024746_inLine +BABEL_BP_105_88932_20120209_024746_outLine +BABEL_BP_105_89345_20120123_012645_inLine +BABEL_BP_105_89345_20120123_012645_outLine +BABEL_BP_105_89565_20120208_075727_outLine +BABEL_BP_105_89583_20120208_041101_inLine +BABEL_BP_105_89583_20120208_041101_outLine +BABEL_BP_105_89674_20120207_210507_inLine +BABEL_BP_105_89674_20120207_210507_outLine +BABEL_BP_105_89818_20120111_002805_inLine +BABEL_BP_105_89818_20120111_002805_outLine +BABEL_BP_105_89838_20120219_225311_inLine +BABEL_BP_105_89838_20120219_225311_outLine +BABEL_BP_105_89867_20120124_044128_inLine +BABEL_BP_105_89867_20120124_044128_outLine +BABEL_BP_105_89867_20120124_050334_inLine +BABEL_BP_105_89867_20120124_050334_outLine +BABEL_BP_105_90046_20120605_010159_inLine +BABEL_BP_105_90046_20120605_010159_outLine +BABEL_BP_105_90055_20120205_015425_inLine +BABEL_BP_105_90055_20120205_015425_outLine +BABEL_BP_105_90490_20120107_011745_inLine +BABEL_BP_105_90490_20120107_011745_outLine +BABEL_BP_105_90559_20120601_213056_inLine +BABEL_BP_105_90559_20120601_213056_outLine +BABEL_BP_105_90577_20120106_010938_inLine +BABEL_BP_105_90577_20120106_010938_outLine +BABEL_BP_105_90730_20120127_001133_inLine +BABEL_BP_105_90730_20120127_001133_outLine +BABEL_BP_105_90819_20120130_023600_inLine +BABEL_BP_105_90819_20120130_023600_outLine +BABEL_BP_105_90951_20120127_014240_inLine +BABEL_BP_105_90951_20120127_014240_outLine 
+BABEL_BP_105_91002_20120517_195202_inLine +BABEL_BP_105_91002_20120517_195202_outLine +BABEL_BP_105_91358_20120614_031106_inLine +BABEL_BP_105_91358_20120614_031107_outLine +BABEL_BP_105_91386_20120625_201849_inLine +BABEL_BP_105_91386_20120625_201849_outLine +BABEL_BP_105_91703_20120126_003014_inLine +BABEL_BP_105_91703_20120126_003014_outLine +BABEL_BP_105_91975_20120622_002430_inLine +BABEL_BP_105_91975_20120622_002430_outLine +BABEL_BP_105_91975_20120622_004757_inLine +BABEL_BP_105_91975_20120622_004757_outLine +BABEL_BP_105_92252_20120119_001340_inLine +BABEL_BP_105_92252_20120119_001340_outLine +BABEL_BP_105_92407_20120206_090518_inLine +BABEL_BP_105_92407_20120206_090518_outLine +BABEL_BP_105_92628_20120202_065713_inLine +BABEL_BP_105_92628_20120202_065713_outLine +BABEL_BP_105_92752_20120131_065611_inLine +BABEL_BP_105_92752_20120131_065611_outLine +BABEL_BP_105_92789_20120208_092935_inLine +BABEL_BP_105_92789_20120208_092935_outLine +BABEL_BP_105_92800_20120204_062855_inLine +BABEL_BP_105_92800_20120204_062855_outLine +BABEL_BP_105_93004_20120203_214508_inLine +BABEL_BP_105_93004_20120203_214508_outLine +BABEL_BP_105_93044_20120530_205229_inLine +BABEL_BP_105_93044_20120530_205229_outLine +BABEL_BP_105_93044_20120530_210446_inLine +BABEL_BP_105_93044_20120530_210446_outLine +BABEL_BP_105_93314_20120204_045440_outLine +BABEL_BP_105_93436_20120605_021136_outLine +BABEL_BP_105_93541_20120207_220607_inLine +BABEL_BP_105_93541_20120207_220607_outLine +BABEL_BP_105_93637_20120208_014420_inLine +BABEL_BP_105_93637_20120208_014420_outLine +BABEL_BP_105_94149_20120528_213123_inLine +BABEL_BP_105_94149_20120528_213123_outLine +BABEL_BP_105_94162_20120121_020746_inLine +BABEL_BP_105_94162_20120121_020746_outLine +BABEL_BP_105_94168_20120127_071423_inLine +BABEL_BP_105_94168_20120127_071423_outLine +BABEL_BP_105_94223_20120813_060431_inLine +BABEL_BP_105_94223_20120813_060431_outLine +BABEL_BP_105_94226_20120126_200629_inLine +BABEL_BP_105_94226_20120126_200629_outLine +BABEL_BP_105_94235_20120131_090132_inLine +BABEL_BP_105_94235_20120131_090132_outLine +BABEL_BP_105_94542_20120503_002707_inLine +BABEL_BP_105_94542_20120503_002707_outLine +BABEL_BP_105_94694_20120127_060811_inLine +BABEL_BP_105_94694_20120127_060811_outLine +BABEL_BP_105_95034_20120130_072201_outLine +BABEL_BP_105_95533_20120527_024409_inLine +BABEL_BP_105_95533_20120527_024409_outLine +BABEL_BP_105_95650_20120110_225916_inLine +BABEL_BP_105_95650_20120110_225916_outLine +BABEL_BP_105_95736_20120128_235257_inLine +BABEL_BP_105_95736_20120128_235257_outLine +BABEL_BP_105_95815_20120201_065914_inLine +BABEL_BP_105_95815_20120201_065914_outLine +BABEL_BP_105_96108_20120201_013051_inLine +BABEL_BP_105_96108_20120201_013051_outLine +BABEL_BP_105_96302_20120518_220402_inLine +BABEL_BP_105_96302_20120518_220402_outLine +BABEL_BP_105_96438_20120208_042745_inLine +BABEL_BP_105_96438_20120208_042745_outLine +BABEL_BP_105_97004_20120628_024047_inLine +BABEL_BP_105_97260_20120128_060528_inLine +BABEL_BP_105_97260_20120128_060528_outLine +BABEL_BP_105_97274_20120202_091803_inLine +BABEL_BP_105_97274_20120202_091803_outLine +BABEL_BP_105_97298_20120706_190045_inLine +BABEL_BP_105_97298_20120706_190045_outLine +BABEL_BP_105_97318_20120606_000332_inLine +BABEL_BP_105_97318_20120606_000332_outLine +BABEL_BP_105_97405_20120128_051654_outLine +BABEL_BP_105_97629_20120606_230655_inLine +BABEL_BP_105_97629_20120606_230655_outLine +BABEL_BP_105_97635_20120519_194730_inLine +BABEL_BP_105_97635_20120519_194730_outLine 
+BABEL_BP_105_97650_20120124_023530_inLine +BABEL_BP_105_97650_20120124_023530_outLine +BABEL_BP_105_97699_20120619_014656_inLine +BABEL_BP_105_97699_20120619_014656_outLine +BABEL_BP_105_97760_20120503_205622_inLine +BABEL_BP_105_97760_20120503_205622_outLine +BABEL_BP_105_97797_20120130_025511_inLine +BABEL_BP_105_97797_20120130_025511_outLine +BABEL_BP_105_97941_20120123_224142_inLine +BABEL_BP_105_97941_20120123_224142_outLine +BABEL_BP_105_98279_20120121_021104_inLine +BABEL_BP_105_98279_20120121_021104_outLine +BABEL_BP_105_98402_20120518_004507_inLine +BABEL_BP_105_98402_20120518_004507_outLine +BABEL_BP_105_98476_20120314_082638_outLine +BABEL_BP_105_99414_20120618_212729_inLine +BABEL_BP_105_99414_20120618_212729_outLine +BABEL_BP_105_99514_20120126_232257_inLine +BABEL_BP_105_99514_20120126_232257_outLine +BABEL_BP_105_99694_20120202_034424_inLine +BABEL_BP_105_99694_20120202_034425_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list new file mode 100644 index 00000000000..18efca5b37c --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list @@ -0,0 +1,128 @@ +BABEL_BP_105_16257_20120709_025101_inLine +BABEL_BP_105_16257_20120709_025101_outLine +BABEL_BP_105_17013_20120314_031626_inLine +BABEL_BP_105_17013_20120314_031626_outLine +BABEL_BP_105_18672_20120131_015941_inLine +BABEL_BP_105_18672_20120131_015941_outLine +BABEL_BP_105_18716_20120218_070145_inLine +BABEL_BP_105_20347_20120504_231529_inLine +BABEL_BP_105_20347_20120504_232320_inLine +BABEL_BP_105_20471_20120125_013916_inLine +BABEL_BP_105_20471_20120125_013916_outLine +BABEL_BP_105_20471_20120125_015348_inLine +BABEL_BP_105_20471_20120125_015348_outLine +BABEL_BP_105_21370_20120605_185740_inLine +BABEL_BP_105_21370_20120605_185740_outLine +BABEL_BP_105_22272_20120430_191440_inLine +BABEL_BP_105_22272_20120430_191440_outLine +BABEL_BP_105_22408_20120131_202129_inLine +BABEL_BP_105_22408_20120131_202129_outLine +BABEL_BP_105_22408_20120131_210558_inLine +BABEL_BP_105_22408_20120131_210558_outLine +BABEL_BP_105_22898_20120129_040904_inLine +BABEL_BP_105_22898_20120129_040904_outLine +BABEL_BP_105_23629_20120503_212942_inLine +BABEL_BP_105_23629_20120503_212942_outLine +BABEL_BP_105_24608_20120111_023000_inLine +BABEL_BP_105_24608_20120111_023000_outLine +BABEL_BP_105_26164_20120627_210408_inLine +BABEL_BP_105_26164_20120627_210408_outLine +BABEL_BP_105_26644_20120517_212756_inLine +BABEL_BP_105_26644_20120517_212756_outLine +BABEL_BP_105_27724_20120130_023439_inLine +BABEL_BP_105_27724_20120130_023439_outLine +BABEL_BP_105_29421_20120127_235240_inLine +BABEL_BP_105_29421_20120127_235240_outLine +BABEL_BP_105_31460_20120603_224411_inLine +BABEL_BP_105_31460_20120603_224411_outLine +BABEL_BP_105_32663_20120709_040652_inLine +BABEL_BP_105_32663_20120709_040652_outLine +BABEL_BP_105_32818_20120530_032934_inLine +BABEL_BP_105_32818_20120530_032935_outLine +BABEL_BP_105_34590_20120829_000220_inLine +BABEL_BP_105_34590_20120829_000220_outLine +BABEL_BP_105_35329_20120203_051310_inLine +BABEL_BP_105_35329_20120203_051310_outLine +BABEL_BP_105_35576_20120530_184018_inLine +BABEL_BP_105_35576_20120530_184018_outLine +BABEL_BP_105_39066_20120206_073804_inLine +BABEL_BP_105_39066_20120206_073804_outLine +BABEL_BP_105_39114_20120516_035141_inLine +BABEL_BP_105_39114_20120516_035141_outLine +BABEL_BP_105_42145_20120210_004555_inLine +BABEL_BP_105_42145_20120210_004555_outLine +BABEL_BP_105_43317_20120516_181202_inLine 
+BABEL_BP_105_43317_20120516_181202_outLine +BABEL_BP_105_44209_20120130_072808_inLine +BABEL_BP_105_44209_20120130_072808_outLine +BABEL_BP_105_44500_20120531_224758_inLine +BABEL_BP_105_44500_20120531_224758_outLine +BABEL_BP_105_45511_20120601_001634_inLine +BABEL_BP_105_45511_20120601_001634_outLine +BABEL_BP_105_45512_20120208_063419_inLine +BABEL_BP_105_45512_20120208_063419_outLine +BABEL_BP_105_47429_20120512_193242_inLine +BABEL_BP_105_47429_20120512_193242_outLine +BABEL_BP_105_47823_20120209_005455_inLine +BABEL_BP_105_47823_20120209_005455_outLine +BABEL_BP_105_49186_20120627_224343_inLine +BABEL_BP_105_49186_20120627_224343_outLine +BABEL_BP_105_50416_20120120_030634_inLine +BABEL_BP_105_50416_20120120_030634_outLine +BABEL_BP_105_50416_20120120_032209_inLine +BABEL_BP_105_50416_20120120_032209_outLine +BABEL_BP_105_51149_20120517_022710_inLine +BABEL_BP_105_51149_20120517_022710_outLine +BABEL_BP_105_53352_20120313_025305_inLine +BABEL_BP_105_53352_20120313_025305_outLine +BABEL_BP_105_55355_20120602_030100_inLine +BABEL_BP_105_55355_20120602_030100_outLine +BABEL_BP_105_56039_20120207_012118_inLine +BABEL_BP_105_56039_20120207_012118_outLine +BABEL_BP_105_60995_20120708_212511_inLine +BABEL_BP_105_60995_20120708_212511_outLine +BABEL_BP_105_61750_20120430_182721_inLine +BABEL_BP_105_61750_20120430_182721_outLine +BABEL_BP_105_62286_20120206_001738_inLine +BABEL_BP_105_62286_20120206_001739_outLine +BABEL_BP_105_62589_20120208_070910_inLine +BABEL_BP_105_62589_20120208_070910_outLine +BABEL_BP_105_63116_20120210_011436_inLine +BABEL_BP_105_63116_20120210_011436_outLine +BABEL_BP_105_65069_20120205_053459_inLine +BABEL_BP_105_65069_20120205_053459_outLine +BABEL_BP_105_65783_20120206_225414_inLine +BABEL_BP_105_65783_20120206_225414_outLine +BABEL_BP_105_69764_20120209_041231_inLine +BABEL_BP_105_69764_20120209_041231_outLine +BABEL_BP_105_71739_20120422_024509_inLine +BABEL_BP_105_71739_20120422_024509_outLine +BABEL_BP_105_71741_20120314_230737_inLine +BABEL_BP_105_71741_20120314_230737_outLine +BABEL_BP_105_72718_20120525_180835_inLine +BABEL_BP_105_72718_20120525_180835_outLine +BABEL_BP_105_73059_20120520_222710_inLine +BABEL_BP_105_73059_20120520_222710_outLine +BABEL_BP_105_73452_20120527_020050_inLine +BABEL_BP_105_73452_20120527_020050_outLine +BABEL_BP_105_75354_20120520_012303_inLine +BABEL_BP_105_75354_20120520_012303_outLine +BABEL_BP_105_80247_20120501_021202_inLine +BABEL_BP_105_80247_20120501_021202_outLine +BABEL_BP_105_82591_20120201_222003_outLine +BABEL_BP_105_83256_20120424_212011_inLine +BABEL_BP_105_83256_20120424_212011_outLine +BABEL_BP_105_83702_20120122_070851_inLine +BABEL_BP_105_83702_20120122_070851_outLine +BABEL_BP_105_83713_20120123_051739_inLine +BABEL_BP_105_83713_20120123_051739_outLine +BABEL_BP_105_90046_20120605_010159_inLine +BABEL_BP_105_90046_20120605_010159_outLine +BABEL_BP_105_92800_20120204_062855_inLine +BABEL_BP_105_92800_20120204_062855_outLine +BABEL_BP_105_94542_20120503_002707_inLine +BABEL_BP_105_94542_20120503_002707_outLine +BABEL_BP_105_96438_20120208_042745_inLine +BABEL_BP_105_96438_20120208_042745_outLine +BABEL_BP_105_97760_20120503_205622_inLine +BABEL_BP_105_97760_20120503_205622_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/dev.list b/egs/babel/s5d/conf/lists/106-tagalog/dev.list new file mode 100644 index 00000000000..09f159f6574 --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/dev.list @@ -0,0 +1,146 @@ +BABEL_BP_106_05343_20120411_001147_inLine +BABEL_BP_106_05343_20120411_001147_outLine 
+BABEL_BP_106_11690_20120315_042036_inLine +BABEL_BP_106_11690_20120315_042036_outLine +BABEL_BP_106_11694_20120315_051701_inLine +BABEL_BP_106_11694_20120315_051701_outLine +BABEL_BP_106_11915_20120301_192127_outLine +BABEL_BP_106_11915_20120301_193624_outLine +BABEL_BP_106_14475_20120317_195829_inLine +BABEL_BP_106_14475_20120317_195829_outLine +BABEL_BP_106_16883_20120219_191154_inLine +BABEL_BP_106_16883_20120219_191154_outLine +BABEL_BP_106_16883_20120219_191914_inLine +BABEL_BP_106_16883_20120219_191914_outLine +BABEL_BP_106_17948_20120305_020044_inLine +BABEL_BP_106_17948_20120305_020044_outLine +BABEL_BP_106_19012_20120405_191535_inLine +BABEL_BP_106_19012_20120405_191535_outLine +BABEL_BP_106_24379_20120303_015051_inLine +BABEL_BP_106_24379_20120303_015051_outLine +BABEL_BP_106_25035_20120213_014750_inLine +BABEL_BP_106_25035_20120213_014750_outLine +BABEL_BP_106_28260_20120210_165445_inLine +BABEL_BP_106_28260_20120210_165445_outLine +BABEL_BP_106_28740_20120131_002533_inLine +BABEL_BP_106_28768_20120405_170206_inLine +BABEL_BP_106_28768_20120405_170206_outLine +BABEL_BP_106_28768_20120405_172419_inLine +BABEL_BP_106_28768_20120405_172419_outLine +BABEL_BP_106_29268_20120501_030651_inLine +BABEL_BP_106_29268_20120501_032051_inLine +BABEL_BP_106_29268_20120501_033313_inLine +BABEL_BP_106_30554_20120301_192050_inLine +BABEL_BP_106_30554_20120301_192050_outLine +BABEL_BP_106_30715_20120501_014624_inLine +BABEL_BP_106_31635_20120428_220813_inLine +BABEL_BP_106_32642_20120318_154011_inLine +BABEL_BP_106_32642_20120318_154011_outLine +BABEL_BP_106_35896_20120302_123550_inLine +BABEL_BP_106_36490_20120405_193235_inLine +BABEL_BP_106_36490_20120405_193235_outLine +BABEL_BP_106_40168_20120208_173832_outLine +BABEL_BP_106_40168_20120208_175258_outLine +BABEL_BP_106_42383_20120331_140217_inLine +BABEL_BP_106_42383_20120331_140217_outLine +BABEL_BP_106_42766_20120217_003639_inLine +BABEL_BP_106_42766_20120217_003639_outLine +BABEL_BP_106_47845_20120405_122139_inLine +BABEL_BP_106_47845_20120405_122139_outLine +BABEL_BP_106_47845_20120405_123415_inLine +BABEL_BP_106_47845_20120405_123415_outLine +BABEL_BP_106_48477_20120304_224818_inLine +BABEL_BP_106_48477_20120304_224818_outLine +BABEL_BP_106_53544_20120314_004506_inLine +BABEL_BP_106_53544_20120314_004506_outLine +BABEL_BP_106_53544_20120314_010454_inLine +BABEL_BP_106_53544_20120314_010454_outLine +BABEL_BP_106_53982_20120224_233136_inLine +BABEL_BP_106_53982_20120224_233136_outLine +BABEL_BP_106_57422_20120227_015422_inLine +BABEL_BP_106_57422_20120227_015422_outLine +BABEL_BP_106_58413_20120304_005849_inLine +BABEL_BP_106_58413_20120304_005849_outLine +BABEL_BP_106_58737_20120327_234027_inLine +BABEL_BP_106_58737_20120327_234027_outLine +BABEL_BP_106_59500_20120327_192807_inLine +BABEL_BP_106_59500_20120327_192807_outLine +BABEL_BP_106_61385_20120227_200049_inLine +BABEL_BP_106_61385_20120227_200049_outLine +BABEL_BP_106_65580_20120221_205300_inLine +BABEL_BP_106_65580_20120221_205300_outLine +BABEL_BP_106_65580_20120221_210222_inLine +BABEL_BP_106_65580_20120221_210222_outLine +BABEL_BP_106_66026_20120511_112437_inLine +BABEL_BP_106_66026_20120511_114127_inLine +BABEL_BP_106_66668_20120130_000343_inLine +BABEL_BP_106_66668_20120130_000343_outLine +BABEL_BP_106_66668_20120130_002819_inLine +BABEL_BP_106_66668_20120130_002819_outLine +BABEL_BP_106_68362_20120403_123939_inLine +BABEL_BP_106_68362_20120403_123939_outLine +BABEL_BP_106_69050_20120203_173053_inLine +BABEL_BP_106_69050_20120203_173053_outLine 
+BABEL_BP_106_72297_20120405_193507_inLine +BABEL_BP_106_72297_20120405_193507_outLine +BABEL_BP_106_72297_20120405_194943_inLine +BABEL_BP_106_72297_20120405_194943_outLine +BABEL_BP_106_73782_20120313_012825_inLine +BABEL_BP_106_73782_20120313_012825_outLine +BABEL_BP_106_75333_20120329_172440_inLine +BABEL_BP_106_75333_20120329_172440_outLine +BABEL_BP_106_75871_20120127_162002_inLine +BABEL_BP_106_75871_20120127_162002_outLine +BABEL_BP_106_76341_20120219_170650_inLine +BABEL_BP_106_76341_20120219_170650_outLine +BABEL_BP_106_76341_20120219_173824_inLine +BABEL_BP_106_76341_20120219_173824_outLine +BABEL_BP_106_78572_20120304_135853_inLine +BABEL_BP_106_79570_20120302_141553_outLine +BABEL_BP_106_79632_20120309_173547_inLine +BABEL_BP_106_79632_20120309_173547_outLine +BABEL_BP_106_79698_20120315_223952_inLine +BABEL_BP_106_79698_20120315_230838_inLine +BABEL_BP_106_79698_20120315_230838_outLine +BABEL_BP_106_81587_20120309_163209_inLine +BABEL_BP_106_81587_20120309_163209_outLine +BABEL_BP_106_83255_20120530_214353_inLine +BABEL_BP_106_83891_20120327_163405_inLine +BABEL_BP_106_83891_20120327_163405_outLine +BABEL_BP_106_85617_20120225_212818_inLine +BABEL_BP_106_85617_20120225_212818_outLine +BABEL_BP_106_90180_20120317_002331_inLine +BABEL_BP_106_90180_20120317_002331_outLine +BABEL_BP_106_90577_20120111_201742_inLine +BABEL_BP_106_90577_20120111_201742_outLine +BABEL_BP_106_90764_20120131_140951_inLine +BABEL_BP_106_90764_20120131_140951_outLine +BABEL_BP_106_90890_20120322_020338_inLine +BABEL_BP_106_90890_20120322_020338_outLine +BABEL_BP_106_92820_20120318_144230_inLine +BABEL_BP_106_92820_20120318_144230_outLine +BABEL_BP_106_93000_20120227_164805_inLine +BABEL_BP_106_93000_20120227_164805_outLine +BABEL_BP_106_94149_20120205_211427_inLine +BABEL_BP_106_94149_20120205_211427_outLine +BABEL_BP_106_94244_20120405_200522_inLine +BABEL_BP_106_94244_20120405_200522_outLine +BABEL_BP_106_94542_20120305_045905_inLine +BABEL_BP_106_94542_20120305_045905_outLine +BABEL_BP_106_95589_20120225_030746_inLine +BABEL_BP_106_95589_20120225_032340_inLine +BABEL_BP_106_95589_20120225_032340_outLine +BABEL_BP_106_96347_20120422_163204_inLine +BABEL_BP_106_96347_20120422_163808_inLine +BABEL_BP_106_97318_20120405_141943_inLine +BABEL_BP_106_97318_20120405_141943_outLine +BABEL_BP_106_97629_20120227_180122_inLine +BABEL_BP_106_97629_20120227_180122_outLine +BABEL_BP_106_97797_20120224_210655_inLine +BABEL_BP_106_97797_20120224_210655_outLine +BABEL_BP_106_97797_20120224_211935_inLine +BABEL_BP_106_97797_20120224_211935_outLine +BABEL_BP_106_98086_20120228_172810_inLine +BABEL_BP_106_98086_20120228_172810_outLine +BABEL_BP_106_98640_20120317_040411_inLine +BABEL_BP_106_98640_20120317_040412_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/eval.list b/egs/babel/s5d/conf/lists/106-tagalog/eval.list new file mode 100644 index 00000000000..b2c3042f61a --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/eval.list @@ -0,0 +1,241 @@ +BABEL_BP_106_00590_20120401_144745_inLine +BABEL_BP_106_00590_20120401_144745_outLine +BABEL_BP_106_05737_20120317_201434_inLine +BABEL_BP_106_05737_20120317_201434_outLine +BABEL_BP_106_08336_20120308_213905_inLine +BABEL_BP_106_08336_20120308_231058_inLine +BABEL_BP_106_08336_20120308_231812_inLine +BABEL_BP_106_08336_20120308_232516_inLine +BABEL_BP_106_08336_20120308_234130_inLine +BABEL_BP_106_09067_20120304_174532_inLine +BABEL_BP_106_09067_20120304_174532_outLine +BABEL_BP_106_10033_20120428_005441_inLine +BABEL_BP_106_10279_20120525_160616_inLine 
+BABEL_BP_106_11868_20120403_204010_inLine +BABEL_BP_106_11868_20120403_204010_outLine +BABEL_BP_106_12317_20120324_045054_inLine +BABEL_BP_106_12317_20120324_045054_outLine +BABEL_BP_106_12631_20120202_190009_inLine +BABEL_BP_106_12631_20120202_190009_outLine +BABEL_BP_106_13635_20120319_005136_inLine +BABEL_BP_106_13635_20120319_005136_outLine +BABEL_BP_106_13715_20120530_194000_inLine +BABEL_BP_106_13878_20120517_133306_inLine +BABEL_BP_106_14899_20120519_174015_inLine +BABEL_BP_106_14915_20120525_195519_inLine +BABEL_BP_106_14915_20120525_201940_inLine +BABEL_BP_106_14915_20120525_235128_inLine +BABEL_BP_106_18730_20120322_025159_inLine +BABEL_BP_106_18730_20120322_025159_outLine +BABEL_BP_106_18991_20120208_210053_inLine +BABEL_BP_106_18991_20120208_210053_outLine +BABEL_BP_106_20213_20120417_130013_inLine +BABEL_BP_106_20213_20120417_130013_outLine +BABEL_BP_106_20307_20120409_012136_inLine +BABEL_BP_106_20307_20120409_012136_outLine +BABEL_BP_106_20462_20120217_160808_inLine +BABEL_BP_106_20462_20120217_160808_outLine +BABEL_BP_106_20462_20120217_164536_inLine +BABEL_BP_106_20462_20120217_164536_outLine +BABEL_BP_106_20518_20120525_181959_inLine +BABEL_BP_106_20518_20120525_182614_inLine +BABEL_BP_106_20518_20120525_183956_inLine +BABEL_BP_106_20685_20120323_031815_inLine +BABEL_BP_106_20685_20120323_031815_outLine +BABEL_BP_106_21634_20120530_182237_inLine +BABEL_BP_106_22401_20120321_012046_inLine +BABEL_BP_106_22401_20120321_012046_outLine +BABEL_BP_106_22401_20120321_013515_inLine +BABEL_BP_106_22401_20120321_013515_outLine +BABEL_BP_106_22566_20120318_130741_inLine +BABEL_BP_106_22566_20120318_130741_outLine +BABEL_BP_106_25041_20120318_183127_inLine +BABEL_BP_106_25041_20120318_183127_outLine +BABEL_BP_106_25072_20120307_172016_inLine +BABEL_BP_106_25072_20120307_172016_outLine +BABEL_BP_106_25072_20120307_173008_inLine +BABEL_BP_106_25072_20120307_173008_outLine +BABEL_BP_106_27645_20120309_195238_inLine +BABEL_BP_106_27645_20120309_195238_outLine +BABEL_BP_106_27825_20120525_165434_inLine +BABEL_BP_106_29259_20120525_174551_inLine +BABEL_BP_106_30168_20120417_211215_inLine +BABEL_BP_106_30722_20120228_173748_inLine +BABEL_BP_106_30722_20120228_175207_inLine +BABEL_BP_106_30722_20120228_175207_outLine +BABEL_BP_106_30722_20120228_180341_inLine +BABEL_BP_106_30722_20120228_180341_outLine +BABEL_BP_106_31350_20120305_132208_inLine +BABEL_BP_106_31451_20120430_160735_inLine +BABEL_BP_106_31614_20120315_181514_inLine +BABEL_BP_106_31614_20120315_181514_outLine +BABEL_BP_106_32132_20120604_141124_inLine +BABEL_BP_106_34732_20120504_011240_inLine +BABEL_BP_106_34732_20120504_011240_outLine +BABEL_BP_106_36828_20120413_195545_inLine +BABEL_BP_106_36828_20120413_195545_outLine +BABEL_BP_106_37940_20120509_134420_inLine +BABEL_BP_106_38524_20120531_115250_inLine +BABEL_BP_106_40385_20120316_121848_inLine +BABEL_BP_106_40385_20120316_123312_inLine +BABEL_BP_106_41146_20120127_174843_outLine +BABEL_BP_106_41456_20120417_215741_inLine +BABEL_BP_106_41456_20120417_215741_outLine +BABEL_BP_106_41471_20120227_013419_inLine +BABEL_BP_106_41471_20120227_013419_outLine +BABEL_BP_106_41471_20120227_015846_inLine +BABEL_BP_106_41471_20120227_015846_outLine +BABEL_BP_106_41797_20120418_010121_inLine +BABEL_BP_106_41797_20120418_010121_outLine +BABEL_BP_106_44500_20120307_165936_inLine +BABEL_BP_106_44500_20120307_165936_outLine +BABEL_BP_106_45570_20120411_165807_inLine +BABEL_BP_106_45570_20120411_165807_outLine +BABEL_BP_106_45929_20120524_212453_inLine 
+BABEL_BP_106_45929_20120524_220624_inLine +BABEL_BP_106_46409_20120213_193348_outLine +BABEL_BP_106_48281_20120208_172243_inLine +BABEL_BP_106_48281_20120208_172243_outLine +BABEL_BP_106_48559_20120417_130856_inLine +BABEL_BP_106_48559_20120417_130856_outLine +BABEL_BP_106_48559_20120417_140813_inLine +BABEL_BP_106_48559_20120417_140813_outLine +BABEL_BP_106_48645_20120304_124310_inLine +BABEL_BP_106_48645_20120304_124310_outLine +BABEL_BP_106_48727_20120530_170050_inLine +BABEL_BP_106_49351_20120315_214910_inLine +BABEL_BP_106_49351_20120315_214910_outLine +BABEL_BP_106_50112_20120327_165821_inLine +BABEL_BP_106_50112_20120327_165821_outLine +BABEL_BP_106_50757_20120519_142209_inLine +BABEL_BP_106_53278_20120304_182746_inLine +BABEL_BP_106_53278_20120304_182746_outLine +BABEL_BP_106_54285_20120304_170422_inLine +BABEL_BP_106_54285_20120304_170422_outLine +BABEL_BP_106_54339_20120220_233532_inLine +BABEL_BP_106_54339_20120220_233532_outLine +BABEL_BP_106_54339_20120220_235208_inLine +BABEL_BP_106_54339_20120220_235208_outLine +BABEL_BP_106_56648_20120221_204115_inLine +BABEL_BP_106_56648_20120221_204115_outLine +BABEL_BP_106_59454_20120302_005653_inLine +BABEL_BP_106_59454_20120302_013702_inLine +BABEL_BP_106_59454_20120302_013702_outLine +BABEL_BP_106_59736_20120517_215232_inLine +BABEL_BP_106_60064_20120405_122048_inLine +BABEL_BP_106_60064_20120405_122049_outLine +BABEL_BP_106_60183_20120227_184542_outLine +BABEL_BP_106_60183_20120227_185937_outLine +BABEL_BP_106_61408_20120313_190656_inLine +BABEL_BP_106_61408_20120313_190656_outLine +BABEL_BP_106_61408_20120313_191850_inLine +BABEL_BP_106_61408_20120313_191850_outLine +BABEL_BP_106_61762_20120210_205954_inLine +BABEL_BP_106_61762_20120210_205954_outLine +BABEL_BP_106_62589_20120526_194818_inLine +BABEL_BP_106_62710_20120226_042014_inLine +BABEL_BP_106_62710_20120226_042014_outLine +BABEL_BP_106_62710_20120226_043927_inLine +BABEL_BP_106_62710_20120226_043927_outLine +BABEL_BP_106_63116_20120301_233405_inLine +BABEL_BP_106_63116_20120301_233405_outLine +BABEL_BP_106_64178_20120512_001535_inLine +BABEL_BP_106_64178_20120512_001535_outLine +BABEL_BP_106_64300_20120517_211937_inLine +BABEL_BP_106_64300_20120517_213314_inLine +BABEL_BP_106_65837_20120314_013343_inLine +BABEL_BP_106_65837_20120314_013343_outLine +BABEL_BP_106_69871_20120308_190521_inLine +BABEL_BP_106_69871_20120308_191814_inLine +BABEL_BP_106_69871_20120308_191814_outLine +BABEL_BP_106_70323_20120315_214239_inLine +BABEL_BP_106_70323_20120315_214239_outLine +BABEL_BP_106_70530_20120315_171715_inLine +BABEL_BP_106_70530_20120315_171715_outLine +BABEL_BP_106_70773_20120331_201706_inLine +BABEL_BP_106_70773_20120331_201706_outLine +BABEL_BP_106_72647_20120314_140705_inLine +BABEL_BP_106_72647_20120314_140705_outLine +BABEL_BP_106_72908_20120301_214516_inLine +BABEL_BP_106_72908_20120301_214516_outLine +BABEL_BP_106_73050_20120229_190728_inLine +BABEL_BP_106_73050_20120229_190728_outLine +BABEL_BP_106_73050_20120229_192106_inLine +BABEL_BP_106_73050_20120229_192106_outLine +BABEL_BP_106_73122_20120131_151743_inLine +BABEL_BP_106_73122_20120131_151743_outLine +BABEL_BP_106_73205_20120131_011807_inLine +BABEL_BP_106_73205_20120131_011807_outLine +BABEL_BP_106_74940_20120324_000134_inLine +BABEL_BP_106_74940_20120324_000134_outLine +BABEL_BP_106_78487_20120228_180247_inLine +BABEL_BP_106_78487_20120228_184448_inLine +BABEL_BP_106_78487_20120228_185132_inLine +BABEL_BP_106_78487_20120229_165653_inLine +BABEL_BP_106_78487_20120229_180156_inLine 
+BABEL_BP_106_82007_20120511_234807_inLine +BABEL_BP_106_82007_20120511_234807_outLine +BABEL_BP_106_83012_20120227_002142_inLine +BABEL_BP_106_83012_20120227_002142_outLine +BABEL_BP_106_83012_20120227_004851_inLine +BABEL_BP_106_83012_20120227_004851_outLine +BABEL_BP_106_83053_20120418_185830_inLine +BABEL_BP_106_85719_20120315_175358_inLine +BABEL_BP_106_85719_20120315_175358_outLine +BABEL_BP_106_85883_20120221_204813_inLine +BABEL_BP_106_85883_20120221_204813_outLine +BABEL_BP_106_85883_20120221_210017_inLine +BABEL_BP_106_85883_20120221_210017_outLine +BABEL_BP_106_86211_20120323_003846_inLine +BABEL_BP_106_86211_20120323_003846_outLine +BABEL_BP_106_86339_20120517_211109_inLine +BABEL_BP_106_86900_20120129_013513_inLine +BABEL_BP_106_86900_20120129_013513_outLine +BABEL_BP_106_86998_20120316_235214_inLine +BABEL_BP_106_86998_20120316_235214_outLine +BABEL_BP_106_88932_20120210_024536_inLine +BABEL_BP_106_88932_20120210_024536_outLine +BABEL_BP_106_88932_20120210_031316_inLine +BABEL_BP_106_88932_20120210_031316_outLine +BABEL_BP_106_89619_20120216_201137_inLine +BABEL_BP_106_89619_20120216_201137_outLine +BABEL_BP_106_89619_20120216_202208_inLine +BABEL_BP_106_89619_20120216_202208_outLine +BABEL_BP_106_89674_20120128_172359_inLine +BABEL_BP_106_89674_20120128_172359_outLine +BABEL_BP_106_89674_20120128_175646_inLine +BABEL_BP_106_89674_20120128_175646_outLine +BABEL_BP_106_89818_20120323_031837_inLine +BABEL_BP_106_89818_20120323_033337_inLine +BABEL_BP_106_89818_20120323_033337_outLine +BABEL_BP_106_90046_20120316_225047_inLine +BABEL_BP_106_90046_20120316_225047_outLine +BABEL_BP_106_90559_20120404_191014_inLine +BABEL_BP_106_90559_20120404_191014_outLine +BABEL_BP_106_91007_20120405_174537_inLine +BABEL_BP_106_91007_20120405_174537_outLine +BABEL_BP_106_92072_20120315_162353_inLine +BABEL_BP_106_92072_20120315_162353_outLine +BABEL_BP_106_92094_20120519_171316_inLine +BABEL_BP_106_92328_20120318_183827_inLine +BABEL_BP_106_92328_20120318_183827_outLine +BABEL_BP_106_93506_20120501_114215_inLine +BABEL_BP_106_94696_20120405_132036_inLine +BABEL_BP_106_94696_20120405_132036_outLine +BABEL_BP_106_94696_20120405_132924_inLine +BABEL_BP_106_94696_20120405_132924_outLine +BABEL_BP_106_95225_20120323_234548_inLine +BABEL_BP_106_95225_20120323_234548_outLine +BABEL_BP_106_95572_20120501_120940_inLine +BABEL_BP_106_95637_20120210_215628_inLine +BABEL_BP_106_95637_20120210_215628_outLine +BABEL_BP_106_97052_20120315_205207_inLine +BABEL_BP_106_97052_20120315_205207_outLine +BABEL_BP_106_97941_20120228_153714_inLine +BABEL_BP_106_97941_20120228_155826_inLine +BABEL_BP_106_98099_20120224_234716_inLine +BABEL_BP_106_98099_20120224_234716_outLine +BABEL_BP_106_99503_20120328_011545_inLine +BABEL_BP_106_99503_20120328_011545_outLine +BABEL_BP_106_99764_20120309_004852_inLine +BABEL_BP_106_99764_20120309_004852_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list new file mode 100644 index 00000000000..690fec715fb --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list @@ -0,0 +1,69 @@ +BABEL_BP_106_11868_20120403_204010_inLine +BABEL_BP_106_11868_20120403_204010_outLine +BABEL_BP_106_18730_20120322_025159_inLine +BABEL_BP_106_18730_20120322_025159_outLine +BABEL_BP_106_18991_20120208_210053_inLine +BABEL_BP_106_18991_20120208_210053_outLine +BABEL_BP_106_20213_20120417_130013_inLine +BABEL_BP_106_20213_20120417_130013_outLine +BABEL_BP_106_20307_20120409_012136_inLine 
+BABEL_BP_106_20307_20120409_012136_outLine +BABEL_BP_106_20685_20120323_031815_inLine +BABEL_BP_106_20685_20120323_031815_outLine +BABEL_BP_106_22401_20120321_012046_inLine +BABEL_BP_106_22401_20120321_012046_outLine +BABEL_BP_106_22401_20120321_013515_inLine +BABEL_BP_106_22401_20120321_013515_outLine +BABEL_BP_106_22566_20120318_130741_inLine +BABEL_BP_106_22566_20120318_130741_outLine +BABEL_BP_106_27645_20120309_195238_inLine +BABEL_BP_106_27645_20120309_195238_outLine +BABEL_BP_106_32132_20120604_141124_inLine +BABEL_BP_106_34732_20120504_011240_inLine +BABEL_BP_106_34732_20120504_011240_outLine +BABEL_BP_106_41471_20120227_013419_inLine +BABEL_BP_106_41471_20120227_013419_outLine +BABEL_BP_106_41471_20120227_015846_inLine +BABEL_BP_106_41471_20120227_015846_outLine +BABEL_BP_106_48281_20120208_172243_inLine +BABEL_BP_106_48281_20120208_172243_outLine +BABEL_BP_106_48645_20120304_124310_inLine +BABEL_BP_106_48645_20120304_124310_outLine +BABEL_BP_106_53278_20120304_182746_inLine +BABEL_BP_106_53278_20120304_182746_outLine +BABEL_BP_106_54285_20120304_170422_inLine +BABEL_BP_106_54285_20120304_170422_outLine +BABEL_BP_106_54339_20120220_233532_inLine +BABEL_BP_106_54339_20120220_233532_outLine +BABEL_BP_106_54339_20120220_235208_inLine +BABEL_BP_106_54339_20120220_235208_outLine +BABEL_BP_106_63116_20120301_233405_inLine +BABEL_BP_106_63116_20120301_233405_outLine +BABEL_BP_106_72647_20120314_140705_inLine +BABEL_BP_106_72647_20120314_140705_outLine +BABEL_BP_106_73050_20120229_190728_inLine +BABEL_BP_106_73050_20120229_190728_outLine +BABEL_BP_106_73050_20120229_192106_inLine +BABEL_BP_106_73050_20120229_192106_outLine +BABEL_BP_106_73122_20120131_151743_inLine +BABEL_BP_106_73122_20120131_151743_outLine +BABEL_BP_106_73205_20120131_011807_inLine +BABEL_BP_106_73205_20120131_011807_outLine +BABEL_BP_106_74940_20120324_000134_inLine +BABEL_BP_106_74940_20120324_000134_outLine +BABEL_BP_106_82007_20120511_234807_inLine +BABEL_BP_106_82007_20120511_234807_outLine +BABEL_BP_106_85719_20120315_175358_inLine +BABEL_BP_106_85719_20120315_175358_outLine +BABEL_BP_106_86998_20120316_235214_inLine +BABEL_BP_106_86998_20120316_235214_outLine +BABEL_BP_106_90046_20120316_225047_inLine +BABEL_BP_106_90046_20120316_225047_outLine +BABEL_BP_106_90559_20120404_191014_inLine +BABEL_BP_106_90559_20120404_191014_outLine +BABEL_BP_106_95637_20120210_215628_inLine +BABEL_BP_106_95637_20120210_215628_outLine +BABEL_BP_106_97052_20120315_205207_inLine +BABEL_BP_106_97052_20120315_205207_outLine +BABEL_BP_106_97941_20120228_153714_inLine +BABEL_BP_106_97941_20120228_155826_inLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list new file mode 100644 index 00000000000..daa7243e0f2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list @@ -0,0 +1,1138 @@ +BABEL_BP_106_00300_20120415_005214_inLine +BABEL_BP_106_00315_20120419_231124_inLine +BABEL_BP_106_03420_20120409_204941_inLine +BABEL_BP_106_03420_20120409_204941_outLine +BABEL_BP_106_03420_20120409_211810_inLine +BABEL_BP_106_03420_20120409_211811_outLine +BABEL_BP_106_03695_20120401_185127_inLine +BABEL_BP_106_03695_20120401_190556_inLine +BABEL_BP_106_04577_20120409_220039_inLine +BABEL_BP_106_04577_20120409_220039_outLine +BABEL_BP_106_05510_20120505_014918_inLine +BABEL_BP_106_07199_20120407_224853_inLine +BABEL_BP_106_07199_20120407_224853_outLine +BABEL_BP_106_07924_20120414_191906_inLine +BABEL_BP_106_09087_20120304_155326_outLine 
+BABEL_BP_106_09087_20120304_161115_outLine +BABEL_BP_106_10160_20120322_024644_inLine +BABEL_BP_106_10160_20120322_024644_outLine +BABEL_BP_106_10271_20120307_153101_inLine +BABEL_BP_106_10271_20120307_153101_outLine +BABEL_BP_106_10470_20120229_011606_inLine +BABEL_BP_106_10470_20120229_011606_outLine +BABEL_BP_106_10545_20120315_185249_inLine +BABEL_BP_106_10545_20120315_185249_outLine +BABEL_BP_106_10643_20120407_222930_inLine +BABEL_BP_106_10643_20120407_222930_outLine +BABEL_BP_106_10732_20120604_111534_inLine +BABEL_BP_106_10732_20120604_113159_inLine +BABEL_BP_106_10985_20120313_013835_inLine +BABEL_BP_106_10985_20120313_013835_outLine +BABEL_BP_106_11004_20120603_171542_inLine +BABEL_BP_106_11152_20120421_140313_inLine +BABEL_BP_106_11158_20120314_183907_inLine +BABEL_BP_106_11158_20120314_183907_outLine +BABEL_BP_106_11158_20120314_193006_inLine +BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_11197_20120327_225746_inLine +BABEL_BP_106_11197_20120327_225746_outLine +BABEL_BP_106_11197_20120327_231450_inLine +BABEL_BP_106_11197_20120327_231450_outLine +BABEL_BP_106_11208_20120409_211504_inLine +BABEL_BP_106_11233_20120407_231020_inLine +BABEL_BP_106_11233_20120407_231020_outLine +BABEL_BP_106_11366_20120323_024622_inLine +BABEL_BP_106_11366_20120323_024622_outLine +BABEL_BP_106_11366_20120323_025914_inLine +BABEL_BP_106_11366_20120323_025914_outLine +BABEL_BP_106_11479_20120202_183704_inLine +BABEL_BP_106_11603_20120331_150248_inLine +BABEL_BP_106_11603_20120331_150248_outLine +BABEL_BP_106_11603_20120331_151525_inLine +BABEL_BP_106_11603_20120331_151525_outLine +BABEL_BP_106_11627_20120210_040828_inLine +BABEL_BP_106_11650_20120315_191912_outLine +BABEL_BP_106_11650_20120315_215538_outLine +BABEL_BP_106_11982_20120219_202255_inLine +BABEL_BP_106_11982_20120219_202255_outLine +BABEL_BP_106_12003_20120205_192229_inLine +BABEL_BP_106_12003_20120205_192229_outLine +BABEL_BP_106_12120_20120318_023316_inLine +BABEL_BP_106_12120_20120318_024105_inLine +BABEL_BP_106_12120_20120318_024557_inLine +BABEL_BP_106_12120_20120318_025233_inLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_12486_20120302_130425_inLine +BABEL_BP_106_12486_20120302_130425_outLine +BABEL_BP_106_12535_20120228_130707_inLine +BABEL_BP_106_12535_20120228_131530_inLine +BABEL_BP_106_12535_20120228_135537_inLine +BABEL_BP_106_12643_20120315_235155_inLine +BABEL_BP_106_12643_20120315_235155_outLine +BABEL_BP_106_12667_20120308_204253_inLine +BABEL_BP_106_12667_20120308_204253_outLine +BABEL_BP_106_12807_20120312_175004_inLine +BABEL_BP_106_12807_20120312_175004_outLine +BABEL_BP_106_12963_20120309_184450_inLine +BABEL_BP_106_12963_20120309_184450_outLine +BABEL_BP_106_12979_20120308_200109_inLine +BABEL_BP_106_13065_20120422_032208_inLine +BABEL_BP_106_13065_20120422_035054_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_13341_20120601_211500_inLine +BABEL_BP_106_13441_20120226_235451_inLine +BABEL_BP_106_13441_20120226_235451_outLine +BABEL_BP_106_13476_20120307_215216_inLine +BABEL_BP_106_13476_20120307_215216_outLine +BABEL_BP_106_13530_20120404_122619_inLine +BABEL_BP_106_13530_20120404_123636_inLine +BABEL_BP_106_13709_20120501_184324_inLine +BABEL_BP_106_13795_20120213_233957_inLine +BABEL_BP_106_13795_20120213_233957_outLine +BABEL_BP_106_14059_20120323_040739_inLine +BABEL_BP_106_14059_20120323_040739_outLine 
+BABEL_BP_106_14524_20120416_134207_inLine +BABEL_BP_106_14524_20120419_235605_inLine +BABEL_BP_106_14591_20120511_002610_inLine +BABEL_BP_106_14770_20120323_025454_inLine +BABEL_BP_106_14770_20120323_025454_outLine +BABEL_BP_106_14836_20120221_185410_inLine +BABEL_BP_106_14836_20120221_185410_outLine +BABEL_BP_106_14840_20120419_212050_inLine +BABEL_BP_106_14936_20120201_174445_inLine +BABEL_BP_106_14936_20120201_174445_outLine +BABEL_BP_106_15234_20120229_012024_inLine +BABEL_BP_106_15234_20120229_012024_outLine +BABEL_BP_106_15353_20120229_125558_inLine +BABEL_BP_106_15353_20120229_125558_outLine +BABEL_BP_106_15859_20120229_175309_inLine +BABEL_BP_106_15859_20120229_175309_outLine +BABEL_BP_106_15940_20120229_001305_inLine +BABEL_BP_106_15940_20120229_001305_outLine +BABEL_BP_106_15966_20120414_160956_inLine +BABEL_BP_106_16117_20120315_004358_inLine +BABEL_BP_106_16117_20120315_004358_outLine +BABEL_BP_106_16185_20120314_174822_outLine +BABEL_BP_106_16307_20120408_002125_inLine +BABEL_BP_106_16307_20120408_002125_outLine +BABEL_BP_106_16385_20120212_202256_inLine +BABEL_BP_106_16385_20120212_202256_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_16783_20120601_214201_inLine +BABEL_BP_106_16984_20120226_022713_inLine +BABEL_BP_106_17013_20120227_184346_inLine +BABEL_BP_106_17013_20120227_184346_outLine +BABEL_BP_106_17093_20120217_180258_outLine +BABEL_BP_106_17203_20120129_171949_inLine +BABEL_BP_106_17353_20120314_160721_inLine +BABEL_BP_106_17353_20120314_163054_inLine +BABEL_BP_106_17452_20120408_203139_inLine +BABEL_BP_106_17452_20120408_204534_inLine +BABEL_BP_106_17452_20120408_205342_inLine +BABEL_BP_106_17511_20120301_194447_inLine +BABEL_BP_106_17511_20120301_194447_outLine +BABEL_BP_106_17606_20120225_235727_inLine +BABEL_BP_106_17606_20120225_235727_outLine +BABEL_BP_106_17850_20120224_223940_inLine +BABEL_BP_106_17850_20120224_223940_outLine +BABEL_BP_106_18209_20120304_004340_inLine +BABEL_BP_106_18209_20120304_004340_outLine +BABEL_BP_106_18701_20120302_161857_inLine +BABEL_BP_106_18701_20120302_161857_outLine +BABEL_BP_106_18802_20120318_014432_inLine +BABEL_BP_106_18802_20120318_014432_outLine +BABEL_BP_106_18903_20120317_121505_inLine +BABEL_BP_106_19063_20120415_183305_inLine +BABEL_BP_106_19248_20120307_192705_inLine +BABEL_BP_106_19248_20120307_192705_outLine +BABEL_BP_106_19290_20120605_180800_inLine +BABEL_BP_106_19479_20120501_154630_inLine +BABEL_BP_106_19479_20120501_155913_inLine +BABEL_BP_106_19479_20120501_165350_inLine +BABEL_BP_106_19619_20120219_023026_inLine +BABEL_BP_106_19619_20120219_023026_outLine +BABEL_BP_106_19656_20120227_201656_inLine +BABEL_BP_106_19656_20120227_201656_outLine +BABEL_BP_106_19861_20120308_181811_inLine +BABEL_BP_106_19861_20120308_181811_outLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_19915_20120129_043730_inLine +BABEL_BP_106_19915_20120129_043730_outLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20591_20120225_172142_inLine +BABEL_BP_106_20680_20120314_195655_inLine +BABEL_BP_106_20680_20120314_233935_inLine +BABEL_BP_106_20680_20120314_233935_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_20741_20120604_131021_inLine +BABEL_BP_106_20775_20120309_184437_inLine +BABEL_BP_106_20775_20120309_184437_outLine +BABEL_BP_106_20985_20120314_184025_inLine +BABEL_BP_106_20985_20120314_184025_outLine +BABEL_BP_106_21050_20120317_181509_outLine +BABEL_BP_106_21258_20120205_012953_outLine 
+BABEL_BP_106_21259_20120331_174446_inLine +BABEL_BP_106_21259_20120331_174446_outLine +BABEL_BP_106_21259_20120331_184534_inLine +BABEL_BP_106_21259_20120331_184534_outLine +BABEL_BP_106_21259_20120331_225507_inLine +BABEL_BP_106_21259_20120331_225507_outLine +BABEL_BP_106_21306_20120417_233743_inLine +BABEL_BP_106_21367_20120317_185340_inLine +BABEL_BP_106_21367_20120317_185340_outLine +BABEL_BP_106_21430_20120207_184620_inLine +BABEL_BP_106_21430_20120207_184620_outLine +BABEL_BP_106_21518_20120225_224701_inLine +BABEL_BP_106_21518_20120225_224701_outLine +BABEL_BP_106_21556_20120313_021608_inLine +BABEL_BP_106_21556_20120313_021608_outLine +BABEL_BP_106_21714_20120318_174632_inLine +BABEL_BP_106_21714_20120318_174632_outLine +BABEL_BP_106_21845_20120310_002143_inLine +BABEL_BP_106_21845_20120310_002143_outLine +BABEL_BP_106_22034_20120317_021754_inLine +BABEL_BP_106_22034_20120317_021754_outLine +BABEL_BP_106_22272_20120318_201647_inLine +BABEL_BP_106_22272_20120318_201647_outLine +BABEL_BP_106_22408_20120213_221623_inLine +BABEL_BP_106_22408_20120213_221623_outLine +BABEL_BP_106_22696_20120308_195105_inLine +BABEL_BP_106_22696_20120308_195105_outLine +BABEL_BP_106_22903_20120224_164344_inLine +BABEL_BP_106_22903_20120224_164344_outLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_22973_20120311_224022_inLine +BABEL_BP_106_22973_20120311_224022_outLine +BABEL_BP_106_23167_20120128_183627_inLine +BABEL_BP_106_23167_20120128_183627_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23629_20120304_212835_inLine +BABEL_BP_106_23629_20120304_212835_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_23995_20120225_011657_inLine +BABEL_BP_106_23995_20120225_011657_outLine +BABEL_BP_106_24084_20120318_015502_inLine +BABEL_BP_106_24124_20120415_182317_inLine +BABEL_BP_106_24335_20120408_005503_inLine +BABEL_BP_106_24335_20120408_012607_inLine +BABEL_BP_106_24335_20120408_012607_outLine +BABEL_BP_106_24441_20120417_211954_inLine +BABEL_BP_106_24569_20120307_232752_inLine +BABEL_BP_106_24569_20120307_232752_outLine +BABEL_BP_106_24580_20120604_165125_inLine +BABEL_BP_106_24638_20120419_013630_inLine +BABEL_BP_106_24661_20120322_221220_inLine +BABEL_BP_106_24661_20120322_221220_outLine +BABEL_BP_106_24817_20120301_031015_inLine +BABEL_BP_106_24817_20120301_031015_outLine +BABEL_BP_106_25279_20120401_195557_inLine +BABEL_BP_106_25479_20120315_154117_inLine +BABEL_BP_106_25479_20120315_154117_outLine +BABEL_BP_106_25479_20120315_160418_inLine +BABEL_BP_106_25479_20120315_160418_outLine +BABEL_BP_106_25502_20120129_015831_inLine +BABEL_BP_106_25502_20120129_015831_outLine +BABEL_BP_106_25735_20120314_233234_inLine +BABEL_BP_106_25735_20120314_233234_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine +BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_25871_20120228_005211_inLine +BABEL_BP_106_25871_20120228_005957_inLine +BABEL_BP_106_25871_20120228_012444_inLine +BABEL_BP_106_25904_20120213_182237_inLine +BABEL_BP_106_25904_20120213_182237_outLine +BABEL_BP_106_26164_20120401_201225_inLine +BABEL_BP_106_26164_20120401_201225_outLine +BABEL_BP_106_26164_20120401_202221_inLine +BABEL_BP_106_26164_20120401_202221_outLine +BABEL_BP_106_26348_20120314_173141_outLine +BABEL_BP_106_26598_20120415_181527_inLine 
+BABEL_BP_106_26644_20120411_154709_inLine +BABEL_BP_106_26684_20120211_170412_inLine +BABEL_BP_106_26684_20120211_170412_outLine +BABEL_BP_106_26786_20120306_151101_inLine +BABEL_BP_106_26786_20120306_151101_outLine +BABEL_BP_106_26901_20120212_192301_inLine +BABEL_BP_106_26901_20120212_192301_outLine +BABEL_BP_106_26901_20120212_193813_inLine +BABEL_BP_106_26901_20120212_193813_outLine +BABEL_BP_106_27363_20120315_165356_inLine +BABEL_BP_106_27890_20120302_171119_inLine +BABEL_BP_106_27890_20120302_171119_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_28683_20120331_165731_inLine +BABEL_BP_106_28754_20120205_171932_inLine +BABEL_BP_106_28754_20120205_171932_outLine +BABEL_BP_106_28754_20120205_174934_inLine +BABEL_BP_106_28754_20120205_174934_outLine +BABEL_BP_106_29087_20120315_125218_outLine +BABEL_BP_106_29087_20120315_130643_outLine +BABEL_BP_106_29097_20120127_001938_inLine +BABEL_BP_106_29097_20120127_001938_outLine +BABEL_BP_106_29133_20120129_171742_outLine +BABEL_BP_106_29290_20120212_151530_inLine +BABEL_BP_106_29290_20120212_151530_outLine +BABEL_BP_106_29328_20120212_210507_outLine +BABEL_BP_106_29407_20120403_225249_inLine +BABEL_BP_106_29421_20120213_182542_inLine +BABEL_BP_106_29421_20120213_182542_outLine +BABEL_BP_106_29512_20120226_190947_inLine +BABEL_BP_106_29512_20120226_190947_outLine +BABEL_BP_106_29545_20120331_153345_outLine +BABEL_BP_106_29589_20120225_144930_inLine +BABEL_BP_106_29589_20120225_144930_outLine +BABEL_BP_106_29988_20120301_225306_inLine +BABEL_BP_106_29988_20120301_234957_inLine +BABEL_BP_106_30418_20120401_162421_inLine +BABEL_BP_106_30418_20120401_162421_outLine +BABEL_BP_106_30583_20120129_163331_inLine +BABEL_BP_106_30583_20120129_163331_outLine +BABEL_BP_106_30642_20120302_150419_inLine +BABEL_BP_106_30642_20120302_150419_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31031_20120215_010958_inLine +BABEL_BP_106_31031_20120215_010958_outLine +BABEL_BP_106_31256_20120317_140651_inLine +BABEL_BP_106_31256_20120317_140651_outLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_31328_20120212_180708_inLine +BABEL_BP_106_31328_20120212_180708_outLine +BABEL_BP_106_31606_20120403_225528_inLine +BABEL_BP_106_31783_20120331_154149_inLine +BABEL_BP_106_31783_20120331_163639_inLine +BABEL_BP_106_31975_20120309_181134_inLine +BABEL_BP_106_31975_20120309_181134_outLine +BABEL_BP_106_32263_20120225_201234_inLine +BABEL_BP_106_32263_20120225_203654_inLine +BABEL_BP_106_32334_20120304_193216_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32562_20120307_193633_inLine +BABEL_BP_106_32562_20120307_193633_outLine +BABEL_BP_106_32710_20120418_235030_inLine +BABEL_BP_106_32887_20120327_221120_inLine +BABEL_BP_106_32887_20120327_222408_inLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33023_20120203_000619_inLine +BABEL_BP_106_33023_20120203_000619_outLine +BABEL_BP_106_33192_20120516_170543_inLine +BABEL_BP_106_33192_20120516_172023_inLine +BABEL_BP_106_33540_20120221_204916_outLine +BABEL_BP_106_33540_20120221_210930_outLine +BABEL_BP_106_33671_20120206_215709_outLine +BABEL_BP_106_33707_20120403_172641_inLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine 
+BABEL_BP_106_33817_20120301_165159_inLine +BABEL_BP_106_33817_20120301_165159_outLine +BABEL_BP_106_33969_20120310_001559_inLine +BABEL_BP_106_33969_20120310_001559_outLine +BABEL_BP_106_34328_20120225_012732_inLine +BABEL_BP_106_34328_20120225_012732_outLine +BABEL_BP_106_34439_20120301_190320_inLine +BABEL_BP_106_34439_20120301_190320_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34498_20120314_171141_inLine +BABEL_BP_106_34498_20120314_171141_outLine +BABEL_BP_106_34498_20120314_172341_inLine +BABEL_BP_106_34498_20120314_172341_outLine +BABEL_BP_106_34857_20120301_183238_inLine +BABEL_BP_106_34857_20120301_183238_outLine +BABEL_BP_106_34859_20120328_231638_inLine +BABEL_BP_106_34859_20120328_231638_outLine +BABEL_BP_106_34859_20120328_233134_inLine +BABEL_BP_106_34859_20120328_233134_outLine +BABEL_BP_106_34894_20120328_014528_inLine +BABEL_BP_106_34894_20120328_014528_outLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35016_20120405_195810_outLine +BABEL_BP_106_35153_20120502_162803_inLine +BABEL_BP_106_35153_20120502_170536_inLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35188_20120315_154007_inLine +BABEL_BP_106_35188_20120315_154007_outLine +BABEL_BP_106_35305_20120308_195828_inLine +BABEL_BP_106_35305_20120308_195828_outLine +BABEL_BP_106_35318_20120130_203231_inLine +BABEL_BP_106_35318_20120130_203231_outLine +BABEL_BP_106_35329_20120302_140638_inLine +BABEL_BP_106_35329_20120302_140638_outLine +BABEL_BP_106_35441_20120414_194638_inLine +BABEL_BP_106_35470_20120307_190826_inLine +BABEL_BP_106_35470_20120307_190826_outLine +BABEL_BP_106_35576_20120224_211651_inLine +BABEL_BP_106_35576_20120224_211651_outLine +BABEL_BP_106_35612_20120303_000710_inLine +BABEL_BP_106_35612_20120303_000710_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_35951_20120419_001936_inLine +BABEL_BP_106_35972_20120411_154338_inLine +BABEL_BP_106_35972_20120411_155457_inLine +BABEL_BP_106_36143_20120128_230220_outLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_36276_20120317_130620_inLine +BABEL_BP_106_36276_20120317_134742_inLine +BABEL_BP_106_36276_20120317_134742_outLine +BABEL_BP_106_36383_20120225_021045_inLine +BABEL_BP_106_36383_20120225_021045_outLine +BABEL_BP_106_36391_20120205_201108_inLine +BABEL_BP_106_36391_20120205_201108_outLine +BABEL_BP_106_36868_20120417_204120_inLine +BABEL_BP_106_36868_20120417_210037_inLine +BABEL_BP_106_37064_20120324_130301_inLine +BABEL_BP_106_37064_20120324_130301_outLine +BABEL_BP_106_37258_20120304_002638_inLine +BABEL_BP_106_37258_20120304_002638_outLine +BABEL_BP_106_37260_20120130_191541_inLine +BABEL_BP_106_37260_20120130_191541_outLine +BABEL_BP_106_37766_20120229_163334_inLine +BABEL_BP_106_37766_20120229_163334_outLine +BABEL_BP_106_38175_20120209_214322_inLine +BABEL_BP_106_38175_20120209_214322_outLine +BABEL_BP_106_38248_20120404_214148_inLine +BABEL_BP_106_38248_20120404_222004_inLine +BABEL_BP_106_38248_20120404_222004_outLine +BABEL_BP_106_38248_20120404_223317_inLine +BABEL_BP_106_38248_20120404_223317_outLine +BABEL_BP_106_38396_20120323_023143_inLine +BABEL_BP_106_38396_20120323_023143_outLine +BABEL_BP_106_38464_20120318_215505_inLine +BABEL_BP_106_38464_20120318_215505_outLine +BABEL_BP_106_38464_20120318_220931_inLine +BABEL_BP_106_38464_20120318_220931_outLine +BABEL_BP_106_38635_20120605_171532_inLine 
+BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38656_20120321_230900_inLine +BABEL_BP_106_38656_20120321_232832_inLine +BABEL_BP_106_38879_20120203_203542_inLine +BABEL_BP_106_38912_20120307_023807_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_39080_20120225_180230_inLine +BABEL_BP_106_39080_20120225_180230_outLine +BABEL_BP_106_39114_20120315_131924_inLine +BABEL_BP_106_39114_20120315_135035_inLine +BABEL_BP_106_39114_20120315_142026_inLine +BABEL_BP_106_39179_20120331_134039_outLine +BABEL_BP_106_39179_20120331_134617_outLine +BABEL_BP_106_39264_20120228_015102_inLine +BABEL_BP_106_39264_20120228_015102_outLine +BABEL_BP_106_39264_20120228_022421_inLine +BABEL_BP_106_39264_20120228_022421_outLine +BABEL_BP_106_39563_20120414_162942_inLine +BABEL_BP_106_39756_20120312_165815_inLine +BABEL_BP_106_40002_20120301_225806_inLine +BABEL_BP_106_40197_20120308_211406_inLine +BABEL_BP_106_40211_20120329_005438_inLine +BABEL_BP_106_40211_20120329_005439_outLine +BABEL_BP_106_40288_20120516_161057_inLine +BABEL_BP_106_40439_20120405_122042_inLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_40882_20120418_205714_inLine +BABEL_BP_106_41327_20120128_163042_inLine +BABEL_BP_106_41327_20120128_163042_outLine +BABEL_BP_106_41541_20120315_003903_inLine +BABEL_BP_106_41541_20120315_003903_outLine +BABEL_BP_106_41557_20120324_040736_inLine +BABEL_BP_106_41557_20120324_040736_outLine +BABEL_BP_106_41557_20120324_043210_inLine +BABEL_BP_106_41557_20120324_043210_outLine +BABEL_BP_106_41710_20120410_205005_outLine +BABEL_BP_106_41733_20120307_171130_inLine +BABEL_BP_106_41816_20120415_184339_inLine +BABEL_BP_106_41949_20120213_174300_inLine +BABEL_BP_106_41949_20120213_174300_outLine +BABEL_BP_106_42183_20120323_223118_inLine +BABEL_BP_106_42183_20120327_190153_inLine +BABEL_BP_106_42651_20120131_020401_inLine +BABEL_BP_106_42651_20120131_020401_outLine +BABEL_BP_106_42768_20120411_173257_inLine +BABEL_BP_106_42820_20120415_180402_inLine +BABEL_BP_106_42910_20120128_213020_inLine +BABEL_BP_106_42910_20120128_213020_outLine +BABEL_BP_106_43069_20120409_204043_inLine +BABEL_BP_106_43069_20120409_204043_outLine +BABEL_BP_106_43306_20120210_032400_inLine +BABEL_BP_106_43306_20120210_032400_outLine +BABEL_BP_106_43425_20120317_174519_inLine +BABEL_BP_106_43425_20120317_175422_inLine +BABEL_BP_106_43425_20120317_183658_inLine +BABEL_BP_106_43652_20120208_010946_inLine +BABEL_BP_106_43652_20120208_010946_outLine +BABEL_BP_106_43939_20120317_194330_inLine +BABEL_BP_106_43939_20120317_194330_outLine +BABEL_BP_106_44038_20120317_204039_inLine +BABEL_BP_106_44038_20120317_205302_inLine +BABEL_BP_106_44052_20120327_234511_inLine +BABEL_BP_106_44052_20120327_234511_outLine +BABEL_BP_106_44052_20120330_222904_inLine +BABEL_BP_106_44052_20120330_222904_outLine +BABEL_BP_106_44369_20120318_231951_inLine +BABEL_BP_106_44369_20120319_020556_inLine +BABEL_BP_106_44756_20120301_235107_inLine +BABEL_BP_106_44756_20120301_235107_outLine +BABEL_BP_106_45106_20120118_001529_inLine +BABEL_BP_106_45106_20120118_001529_outLine +BABEL_BP_106_45145_20120219_143857_inLine +BABEL_BP_106_45361_20120228_002747_inLine +BABEL_BP_106_45361_20120228_002747_outLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_45511_20120129_010308_inLine +BABEL_BP_106_45511_20120129_010308_outLine 
+BABEL_BP_106_45642_20120203_042123_inLine +BABEL_BP_106_45642_20120203_042123_outLine +BABEL_BP_106_45677_20120315_012905_inLine +BABEL_BP_106_45677_20120315_013919_inLine +BABEL_BP_106_45681_20120306_210519_inLine +BABEL_BP_106_45702_20120226_175928_inLine +BABEL_BP_106_45702_20120226_175928_outLine +BABEL_BP_106_45793_20120127_170707_inLine +BABEL_BP_106_45793_20120127_170707_outLine +BABEL_BP_106_46427_20120303_200620_outLine +BABEL_BP_106_46435_20120317_184057_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_46744_20120324_002344_inLine +BABEL_BP_106_46744_20120324_002344_outLine +BABEL_BP_106_46813_20120416_015932_inLine +BABEL_BP_106_47263_20120305_023242_inLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_47469_20120210_221258_inLine +BABEL_BP_106_47469_20120210_221258_outLine +BABEL_BP_106_47661_20120131_002939_outLine +BABEL_BP_106_47794_20120403_181127_inLine +BABEL_BP_106_47794_20120403_182418_inLine +BABEL_BP_106_47821_20120228_011928_inLine +BABEL_BP_106_47823_20120302_214046_outLine +BABEL_BP_106_47906_20120418_223527_inLine +BABEL_BP_106_47906_20120418_225920_inLine +BABEL_BP_106_48059_20120317_161513_inLine +BABEL_BP_106_48059_20120317_161513_outLine +BABEL_BP_106_48061_20120303_234335_inLine +BABEL_BP_106_48181_20120211_011159_inLine +BABEL_BP_106_48181_20120211_011159_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_48317_20120301_002256_inLine +BABEL_BP_106_48317_20120301_002256_outLine +BABEL_BP_106_48418_20120407_165729_inLine +BABEL_BP_106_48536_20120129_053527_inLine +BABEL_BP_106_48683_20120505_022553_inLine +BABEL_BP_106_49239_20120317_123831_inLine +BABEL_BP_106_49309_20120330_230450_inLine +BABEL_BP_106_49346_20120405_185601_inLine +BABEL_BP_106_49381_20120414_193653_inLine +BABEL_BP_106_49582_20120213_230049_inLine +BABEL_BP_106_49582_20120213_230049_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50141_20120309_225945_inLine +BABEL_BP_106_50141_20120309_225945_outLine +BABEL_BP_106_50298_20120227_005517_inLine +BABEL_BP_106_50298_20120227_005517_outLine +BABEL_BP_106_50387_20120229_175528_inLine +BABEL_BP_106_50387_20120229_175528_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_50410_20120229_183217_inLine +BABEL_BP_106_50410_20120229_183217_outLine +BABEL_BP_106_50468_20120417_231448_inLine +BABEL_BP_106_50476_20120304_171701_inLine +BABEL_BP_106_50476_20120304_171701_outLine +BABEL_BP_106_50555_20120605_134945_inLine +BABEL_BP_106_50589_20120128_192230_inLine +BABEL_BP_106_50589_20120128_192230_outLine +BABEL_BP_106_50641_20120317_180902_inLine +BABEL_BP_106_50641_20120317_180902_outLine +BABEL_BP_106_50752_20120310_001913_inLine +BABEL_BP_106_51042_20120313_230521_inLine +BABEL_BP_106_51073_20120128_200706_inLine +BABEL_BP_106_51073_20120128_200706_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_51448_20120413_214526_inLine +BABEL_BP_106_51448_20120413_220517_inLine +BABEL_BP_106_51727_20120229_000250_inLine +BABEL_BP_106_51727_20120229_000250_outLine +BABEL_BP_106_52033_20120228_001715_inLine +BABEL_BP_106_52033_20120228_001715_outLine +BABEL_BP_106_52154_20120312_004528_outLine 
+BABEL_BP_106_52325_20120211_220159_inLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_52642_20120222_175700_inLine +BABEL_BP_106_52642_20120222_175700_outLine +BABEL_BP_106_52902_20120605_184038_inLine +BABEL_BP_106_53179_20120301_181951_inLine +BABEL_BP_106_53179_20120301_181951_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_53463_20120605_180156_inLine +BABEL_BP_106_53653_20120405_182452_inLine +BABEL_BP_106_53653_20120405_183849_inLine +BABEL_BP_106_53824_20120227_025033_inLine +BABEL_BP_106_53824_20120227_025033_outLine +BABEL_BP_106_54358_20120229_223811_outLine +BABEL_BP_106_54621_20120227_235308_inLine +BABEL_BP_106_54621_20120227_235308_outLine +BABEL_BP_106_54785_20120303_011154_outLine +BABEL_BP_106_55182_20120422_185742_inLine +BABEL_BP_106_55204_20120330_230730_outLine +BABEL_BP_106_55288_20120503_010325_inLine +BABEL_BP_106_55355_20120405_180949_inLine +BABEL_BP_106_55450_20120302_125827_inLine +BABEL_BP_106_55450_20120302_125827_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55838_20120318_160306_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_55944_20120306_172041_inLine +BABEL_BP_106_55944_20120306_172041_outLine +BABEL_BP_106_56117_20120313_001237_outLine +BABEL_BP_106_56342_20120605_162901_inLine +BABEL_BP_106_56634_20120328_235133_inLine +BABEL_BP_106_56634_20120328_235133_outLine +BABEL_BP_106_56868_20120203_012057_inLine +BABEL_BP_106_56868_20120203_012057_outLine +BABEL_BP_106_56943_20120126_224048_outLine +BABEL_BP_106_57020_20120305_121648_outLine +BABEL_BP_106_57039_20120314_003848_inLine +BABEL_BP_106_57039_20120314_005748_inLine +BABEL_BP_106_57609_20120304_174858_inLine +BABEL_BP_106_57609_20120304_174858_outLine +BABEL_BP_106_57609_20120304_180016_inLine +BABEL_BP_106_57609_20120304_180016_outLine +BABEL_BP_106_57638_20120414_164822_inLine +BABEL_BP_106_58108_20120411_180115_inLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_58232_20120226_031714_inLine +BABEL_BP_106_58232_20120226_031714_outLine +BABEL_BP_106_58447_20120329_013316_inLine +BABEL_BP_106_58447_20120329_013316_outLine +BABEL_BP_106_58536_20120210_221536_inLine +BABEL_BP_106_58536_20120210_221536_outLine +BABEL_BP_106_58572_20120401_203941_inLine +BABEL_BP_106_58572_20120401_203941_outLine +BABEL_BP_106_58587_20120411_003742_inLine +BABEL_BP_106_58587_20120411_003742_outLine +BABEL_BP_106_58746_20120308_211819_inLine +BABEL_BP_106_58746_20120308_211819_outLine +BABEL_BP_106_58956_20120602_130340_inLine +BABEL_BP_106_59071_20120228_033845_inLine +BABEL_BP_106_59071_20120228_033845_outLine +BABEL_BP_106_59175_20120221_181535_inLine +BABEL_BP_106_59175_20120221_181535_outLine +BABEL_BP_106_59383_20120317_170327_inLine +BABEL_BP_106_59383_20120317_175629_inLine +BABEL_BP_106_59544_20120209_182249_inLine +BABEL_BP_106_59544_20120209_182249_outLine +BABEL_BP_106_59565_20120321_231854_inLine +BABEL_BP_106_59565_20120321_233445_inLine +BABEL_BP_106_59565_20120321_234523_inLine +BABEL_BP_106_59628_20120309_181006_inLine +BABEL_BP_106_59628_20120309_181006_outLine +BABEL_BP_106_59746_20120225_061555_outLine 
+BABEL_BP_106_59764_20120222_204824_inLine +BABEL_BP_106_59764_20120222_204824_outLine +BABEL_BP_106_59846_20120318_164327_inLine +BABEL_BP_106_59878_20120505_021018_inLine +BABEL_BP_106_59925_20120403_160805_inLine +BABEL_BP_106_60106_20120422_155717_inLine +BABEL_BP_106_60238_20120307_161043_inLine +BABEL_BP_106_60238_20120307_165816_inLine +BABEL_BP_106_60238_20120307_165816_outLine +BABEL_BP_106_60250_20120203_013207_inLine +BABEL_BP_106_60250_20120203_013207_outLine +BABEL_BP_106_60598_20120324_022730_inLine +BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_60677_20120418_162110_inLine +BABEL_BP_106_60677_20120418_162841_inLine +BABEL_BP_106_60693_20120331_185728_inLine +BABEL_BP_106_60693_20120414_201244_inLine +BABEL_BP_106_60753_20120120_175004_inLine +BABEL_BP_106_60753_20120120_175004_outLine +BABEL_BP_106_60826_20120605_170407_inLine +BABEL_BP_106_61073_20120428_141557_outLine +BABEL_BP_106_61446_20120603_125637_inLine +BABEL_BP_106_61489_20120417_203944_inLine +BABEL_BP_106_61566_20120323_224814_inLine +BABEL_BP_106_61566_20120324_111941_inLine +BABEL_BP_106_61772_20120502_164250_inLine +BABEL_BP_106_61906_20120418_175007_inLine +BABEL_BP_106_62163_20120313_231604_outLine +BABEL_BP_106_62163_20120313_232930_outLine +BABEL_BP_106_62255_20120314_190940_inLine +BABEL_BP_106_62255_20120314_190940_outLine +BABEL_BP_106_62452_20120420_001258_inLine +BABEL_BP_106_62452_20120420_002416_inLine +BABEL_BP_106_62610_20120511_023409_inLine +BABEL_BP_106_62610_20120511_024325_inLine +BABEL_BP_106_63114_20120226_074838_inLine +BABEL_BP_106_63114_20120226_074838_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_63368_20120604_114321_inLine +BABEL_BP_106_63392_20120405_141717_inLine +BABEL_BP_106_63468_20120409_200746_inLine +BABEL_BP_106_63468_20120409_200746_outLine +BABEL_BP_106_63711_20120129_023323_inLine +BABEL_BP_106_63711_20120129_023323_outLine +BABEL_BP_106_63741_20120328_001923_inLine +BABEL_BP_106_64172_20120331_141241_inLine +BABEL_BP_106_64172_20120331_141241_outLine +BABEL_BP_106_64172_20120331_152028_inLine +BABEL_BP_106_64172_20120331_152028_outLine +BABEL_BP_106_64226_20120404_231458_inLine +BABEL_BP_106_64334_20120528_174746_inLine +BABEL_BP_106_64351_20120217_181140_inLine +BABEL_BP_106_64351_20120217_181140_outLine +BABEL_BP_106_64889_20120307_175001_inLine +BABEL_BP_106_64889_20120307_175001_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_65371_20120308_201622_inLine +BABEL_BP_106_65371_20120308_201622_outLine +BABEL_BP_106_65579_20120317_123135_inLine +BABEL_BP_106_65601_20120211_212006_inLine +BABEL_BP_106_65631_20120216_021352_inLine +BABEL_BP_106_65631_20120216_021352_outLine +BABEL_BP_106_65656_20120309_195913_outLine +BABEL_BP_106_65989_20120605_163026_inLine +BABEL_BP_106_66101_20120227_174035_inLine +BABEL_BP_106_66101_20120227_174035_outLine +BABEL_BP_106_66188_20120316_230006_inLine +BABEL_BP_106_66188_20120316_230006_outLine +BABEL_BP_106_66247_20120331_214412_inLine +BABEL_BP_106_66247_20120331_214412_outLine +BABEL_BP_106_66416_20120225_122454_inLine +BABEL_BP_106_66416_20120225_122454_outLine +BABEL_BP_106_66559_20120505_033828_inLine +BABEL_BP_106_66709_20120302_222833_inLine +BABEL_BP_106_66872_20120302_010751_inLine +BABEL_BP_106_66872_20120302_012055_inLine +BABEL_BP_106_66964_20120416_132128_inLine +BABEL_BP_106_67304_20120222_212038_inLine +BABEL_BP_106_67304_20120222_212038_outLine +BABEL_BP_106_67411_20120210_155625_inLine 
+BABEL_BP_106_67411_20120210_155625_outLine +BABEL_BP_106_67630_20120324_031205_inLine +BABEL_BP_106_67630_20120324_031205_outLine +BABEL_BP_106_67630_20120324_033243_inLine +BABEL_BP_106_67630_20120324_033243_outLine +BABEL_BP_106_67733_20120219_180702_inLine +BABEL_BP_106_67772_20120130_225552_inLine +BABEL_BP_106_67772_20120130_225552_outLine +BABEL_BP_106_68111_20120419_232912_inLine +BABEL_BP_106_68276_20120308_201728_inLine +BABEL_BP_106_68276_20120308_203526_inLine +BABEL_BP_106_68287_20120408_172649_inLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68490_20120227_152714_inLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_68665_20120409_202242_inLine +BABEL_BP_106_68671_20120407_164400_inLine +BABEL_BP_106_68803_20120121_171931_inLine +BABEL_BP_106_68803_20120121_171931_outLine +BABEL_BP_106_69145_20120319_175304_inLine +BABEL_BP_106_69145_20120319_175304_outLine +BABEL_BP_106_69236_20120216_195133_inLine +BABEL_BP_106_69275_20120318_200803_inLine +BABEL_BP_106_69275_20120318_204539_inLine +BABEL_BP_106_69275_20120318_204539_outLine +BABEL_BP_106_69446_20120416_010511_inLine +BABEL_BP_106_69621_20120130_005117_inLine +BABEL_BP_106_69621_20120130_005117_outLine +BABEL_BP_106_70077_20120315_191801_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_70511_20120224_173336_inLine +BABEL_BP_106_70511_20120224_173336_outLine +BABEL_BP_106_70983_20120516_163100_inLine +BABEL_BP_106_71160_20120224_221158_inLine +BABEL_BP_106_71176_20120225_190017_inLine +BABEL_BP_106_71176_20120225_190017_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_71313_20120419_205542_inLine +BABEL_BP_106_71741_20120127_162656_inLine +BABEL_BP_106_72119_20120315_153943_inLine +BABEL_BP_106_72142_20120327_171827_inLine +BABEL_BP_106_72142_20120327_173216_inLine +BABEL_BP_106_72142_20120327_173216_outLine +BABEL_BP_106_72801_20120325_202928_outLine +BABEL_BP_106_72801_20120325_222633_inLine +BABEL_BP_106_72801_20120325_222633_outLine +BABEL_BP_106_72858_20120605_123842_inLine +BABEL_BP_106_73059_20120309_164956_outLine +BABEL_BP_106_73250_20120410_001928_inLine +BABEL_BP_106_73250_20120410_003448_inLine +BABEL_BP_106_73438_20120314_002432_outLine +BABEL_BP_106_73440_20120131_160945_inLine +BABEL_BP_106_73440_20120131_160945_outLine +BABEL_BP_106_73752_20120228_142547_inLine +BABEL_BP_106_73752_20120228_142547_outLine +BABEL_BP_106_73780_20120304_170119_outLine +BABEL_BP_106_73786_20120202_230843_inLine +BABEL_BP_106_73786_20120202_230843_outLine +BABEL_BP_106_73911_20120219_194519_inLine +BABEL_BP_106_73911_20120219_194519_outLine +BABEL_BP_106_74214_20120503_012037_inLine +BABEL_BP_106_74295_20120317_213141_inLine +BABEL_BP_106_74295_20120317_214659_inLine +BABEL_BP_106_74368_20120317_141935_inLine +BABEL_BP_106_74395_20120414_202413_inLine +BABEL_BP_106_74508_20120209_223405_inLine +BABEL_BP_106_74508_20120209_223405_outLine +BABEL_BP_106_74533_20120502_222417_inLine +BABEL_BP_106_74986_20120418_222615_inLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75125_20120325_173456_inLine +BABEL_BP_106_75151_20120405_172457_inLine +BABEL_BP_106_75243_20120314_181814_inLine +BABEL_BP_106_75243_20120314_193719_inLine +BABEL_BP_106_75243_20120314_194814_inLine +BABEL_BP_106_75740_20120128_205720_inLine +BABEL_BP_106_75740_20120128_205720_outLine +BABEL_BP_106_75919_20120419_222309_inLine +BABEL_BP_106_75932_20120301_185217_inLine 
+BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_76320_20120317_171522_inLine +BABEL_BP_106_76451_20120128_174820_inLine +BABEL_BP_106_76451_20120128_174820_outLine +BABEL_BP_106_76733_20120317_205542_inLine +BABEL_BP_106_76733_20120317_205542_outLine +BABEL_BP_106_76748_20120313_033301_inLine +BABEL_BP_106_76748_20120313_033301_outLine +BABEL_BP_106_76919_20120301_013753_outLine +BABEL_BP_106_76989_20120212_225118_inLine +BABEL_BP_106_76993_20120227_180157_inLine +BABEL_BP_106_76993_20120227_180157_outLine +BABEL_BP_106_77104_20120320_000526_inLine +BABEL_BP_106_77244_20120317_154534_inLine +BABEL_BP_106_77244_20120317_160037_inLine +BABEL_BP_106_77315_20120227_153127_outLine +BABEL_BP_106_77342_20120224_193702_inLine +BABEL_BP_106_77342_20120224_193702_outLine +BABEL_BP_106_77342_20120224_201725_inLine +BABEL_BP_106_77342_20120224_201725_outLine +BABEL_BP_106_77487_20120310_023017_outLine +BABEL_BP_106_77584_20120228_185654_inLine +BABEL_BP_106_77584_20120228_185654_outLine +BABEL_BP_106_78094_20120127_144526_inLine +BABEL_BP_106_78406_20120331_145033_inLine +BABEL_BP_106_78406_20120331_145857_inLine +BABEL_BP_106_78516_20120324_012547_inLine +BABEL_BP_106_78516_20120324_012547_outLine +BABEL_BP_106_78617_20120325_220217_inLine +BABEL_BP_106_78617_20120325_220217_outLine +BABEL_BP_106_78617_20120327_225421_inLine +BABEL_BP_106_78617_20120327_225421_outLine +BABEL_BP_106_78753_20120511_020629_inLine +BABEL_BP_106_79284_20120317_154901_inLine +BABEL_BP_106_79284_20120317_190801_inLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_79526_20120418_000428_inLine +BABEL_BP_106_79593_20120528_180841_inLine +BABEL_BP_106_79650_20120410_220151_outLine +BABEL_BP_106_79650_20120410_221127_outLine +BABEL_BP_106_79970_20120420_164617_inLine +BABEL_BP_106_80068_20120414_213628_inLine +BABEL_BP_106_80075_20120605_113236_inLine +BABEL_BP_106_80150_20120229_200345_inLine +BABEL_BP_106_80150_20120229_200345_outLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_80290_20120311_231738_inLine +BABEL_BP_106_80290_20120311_231738_outLine +BABEL_BP_106_80290_20120311_234143_inLine +BABEL_BP_106_80290_20120311_234143_outLine +BABEL_BP_106_80535_20120319_003708_inLine +BABEL_BP_106_80598_20120416_002228_inLine +BABEL_BP_106_80638_20120411_224029_inLine +BABEL_BP_106_80701_20120315_153813_inLine +BABEL_BP_106_81065_20120603_120830_inLine +BABEL_BP_106_81096_20120604_122742_inLine +BABEL_BP_106_81119_20120417_201549_inLine +BABEL_BP_106_81601_20120205_223405_inLine +BABEL_BP_106_81601_20120205_223405_outLine +BABEL_BP_106_81642_20120218_232158_inLine +BABEL_BP_106_81642_20120218_232158_outLine +BABEL_BP_106_81647_20120304_131330_inLine +BABEL_BP_106_81647_20120304_131330_outLine +BABEL_BP_106_81769_20120330_213453_outLine +BABEL_BP_106_81799_20120220_212705_inLine +BABEL_BP_106_81799_20120220_212705_outLine +BABEL_BP_106_81820_20120329_190503_inLine +BABEL_BP_106_81844_20120226_002405_inLine +BABEL_BP_106_81844_20120226_002405_outLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine +BABEL_BP_106_82409_20120227_002253_inLine +BABEL_BP_106_82409_20120227_002253_outLine +BABEL_BP_106_82443_20120315_182456_outLine +BABEL_BP_106_82484_20120210_215502_outLine +BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_83186_20120227_203823_inLine +BABEL_BP_106_83186_20120227_203823_outLine +BABEL_BP_106_83531_20120210_162513_inLine 
+BABEL_BP_106_83531_20120210_162513_outLine +BABEL_BP_106_83634_20120317_172215_outLine +BABEL_BP_106_83702_20120212_030118_inLine +BABEL_BP_106_83702_20120212_030118_outLine +BABEL_BP_106_83921_20120227_141419_inLine +BABEL_BP_106_83921_20120227_141419_outLine +BABEL_BP_106_84025_20120307_171246_inLine +BABEL_BP_106_84025_20120307_171246_outLine +BABEL_BP_106_84171_20120317_135204_inLine +BABEL_BP_106_84171_20120317_135204_outLine +BABEL_BP_106_84284_20120304_161058_inLine +BABEL_BP_106_84284_20120304_161058_outLine +BABEL_BP_106_84394_20120226_172149_inLine +BABEL_BP_106_84394_20120226_172149_outLine +BABEL_BP_106_84488_20120329_015848_inLine +BABEL_BP_106_84488_20120329_015848_outLine +BABEL_BP_106_84608_20120304_145910_inLine +BABEL_BP_106_84608_20120304_145910_outLine +BABEL_BP_106_84700_20120308_185454_inLine +BABEL_BP_106_84700_20120308_185454_outLine +BABEL_BP_106_84756_20120324_004957_inLine +BABEL_BP_106_84756_20120324_004957_outLine +BABEL_BP_106_84779_20120313_035600_inLine +BABEL_BP_106_84779_20120313_041105_inLine +BABEL_BP_106_84779_20120313_041106_outLine +BABEL_BP_106_84980_20120227_014019_inLine +BABEL_BP_106_84980_20120227_014019_outLine +BABEL_BP_106_85101_20120401_193440_inLine +BABEL_BP_106_85101_20120401_193440_outLine +BABEL_BP_106_85533_20120130_235957_inLine +BABEL_BP_106_85533_20120130_235957_outLine +BABEL_BP_106_85752_20120301_023900_inLine +BABEL_BP_106_85752_20120301_023900_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_86016_20120211_173225_inLine +BABEL_BP_106_86016_20120211_173225_outLine +BABEL_BP_106_86029_20120130_001526_inLine +BABEL_BP_106_86029_20120130_001526_outLine +BABEL_BP_106_86337_20120411_130915_inLine +BABEL_BP_106_86344_20120323_230601_outLine +BABEL_BP_106_86344_20120323_231804_inLine +BABEL_BP_106_86344_20120323_231804_outLine +BABEL_BP_106_86344_20120323_232835_inLine +BABEL_BP_106_86344_20120323_232835_outLine +BABEL_BP_106_87124_20120411_050315_inLine +BABEL_BP_106_87124_20120411_050315_outLine +BABEL_BP_106_87139_20120227_175141_inLine +BABEL_BP_106_87139_20120227_175141_outLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_87218_20120501_004341_inLine +BABEL_BP_106_87281_20120318_175101_inLine +BABEL_BP_106_87281_20120318_175101_outLine +BABEL_BP_106_87520_20120428_013320_inLine +BABEL_BP_106_87520_20120428_014139_inLine +BABEL_BP_106_87539_20120228_005220_inLine +BABEL_BP_106_87539_20120228_005220_outLine +BABEL_BP_106_87564_20120225_141938_inLine +BABEL_BP_106_87564_20120225_141938_outLine +BABEL_BP_106_87607_20120221_144252_inLine +BABEL_BP_106_87607_20120221_150220_inLine +BABEL_BP_106_87607_20120221_153642_inLine +BABEL_BP_106_87634_20120203_031511_inLine +BABEL_BP_106_87634_20120203_031511_outLine +BABEL_BP_106_87850_20120212_182620_inLine +BABEL_BP_106_87850_20120212_184930_inLine +BABEL_BP_106_87850_20120212_190826_inLine +BABEL_BP_106_87862_20120224_185514_outLine +BABEL_BP_106_87985_20120328_214048_inLine +BABEL_BP_106_87985_20120328_214048_outLine +BABEL_BP_106_88245_20120309_175128_inLine +BABEL_BP_106_88385_20120307_191827_inLine +BABEL_BP_106_88385_20120307_191827_outLine +BABEL_BP_106_88506_20120411_195636_inLine +BABEL_BP_106_88929_20120421_132840_inLine +BABEL_BP_106_88929_20120421_134445_inLine +BABEL_BP_106_89301_20120229_011855_inLine +BABEL_BP_106_89301_20120229_011855_outLine +BABEL_BP_106_89301_20120229_012853_inLine +BABEL_BP_106_89301_20120229_012853_outLine +BABEL_BP_106_89417_20120131_014042_outLine 
+BABEL_BP_106_89583_20120304_211628_outLine +BABEL_BP_106_89583_20120304_214338_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_90127_20120228_034539_inLine +BABEL_BP_106_90202_20120311_205142_outLine +BABEL_BP_106_90389_20120318_150647_inLine +BABEL_BP_106_90389_20120318_151932_inLine +BABEL_BP_106_90393_20120211_230839_inLine +BABEL_BP_106_90393_20120211_230839_outLine +BABEL_BP_106_90436_20120228_014236_inLine +BABEL_BP_106_90436_20120228_014236_outLine +BABEL_BP_106_90490_20120322_030219_inLine +BABEL_BP_106_90490_20120322_033415_inLine +BABEL_BP_106_90506_20120128_002109_inLine +BABEL_BP_106_90506_20120128_002109_outLine +BABEL_BP_106_90511_20120210_164822_inLine +BABEL_BP_106_90511_20120210_164822_outLine +BABEL_BP_106_90742_20120501_022105_inLine +BABEL_BP_106_90742_20120501_022837_inLine +BABEL_BP_106_90951_20120302_230530_inLine +BABEL_BP_106_90951_20120302_232555_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91002_20120411_201622_inLine +BABEL_BP_106_91143_20120413_234122_inLine +BABEL_BP_106_91358_20120312_180740_inLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91481_20120303_192948_inLine +BABEL_BP_106_91481_20120303_202847_inLine +BABEL_BP_106_91583_20120415_170849_inLine +BABEL_BP_106_91583_20120415_172901_inLine +BABEL_BP_106_91660_20120307_172116_inLine +BABEL_BP_106_91660_20120307_172116_outLine +BABEL_BP_106_91668_20120312_231623_inLine +BABEL_BP_106_91668_20120312_234357_inLine +BABEL_BP_106_91687_20120530_211936_inLine +BABEL_BP_106_91703_20120301_180553_inLine +BABEL_BP_106_91703_20120301_180553_outLine +BABEL_BP_106_91865_20120227_132028_inLine +BABEL_BP_106_91865_20120227_132028_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_91975_20120318_202137_inLine +BABEL_BP_106_92002_20120301_010732_inLine +BABEL_BP_106_92346_20120410_232631_inLine +BABEL_BP_106_92346_20120410_234651_inLine +BABEL_BP_106_92591_20120301_232554_outLine +BABEL_BP_106_92642_20120214_041746_inLine +BABEL_BP_106_92642_20120214_041746_outLine +BABEL_BP_106_93044_20120405_184614_inLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_93237_20120412_202749_inLine +BABEL_BP_106_93268_20120316_173016_inLine +BABEL_BP_106_93268_20120316_185049_inLine +BABEL_BP_106_93277_20120314_162508_inLine +BABEL_BP_106_93277_20120314_162508_outLine +BABEL_BP_106_93302_20120530_221003_inLine +BABEL_BP_106_93326_20120329_003409_inLine +BABEL_BP_106_93326_20120329_003409_outLine +BABEL_BP_106_93436_20120314_172420_inLine +BABEL_BP_106_93506_20120501_114215_outLine +BABEL_BP_106_93607_20120304_213723_outLine +BABEL_BP_106_93811_20120419_004934_inLine +BABEL_BP_106_94126_20120331_143958_inLine +BABEL_BP_106_94126_20120331_143958_outLine +BABEL_BP_106_94162_20120418_180628_inLine +BABEL_BP_106_94223_20120219_180504_inLine +BABEL_BP_106_94223_20120219_185026_inLine +BABEL_BP_106_94223_20120219_191721_inLine +BABEL_BP_106_94223_20120219_194907_inLine +BABEL_BP_106_94514_20120225_190925_inLine +BABEL_BP_106_94514_20120225_190925_outLine +BABEL_BP_106_94514_20120225_192755_inLine +BABEL_BP_106_94514_20120225_192755_outLine +BABEL_BP_106_94694_20120315_140425_inLine +BABEL_BP_106_94694_20120315_140425_outLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine 
+BABEL_BP_106_95120_20120226_174855_inLine +BABEL_BP_106_95120_20120226_174855_outLine +BABEL_BP_106_95121_20120314_000217_inLine +BABEL_BP_106_95121_20120314_000217_outLine +BABEL_BP_106_95325_20120225_072841_inLine +BABEL_BP_106_95325_20120225_072841_outLine +BABEL_BP_106_95514_20120404_160802_inLine +BABEL_BP_106_95533_20120227_180819_inLine +BABEL_BP_106_95572_20120501_120940_outLine +BABEL_BP_106_95628_20120331_180349_inLine +BABEL_BP_106_95628_20120331_180349_outLine +BABEL_BP_106_95650_20120203_023309_inLine +BABEL_BP_106_95650_20120203_023309_outLine +BABEL_BP_106_95791_20120323_213855_inLine +BABEL_BP_106_95849_20120317_111924_inLine +BABEL_BP_106_95849_20120317_112530_inLine +BABEL_BP_106_95849_20120317_114235_inLine +BABEL_BP_106_95893_20120304_185606_inLine +BABEL_BP_106_95952_20120413_203735_inLine +BABEL_BP_106_95952_20120413_203735_outLine +BABEL_BP_106_95952_20120413_204700_inLine +BABEL_BP_106_95952_20120413_204700_outLine +BABEL_BP_106_96108_20120306_154946_inLine +BABEL_BP_106_96108_20120306_154946_outLine +BABEL_BP_106_96302_20120317_192957_inLine +BABEL_BP_106_96302_20120317_193605_inLine +BABEL_BP_106_96302_20120317_195426_inLine +BABEL_BP_106_96302_20120317_195426_outLine +BABEL_BP_106_96425_20120314_181621_inLine +BABEL_BP_106_96425_20120314_183006_inLine +BABEL_BP_106_96463_20120304_141645_inLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_96636_20120605_122128_inLine +BABEL_BP_106_96717_20120331_232633_outLine +BABEL_BP_106_96922_20120331_201147_outLine +BABEL_BP_106_97486_20120214_032248_inLine +BABEL_BP_106_97486_20120214_032248_outLine +BABEL_BP_106_97486_20120214_035344_inLine +BABEL_BP_106_97486_20120214_035344_outLine +BABEL_BP_106_97635_20120330_174657_inLine +BABEL_BP_106_97649_20120312_212246_inLine +BABEL_BP_106_97649_20120312_213707_inLine +BABEL_BP_106_97699_20120317_154627_outLine +BABEL_BP_106_98279_20120228_221829_inLine +BABEL_BP_106_98279_20120228_222916_inLine +BABEL_BP_106_98279_20120228_224103_inLine +BABEL_BP_106_98402_20120311_211004_inLine +BABEL_BP_106_98402_20120311_211004_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine +BABEL_BP_106_98476_20120301_235152_inLine +BABEL_BP_106_98476_20120301_235152_outLine +BABEL_BP_106_98675_20120418_012454_inLine +BABEL_BP_106_98807_20120331_181706_inLine +BABEL_BP_106_98807_20120331_181706_outLine +BABEL_BP_106_98807_20120331_182345_inLine +BABEL_BP_106_98807_20120331_182345_outLine +BABEL_BP_106_98807_20120331_183121_inLine +BABEL_BP_106_98807_20120331_183121_outLine +BABEL_BP_106_99012_20120419_224750_inLine +BABEL_BP_106_99697_20120229_185303_inLine +BABEL_BP_106_99856_20120226_184042_inLine +BABEL_BP_106_99856_20120226_184042_outLine +BABEL_BP_106_99856_20120226_191212_inLine +BABEL_BP_106_99856_20120226_191212_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list new file mode 100644 index 00000000000..fee3e3adbaf --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list @@ -0,0 +1,134 @@ +BABEL_BP_106_03420_20120409_204941_inLine +BABEL_BP_106_03420_20120409_204941_outLine +BABEL_BP_106_03420_20120409_211810_inLine +BABEL_BP_106_03420_20120409_211811_outLine +BABEL_BP_106_10985_20120313_013835_inLine +BABEL_BP_106_10985_20120313_013835_outLine +BABEL_BP_106_11158_20120314_183907_inLine +BABEL_BP_106_11158_20120314_183907_outLine +BABEL_BP_106_11158_20120314_193006_inLine 
+BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine +BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_152557_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_60598_20120324_022730_inLine 
+BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75932_20120301_185217_inLine +BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine +BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_89417_20120131_014042_inLine +BABEL_BP_106_89417_20120131_014042_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/dev.list b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list new file mode 100644 index 00000000000..f44c76db308 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list @@ -0,0 +1,132 @@ +BABEL_BP_107_11031_20120617_182613_inLine +BABEL_BP_107_11031_20120617_182613_outLine +BABEL_BP_107_12120_20120704_024505_inLine +BABEL_BP_107_12120_20120704_024505_outLine +BABEL_BP_107_12248_20120614_183345_inLine +BABEL_BP_107_12248_20120614_183345_outLine +BABEL_BP_107_12963_20120509_002346_inLine +BABEL_BP_107_12963_20120509_002346_outLine +BABEL_BP_107_12963_20120509_003852_inLine +BABEL_BP_107_12963_20120509_003852_outLine +BABEL_BP_107_13476_20120428_003452_inLine +BABEL_BP_107_13476_20120428_003452_outLine +BABEL_BP_107_14610_20120218_201908_inLine +BABEL_BP_107_14610_20120218_201908_outLine +BABEL_BP_107_14769_20120420_013147_inLine +BABEL_BP_107_14769_20120420_013147_outLine +BABEL_BP_107_14997_20120406_190013_inLine +BABEL_BP_107_14997_20120406_190013_outLine +BABEL_BP_107_14997_20120406_191102_inLine +BABEL_BP_107_14997_20120406_191102_outLine +BABEL_BP_107_15493_20120617_120952_inLine +BABEL_BP_107_15493_20120617_120952_outLine +BABEL_BP_107_15502_20120627_124423_inLine +BABEL_BP_107_15502_20120627_124423_outLine +BABEL_BP_107_16167_20120215_213113_inLine +BABEL_BP_107_16167_20120215_213113_outLine +BABEL_BP_107_18730_20120222_145916_inLine +BABEL_BP_107_18730_20120222_145916_outLine +BABEL_BP_107_19619_20120215_221131_inLine +BABEL_BP_107_19619_20120215_221131_outLine +BABEL_BP_107_19619_20120215_223011_inLine 
+BABEL_BP_107_19619_20120215_223011_outLine +BABEL_BP_107_21489_20120608_123945_inLine +BABEL_BP_107_21489_20120608_123945_outLine +BABEL_BP_107_26644_20120509_013405_inLine +BABEL_BP_107_26644_20120509_013405_outLine +BABEL_BP_107_28161_20120322_171027_inLine +BABEL_BP_107_28161_20120322_171027_outLine +BABEL_BP_107_28648_20120506_223200_inLine +BABEL_BP_107_28648_20120506_223200_outLine +BABEL_BP_107_29168_20120321_215013_inLine +BABEL_BP_107_29168_20120321_215013_outLine +BABEL_BP_107_31538_20120320_202748_inLine +BABEL_BP_107_31538_20120320_202748_outLine +BABEL_BP_107_32120_20120704_182238_inLine +BABEL_BP_107_32120_20120704_182238_outLine +BABEL_BP_107_32236_20120505_195420_inLine +BABEL_BP_107_32236_20120505_195420_outLine +BABEL_BP_107_33704_20120416_005402_inLine +BABEL_BP_107_33704_20120416_005402_outLine +BABEL_BP_107_35391_20120416_192241_inLine +BABEL_BP_107_35391_20120416_192241_outLine +BABEL_BP_107_35441_20120421_221245_inLine +BABEL_BP_107_35441_20120421_221245_outLine +BABEL_BP_107_39080_20120415_141817_inLine +BABEL_BP_107_39080_20120415_141817_outLine +BABEL_BP_107_39140_20120409_163031_inLine +BABEL_BP_107_39140_20120409_163031_outLine +BABEL_BP_107_39997_20120516_214034_inLine +BABEL_BP_107_39997_20120516_214035_outLine +BABEL_BP_107_41456_20120421_133628_inLine +BABEL_BP_107_41456_20120421_133628_outLine +BABEL_BP_107_41661_20120329_022249_inLine +BABEL_BP_107_41661_20120329_022249_outLine +BABEL_BP_107_41661_20120329_023848_inLine +BABEL_BP_107_41661_20120329_023848_outLine +BABEL_BP_107_43086_20120210_015927_inLine +BABEL_BP_107_43086_20120210_015927_outLine +BABEL_BP_107_45512_20120505_135144_inLine +BABEL_BP_107_45512_20120505_135144_outLine +BABEL_BP_107_45677_20120428_184714_inLine +BABEL_BP_107_45677_20120428_184714_outLine +BABEL_BP_107_47037_20120415_210047_inLine +BABEL_BP_107_47037_20120415_210047_outLine +BABEL_BP_107_54285_20120430_233928_inLine +BABEL_BP_107_54285_20120430_233928_outLine +BABEL_BP_107_56812_20120502_123725_inLine +BABEL_BP_107_56812_20120502_123725_outLine +BABEL_BP_107_57020_20120427_011940_inLine +BABEL_BP_107_57020_20120427_011940_outLine +BABEL_BP_107_57976_20120704_183740_inLine +BABEL_BP_107_57976_20120704_183740_outLine +BABEL_BP_107_59868_20120324_013729_inLine +BABEL_BP_107_59868_20120324_013729_outLine +BABEL_BP_107_59868_20120324_015118_inLine +BABEL_BP_107_59868_20120324_015118_outLine +BABEL_BP_107_59891_20120504_013809_inLine +BABEL_BP_107_59891_20120504_013809_outLine +BABEL_BP_107_63459_20120415_003841_inLine +BABEL_BP_107_63459_20120415_003841_outLine +BABEL_BP_107_65606_20120416_004652_inLine +BABEL_BP_107_65606_20120416_004652_outLine +BABEL_BP_107_70625_20120426_235142_inLine +BABEL_BP_107_70625_20120426_235142_outLine +BABEL_BP_107_71178_20120617_184313_inLine +BABEL_BP_107_71178_20120617_184313_outLine +BABEL_BP_107_73542_20120209_010311_inLine +BABEL_BP_107_73542_20120209_010311_outLine +BABEL_BP_107_75990_20120408_211713_inLine +BABEL_BP_107_75990_20120408_211713_outLine +BABEL_BP_107_76320_20120504_123902_inLine +BABEL_BP_107_76320_20120504_123902_outLine +BABEL_BP_107_77697_20120416_235254_inLine +BABEL_BP_107_77697_20120416_235254_outLine +BABEL_BP_107_77771_20120421_231323_inLine +BABEL_BP_107_77771_20120421_231323_outLine +BABEL_BP_107_79412_20120322_174955_inLine +BABEL_BP_107_79412_20120322_174955_outLine +BABEL_BP_107_79526_20120420_150504_inLine +BABEL_BP_107_79526_20120420_150504_outLine +BABEL_BP_107_83219_20120421_172919_inLine +BABEL_BP_107_83219_20120421_172919_outLine 
+BABEL_BP_107_85204_20120212_190017_inLine +BABEL_BP_107_85204_20120212_190017_outLine +BABEL_BP_107_86259_20120507_015816_inLine +BABEL_BP_107_86259_20120507_015816_outLine +BABEL_BP_107_87850_20120406_210353_inLine +BABEL_BP_107_87850_20120406_210354_outLine +BABEL_BP_107_88383_20120627_125444_inLine +BABEL_BP_107_88383_20120627_125444_outLine +BABEL_BP_107_88383_20120627_130611_inLine +BABEL_BP_107_88383_20120627_130611_outLine +BABEL_BP_107_89838_20120212_205650_inLine +BABEL_BP_107_89838_20120212_205650_outLine +BABEL_BP_107_90202_20120502_194459_inLine +BABEL_BP_107_90202_20120502_194459_outLine +BABEL_BP_107_92386_20120322_195456_inLine +BABEL_BP_107_92386_20120322_195456_outLine +BABEL_BP_107_96283_20120503_162149_inLine +BABEL_BP_107_96283_20120503_162149_outLine +BABEL_BP_107_97760_20120614_184333_inLine +BABEL_BP_107_97760_20120614_184333_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/eval.list b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list new file mode 100644 index 00000000000..9cc6f7875ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list @@ -0,0 +1,981 @@ +BABEL_BP_107_10170_20120703_230849_inLine +BABEL_BP_107_10170_20120703_230850_outLine +BABEL_BP_107_10170_20120703_231552_inLine +BABEL_BP_107_10170_20120703_231552_outLine +BABEL_BP_107_10187_20120218_202839_inLine +BABEL_BP_107_10187_20120218_202839_outLine +BABEL_BP_107_10408_20120704_021830_inLine +BABEL_BP_107_10408_20120704_021830_outLine +BABEL_BP_107_10470_20120608_135407_inLine +BABEL_BP_107_10470_20120608_135407_outLine +BABEL_BP_107_10925_20120209_233924_inLine +BABEL_BP_107_10925_20120209_233924_outLine +BABEL_BP_107_11004_20120329_040734_inLine +BABEL_BP_107_11004_20120329_040734_outLine +BABEL_BP_107_11152_20120418_221056_inLine +BABEL_BP_107_11152_20120418_221056_outLine +BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_12535_20120614_190306_inLine +BABEL_BP_107_12535_20120614_190306_outLine +BABEL_BP_107_12667_20120514_195317_inLine +BABEL_BP_107_12667_20120514_195317_outLine +BABEL_BP_107_12700_20120608_010254_inLine +BABEL_BP_107_12700_20120608_010254_outLine +BABEL_BP_107_13118_20120419_193637_inLine +BABEL_BP_107_13118_20120419_193637_outLine +BABEL_BP_107_13441_20120412_212102_inLine +BABEL_BP_107_13441_20120412_212102_outLine +BABEL_BP_107_13709_20120406_164042_inLine +BABEL_BP_107_13709_20120406_164043_outLine +BABEL_BP_107_13913_20120415_144214_inLine +BABEL_BP_107_13913_20120415_144214_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15022_20120418_133337_inLine +BABEL_BP_107_15022_20120418_133337_outLine +BABEL_BP_107_15234_20120509_180434_inLine +BABEL_BP_107_15234_20120509_180434_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_15916_20120426_132306_inLine +BABEL_BP_107_15916_20120426_132306_outLine +BABEL_BP_107_16266_20120211_215251_inLine +BABEL_BP_107_16266_20120211_215251_outLine +BABEL_BP_107_16299_20120220_135944_inLine +BABEL_BP_107_16299_20120220_135944_outLine +BABEL_BP_107_16346_20120423_192454_inLine +BABEL_BP_107_16346_20120423_192454_outLine +BABEL_BP_107_16984_20120414_193034_inLine 
+BABEL_BP_107_16984_20120414_193034_outLine +BABEL_BP_107_17080_20120212_150122_inLine +BABEL_BP_107_17080_20120212_150122_outLine +BABEL_BP_107_17207_20120507_023403_inLine +BABEL_BP_107_17207_20120507_023403_outLine +BABEL_BP_107_17511_20120419_232032_inLine +BABEL_BP_107_17511_20120419_232032_outLine +BABEL_BP_107_17572_20120613_040637_inLine +BABEL_BP_107_17572_20120613_040637_outLine +BABEL_BP_107_17850_20120615_234216_inLine +BABEL_BP_107_17850_20120615_234216_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_18672_20120426_150856_inLine +BABEL_BP_107_18672_20120426_150856_outLine +BABEL_BP_107_18980_20120608_125749_inLine +BABEL_BP_107_18980_20120608_125749_outLine +BABEL_BP_107_19147_20120212_161206_inLine +BABEL_BP_107_19147_20120212_161206_outLine +BABEL_BP_107_19456_20120704_165824_inLine +BABEL_BP_107_19456_20120704_165824_outLine +BABEL_BP_107_19656_20120325_230731_inLine +BABEL_BP_107_19656_20120325_230731_outLine +BABEL_BP_107_19861_20120511_013731_inLine +BABEL_BP_107_19861_20120511_013731_outLine +BABEL_BP_107_19861_20120511_014743_inLine +BABEL_BP_107_19861_20120511_014744_outLine +BABEL_BP_107_19915_20120218_150645_inLine +BABEL_BP_107_19915_20120218_150645_outLine +BABEL_BP_107_20408_20120323_142004_inLine +BABEL_BP_107_20408_20120323_142004_outLine +BABEL_BP_107_20408_20120323_143722_inLine +BABEL_BP_107_20408_20120323_143722_outLine +BABEL_BP_107_20471_20120328_020935_inLine +BABEL_BP_107_20471_20120328_020935_outLine +BABEL_BP_107_20546_20120323_215948_inLine +BABEL_BP_107_20546_20120323_215948_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_21714_20120608_140615_inLine +BABEL_BP_107_21714_20120608_140615_outLine +BABEL_BP_107_21782_20120321_191431_inLine +BABEL_BP_107_21782_20120321_191431_outLine +BABEL_BP_107_21845_20120613_195420_inLine +BABEL_BP_107_21845_20120613_195420_outLine +BABEL_BP_107_22179_20120220_172322_inLine +BABEL_BP_107_22179_20120220_172322_outLine +BABEL_BP_107_22351_20120413_231618_inLine +BABEL_BP_107_22351_20120413_231618_outLine +BABEL_BP_107_22408_20120416_180244_inLine +BABEL_BP_107_22408_20120416_180244_outLine +BABEL_BP_107_22537_20120322_214458_inLine +BABEL_BP_107_22537_20120322_214458_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_22973_20120503_231406_inLine +BABEL_BP_107_22973_20120503_231406_outLine +BABEL_BP_107_23168_20120618_113427_inLine +BABEL_BP_107_23168_20120618_113427_outLine +BABEL_BP_107_23336_20120429_192926_inLine +BABEL_BP_107_23336_20120429_192926_outLine +BABEL_BP_107_23352_20120425_211848_inLine +BABEL_BP_107_23352_20120425_211848_outLine +BABEL_BP_107_23995_20120418_194620_inLine +BABEL_BP_107_23995_20120418_194620_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_24580_20120420_011554_inLine +BABEL_BP_107_24580_20120420_011554_outLine +BABEL_BP_107_24589_20120508_183427_inLine +BABEL_BP_107_24589_20120508_183427_outLine +BABEL_BP_107_25021_20120131_214134_inLine +BABEL_BP_107_25021_20120131_214134_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25735_20120608_134208_inLine +BABEL_BP_107_25735_20120608_134208_outLine 
+BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_25904_20120509_000636_inLine +BABEL_BP_107_25904_20120509_000636_outLine +BABEL_BP_107_26164_20120705_014122_inLine +BABEL_BP_107_26164_20120705_014122_outLine +BABEL_BP_107_27178_20120324_021235_inLine +BABEL_BP_107_27178_20120324_021235_outLine +BABEL_BP_107_27349_20120321_195149_inLine +BABEL_BP_107_27349_20120321_195149_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_27824_20120418_211841_inLine +BABEL_BP_107_27824_20120418_211841_outLine +BABEL_BP_107_27825_20120418_230344_inLine +BABEL_BP_107_27825_20120418_230344_outLine +BABEL_BP_107_27825_20120418_231611_inLine +BABEL_BP_107_27825_20120418_231611_outLine +BABEL_BP_107_28754_20120417_233136_inLine +BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_28768_20120607_134003_inLine +BABEL_BP_107_28768_20120607_134003_outLine +BABEL_BP_107_28768_20120607_135648_inLine +BABEL_BP_107_28768_20120607_135648_outLine +BABEL_BP_107_28990_20120421_150239_inLine +BABEL_BP_107_28990_20120421_150239_outLine +BABEL_BP_107_29087_20120511_023457_inLine +BABEL_BP_107_29087_20120511_023457_outLine +BABEL_BP_107_29097_20120120_174353_inLine +BABEL_BP_107_29097_20120120_174353_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29259_20120418_213018_inLine +BABEL_BP_107_29259_20120418_213018_outLine +BABEL_BP_107_29328_20120208_021903_inLine +BABEL_BP_107_29328_20120208_021903_outLine +BABEL_BP_107_29421_20120501_121237_inLine +BABEL_BP_107_29421_20120501_121237_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_29545_20120704_025504_inLine +BABEL_BP_107_29545_20120704_025504_outLine +BABEL_BP_107_30530_20120210_191257_inLine +BABEL_BP_107_30530_20120210_191257_outLine +BABEL_BP_107_30642_20120424_124529_inLine +BABEL_BP_107_30642_20120424_124529_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine +BABEL_BP_107_31345_20120501_200006_inLine +BABEL_BP_107_31345_20120501_200006_outLine +BABEL_BP_107_31441_20120322_221247_inLine +BABEL_BP_107_31441_20120322_221247_outLine +BABEL_BP_107_31678_20120323_003303_inLine +BABEL_BP_107_31678_20120323_003303_outLine +BABEL_BP_107_31841_20120420_173052_inLine +BABEL_BP_107_31841_20120420_173052_outLine +BABEL_BP_107_31841_20120420_175428_inLine +BABEL_BP_107_31841_20120420_175428_outLine +BABEL_BP_107_31975_20120418_213316_inLine +BABEL_BP_107_31975_20120418_213316_outLine +BABEL_BP_107_32045_20120627_135349_inLine +BABEL_BP_107_32045_20120627_135349_outLine +BABEL_BP_107_32263_20120415_125245_inLine +BABEL_BP_107_32263_20120415_125245_outLine +BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32562_20120502_183523_inLine +BABEL_BP_107_32562_20120502_183523_outLine +BABEL_BP_107_32642_20120507_162602_inLine +BABEL_BP_107_32642_20120507_162602_outLine +BABEL_BP_107_32818_20120505_124034_inLine +BABEL_BP_107_32818_20120505_124034_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_33243_20120417_000926_inLine +BABEL_BP_107_33243_20120417_000926_outLine +BABEL_BP_107_33527_20120415_192039_inLine 
+BABEL_BP_107_33527_20120415_192039_outLine +BABEL_BP_107_34169_20120328_012436_inLine +BABEL_BP_107_34169_20120328_012436_outLine +BABEL_BP_107_34194_20120218_004244_inLine +BABEL_BP_107_34194_20120218_004244_outLine +BABEL_BP_107_34248_20120704_190743_inLine +BABEL_BP_107_34248_20120704_190743_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35064_20120609_183707_inLine +BABEL_BP_107_35064_20120609_183707_outLine +BABEL_BP_107_35576_20120618_004603_inLine +BABEL_BP_107_35576_20120618_004603_outLine +BABEL_BP_107_35612_20120424_221417_inLine +BABEL_BP_107_35612_20120424_221418_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_35932_20120321_221039_inLine +BABEL_BP_107_35932_20120321_221039_outLine +BABEL_BP_107_35951_20120415_161914_inLine +BABEL_BP_107_35951_20120415_161914_outLine +BABEL_BP_107_35972_20120510_232832_inLine +BABEL_BP_107_35972_20120510_232832_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_36155_20120421_014500_inLine +BABEL_BP_107_36155_20120421_014500_outLine +BABEL_BP_107_36868_20120426_234641_inLine +BABEL_BP_107_36868_20120426_234641_outLine +BABEL_BP_107_37094_20120208_155100_inLine +BABEL_BP_107_37094_20120208_155100_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37203_20120409_183756_inLine +BABEL_BP_107_37203_20120409_183756_outLine +BABEL_BP_107_37260_20120509_024525_inLine +BABEL_BP_107_37260_20120509_024525_outLine +BABEL_BP_107_37348_20120506_234059_inLine +BABEL_BP_107_37348_20120506_234938_inLine +BABEL_BP_107_37348_20120507_000848_inLine +BABEL_BP_107_37348_20120507_000848_outLine +BABEL_BP_107_37766_20120608_155216_inLine +BABEL_BP_107_37766_20120608_155217_outLine +BABEL_BP_107_37784_20120509_195942_inLine +BABEL_BP_107_37784_20120509_195942_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine +BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_38985_20120506_223622_inLine +BABEL_BP_107_38985_20120506_223622_outLine +BABEL_BP_107_39098_20120324_231724_inLine +BABEL_BP_107_39098_20120324_231724_outLine +BABEL_BP_107_39098_20120324_232726_inLine +BABEL_BP_107_39098_20120324_232726_outLine +BABEL_BP_107_39114_20120614_184836_inLine +BABEL_BP_107_39114_20120614_184836_outLine +BABEL_BP_107_39287_20120611_013320_inLine +BABEL_BP_107_39287_20120611_013320_outLine +BABEL_BP_107_39809_20120216_013447_inLine +BABEL_BP_107_39809_20120216_013447_outLine +BABEL_BP_107_39889_20120325_135610_inLine +BABEL_BP_107_39889_20120325_135610_outLine +BABEL_BP_107_39963_20120323_223603_inLine +BABEL_BP_107_39963_20120323_223603_outLine +BABEL_BP_107_39968_20120609_221724_inLine +BABEL_BP_107_39968_20120609_221724_outLine +BABEL_BP_107_40040_20120506_220308_outLine +BABEL_BP_107_40168_20120420_180808_inLine +BABEL_BP_107_40168_20120420_180808_outLine +BABEL_BP_107_40197_20120504_174115_inLine 
+BABEL_BP_107_40197_20120504_174115_outLine +BABEL_BP_107_40809_20120627_194401_inLine +BABEL_BP_107_40809_20120627_194401_outLine +BABEL_BP_107_41075_20120416_005109_inLine +BABEL_BP_107_41075_20120416_005109_outLine +BABEL_BP_107_41512_20120704_113900_inLine +BABEL_BP_107_41512_20120704_113900_outLine +BABEL_BP_107_41561_20120704_233037_inLine +BABEL_BP_107_41561_20120704_233037_outLine +BABEL_BP_107_41686_20120217_004524_inLine +BABEL_BP_107_41686_20120217_004524_outLine +BABEL_BP_107_41733_20120429_210259_inLine +BABEL_BP_107_41733_20120429_210259_outLine +BABEL_BP_107_41949_20120430_155207_inLine +BABEL_BP_107_41949_20120430_155207_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine +BABEL_BP_107_42229_20120216_204712_inLine +BABEL_BP_107_42229_20120216_204712_outLine +BABEL_BP_107_42420_20120705_031347_inLine +BABEL_BP_107_42420_20120705_031347_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_42788_20120421_142943_inLine +BABEL_BP_107_42788_20120421_142943_outLine +BABEL_BP_107_43317_20120510_000906_inLine +BABEL_BP_107_43317_20120510_000906_outLine +BABEL_BP_107_43383_20120404_222305_inLine +BABEL_BP_107_43383_20120404_222305_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44023_20120430_233729_inLine +BABEL_BP_107_44023_20120430_233730_outLine +BABEL_BP_107_44038_20120704_200232_inLine +BABEL_BP_107_44038_20120704_200232_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_44209_20120418_205150_inLine +BABEL_BP_107_44209_20120418_205150_outLine +BABEL_BP_107_44500_20120421_220207_inLine +BABEL_BP_107_44500_20120421_220207_outLine +BABEL_BP_107_44649_20120429_012920_inLine +BABEL_BP_107_44649_20120429_012920_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45145_20120215_141231_inLine +BABEL_BP_107_45145_20120215_141231_outLine +BABEL_BP_107_45214_20120418_132013_inLine +BABEL_BP_107_45214_20120418_132013_outLine +BABEL_BP_107_45472_20120210_160318_inLine +BABEL_BP_107_45472_20120210_160318_outLine +BABEL_BP_107_45642_20120211_232703_inLine +BABEL_BP_107_45642_20120211_232703_outLine +BABEL_BP_107_45655_20120218_191119_inLine +BABEL_BP_107_45655_20120218_191119_outLine +BABEL_BP_107_45678_20120210_172837_inLine +BABEL_BP_107_45678_20120210_172837_outLine +BABEL_BP_107_45681_20120517_003820_inLine +BABEL_BP_107_45681_20120517_003820_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46269_20120616_171713_inLine +BABEL_BP_107_46269_20120616_171713_outLine +BABEL_BP_107_46409_20120429_201101_inLine +BABEL_BP_107_46409_20120429_201101_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46593_20120429_172814_inLine +BABEL_BP_107_46593_20120429_172814_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_47429_20120614_125021_inLine +BABEL_BP_107_47429_20120614_125021_outLine 
+BABEL_BP_107_47469_20120409_195752_inLine +BABEL_BP_107_47469_20120409_195752_outLine +BABEL_BP_107_47634_20120405_165429_inLine +BABEL_BP_107_47634_20120405_165429_outLine +BABEL_BP_107_47733_20120508_112151_inLine +BABEL_BP_107_47733_20120508_112151_outLine +BABEL_BP_107_48061_20120420_003849_inLine +BABEL_BP_107_48061_20120420_003849_outLine +BABEL_BP_107_48061_20120420_005250_inLine +BABEL_BP_107_48061_20120420_005250_outLine +BABEL_BP_107_48072_20120218_181934_inLine +BABEL_BP_107_48072_20120218_181934_outLine +BABEL_BP_107_48072_20120218_183449_inLine +BABEL_BP_107_48072_20120218_183449_outLine +BABEL_BP_107_48317_20120423_021629_inLine +BABEL_BP_107_48317_20120423_021629_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_48410_20120329_220200_inLine +BABEL_BP_107_48410_20120329_220200_outLine +BABEL_BP_107_48536_20120214_212101_inLine +BABEL_BP_107_48536_20120214_212101_outLine +BABEL_BP_107_48645_20120421_221346_inLine +BABEL_BP_107_48645_20120421_221346_outLine +BABEL_BP_107_49042_20120408_181734_inLine +BABEL_BP_107_49042_20120408_181734_outLine +BABEL_BP_107_49173_20120505_204557_inLine +BABEL_BP_107_49173_20120505_204557_outLine +BABEL_BP_107_49306_20120524_204041_inLine +BABEL_BP_107_49306_20120524_204041_outLine +BABEL_BP_107_49624_20120618_024358_inLine +BABEL_BP_107_49624_20120618_024358_outLine +BABEL_BP_107_50101_20120208_164249_inLine +BABEL_BP_107_50101_20120208_164249_outLine +BABEL_BP_107_50101_20120208_170815_inLine +BABEL_BP_107_50101_20120208_170815_outLine +BABEL_BP_107_50416_20120517_120502_inLine +BABEL_BP_107_50416_20120517_120502_outLine +BABEL_BP_107_50555_20120428_205621_inLine +BABEL_BP_107_50555_20120428_205621_outLine +BABEL_BP_107_50597_20120516_212308_inLine +BABEL_BP_107_50597_20120516_212308_outLine +BABEL_BP_107_50763_20120220_151302_inLine +BABEL_BP_107_50763_20120220_151302_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51149_20120514_203206_inLine +BABEL_BP_107_51149_20120514_203207_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52325_20120418_011735_inLine +BABEL_BP_107_52325_20120418_011735_outLine +BABEL_BP_107_52446_20120212_002618_inLine +BABEL_BP_107_52446_20120212_002618_outLine +BABEL_BP_107_52515_20120324_020411_inLine +BABEL_BP_107_52515_20120324_020411_outLine +BABEL_BP_107_52606_20120617_195206_inLine +BABEL_BP_107_52606_20120617_195206_outLine +BABEL_BP_107_52642_20120517_000300_inLine +BABEL_BP_107_52642_20120517_000300_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_52913_20120704_121758_inLine +BABEL_BP_107_52913_20120704_121759_outLine +BABEL_BP_107_53179_20120618_003820_inLine +BABEL_BP_107_53179_20120618_003820_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_53352_20120504_210910_inLine +BABEL_BP_107_53352_20120504_210910_outLine +BABEL_BP_107_53429_20120704_123624_inLine +BABEL_BP_107_53429_20120704_123624_outLine +BABEL_BP_107_53500_20120416_172018_inLine +BABEL_BP_107_53500_20120416_172018_outLine +BABEL_BP_107_53989_20120703_234506_inLine +BABEL_BP_107_53989_20120703_234506_outLine +BABEL_BP_107_53989_20120703_235719_inLine 
+BABEL_BP_107_53989_20120703_235719_outLine +BABEL_BP_107_54339_20120506_215557_inLine +BABEL_BP_107_54339_20120506_215557_outLine +BABEL_BP_107_55100_20120417_210019_inLine +BABEL_BP_107_55100_20120417_210020_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55144_20120321_012306_inLine +BABEL_BP_107_55144_20120321_012306_outLine +BABEL_BP_107_55399_20120215_193434_inLine +BABEL_BP_107_55399_20120215_193434_outLine +BABEL_BP_107_55450_20120424_185013_inLine +BABEL_BP_107_55450_20120424_185013_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_55786_20120322_173045_inLine +BABEL_BP_107_55786_20120322_173045_outLine +BABEL_BP_107_55820_20120411_162436_inLine +BABEL_BP_107_55820_20120411_162436_outLine +BABEL_BP_107_55823_20120608_172512_inLine +BABEL_BP_107_55823_20120608_172512_outLine +BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_56591_20120418_002004_inLine +BABEL_BP_107_56591_20120418_002004_outLine +BABEL_BP_107_56868_20120406_013202_inLine +BABEL_BP_107_56868_20120406_013202_outLine +BABEL_BP_107_56943_20120222_201642_inLine +BABEL_BP_107_56943_20120222_201642_outLine +BABEL_BP_107_57071_20120527_184402_inLine +BABEL_BP_107_57071_20120527_184402_outLine +BABEL_BP_107_57277_20120503_200553_inLine +BABEL_BP_107_57277_20120503_200553_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57609_20120430_223510_inLine +BABEL_BP_107_57609_20120430_223510_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_57724_20120212_213811_inLine +BABEL_BP_107_57724_20120212_213811_outLine +BABEL_BP_107_57907_20120608_160937_inLine +BABEL_BP_107_57907_20120608_160939_outLine +BABEL_BP_107_58157_20120608_181026_inLine +BABEL_BP_107_58157_20120608_181026_outLine +BABEL_BP_107_58413_20120418_134444_inLine +BABEL_BP_107_58413_20120418_134444_outLine +BABEL_BP_107_58923_20120210_190334_inLine +BABEL_BP_107_58923_20120210_190334_outLine +BABEL_BP_107_59028_20120523_205355_inLine +BABEL_BP_107_59028_20120523_205355_outLine +BABEL_BP_107_59147_20120215_152227_inLine +BABEL_BP_107_59147_20120215_152227_outLine +BABEL_BP_107_59544_20120406_170833_inLine +BABEL_BP_107_59544_20120406_170833_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_59746_20120414_161308_inLine +BABEL_BP_107_59746_20120414_161308_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_60848_20120704_171856_inLine +BABEL_BP_107_60848_20120704_171856_outLine +BABEL_BP_107_60995_20120704_234842_inLine +BABEL_BP_107_60995_20120704_234843_outLine +BABEL_BP_107_61203_20120217_182644_inLine +BABEL_BP_107_61203_20120217_182644_outLine +BABEL_BP_107_61762_20120217_131207_inLine +BABEL_BP_107_61762_20120217_131207_outLine +BABEL_BP_107_61822_20120405_153356_inLine +BABEL_BP_107_61822_20120405_153357_outLine +BABEL_BP_107_61936_20120704_141205_inLine +BABEL_BP_107_61936_20120704_141205_outLine +BABEL_BP_107_61988_20120406_134336_inLine +BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_62286_20120429_193945_inLine +BABEL_BP_107_62286_20120429_193945_outLine +BABEL_BP_107_62589_20120423_001315_inLine +BABEL_BP_107_62589_20120423_001315_outLine +BABEL_BP_107_62589_20120423_002039_inLine +BABEL_BP_107_62589_20120423_002039_outLine 
+BABEL_BP_107_63320_20120608_012846_inLine +BABEL_BP_107_63320_20120608_012846_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_64185_20120504_161653_inLine +BABEL_BP_107_64185_20120504_161653_outLine +BABEL_BP_107_64404_20120206_185707_inLine +BABEL_BP_107_64404_20120206_185708_outLine +BABEL_BP_107_64661_20120325_212204_inLine +BABEL_BP_107_64661_20120325_212204_outLine +BABEL_BP_107_64946_20120517_001754_inLine +BABEL_BP_107_64946_20120517_001754_outLine +BABEL_BP_107_65069_20120421_135835_inLine +BABEL_BP_107_65069_20120421_135835_outLine +BABEL_BP_107_65371_20120507_195517_inLine +BABEL_BP_107_65371_20120507_195517_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65443_20120220_152901_inLine +BABEL_BP_107_65443_20120220_152901_outLine +BABEL_BP_107_65601_20120417_001124_inLine +BABEL_BP_107_65601_20120417_001124_outLine +BABEL_BP_107_65629_20120322_191551_inLine +BABEL_BP_107_65629_20120322_191551_outLine +BABEL_BP_107_65656_20120503_233657_inLine +BABEL_BP_107_65656_20120503_233658_outLine +BABEL_BP_107_65717_20120414_151906_inLine +BABEL_BP_107_65717_20120414_151906_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_65923_20120420_093839_inLine +BABEL_BP_107_65923_20120420_093839_outLine +BABEL_BP_107_66082_20120608_111438_inLine +BABEL_BP_107_66082_20120608_111438_outLine +BABEL_BP_107_66101_20120426_213014_inLine +BABEL_BP_107_66101_20120426_213014_outLine +BABEL_BP_107_66103_20120505_220240_inLine +BABEL_BP_107_66103_20120505_220240_outLine +BABEL_BP_107_66275_20120503_171050_inLine +BABEL_BP_107_66275_20120503_171050_outLine +BABEL_BP_107_66330_20120502_001133_inLine +BABEL_BP_107_66330_20120502_001133_outLine +BABEL_BP_107_66441_20120324_190814_inLine +BABEL_BP_107_66441_20120324_190814_outLine +BABEL_BP_107_66668_20120212_211947_inLine +BABEL_BP_107_66668_20120212_211947_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_66798_20120404_220411_inLine +BABEL_BP_107_66798_20120404_220411_outLine +BABEL_BP_107_67150_20120618_004347_inLine +BABEL_BP_107_67150_20120618_004347_outLine +BABEL_BP_107_67411_20120322_141025_inLine +BABEL_BP_107_67411_20120322_141025_outLine +BABEL_BP_107_67733_20120215_120553_inLine +BABEL_BP_107_67733_20120215_120553_outLine +BABEL_BP_107_67775_20120502_193035_inLine +BABEL_BP_107_67775_20120502_193035_outLine +BABEL_BP_107_68028_20120502_121140_inLine +BABEL_BP_107_68028_20120502_121140_outLine +BABEL_BP_107_68136_20120416_173551_inLine +BABEL_BP_107_68136_20120416_173551_outLine +BABEL_BP_107_68239_20120608_131431_inLine +BABEL_BP_107_68239_20120608_131431_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_68861_20120323_171053_inLine +BABEL_BP_107_68861_20120323_171053_outLine +BABEL_BP_107_68861_20120323_180450_inLine +BABEL_BP_107_68861_20120323_180450_outLine +BABEL_BP_107_69052_20120417_002628_inLine +BABEL_BP_107_69052_20120417_002628_outLine +BABEL_BP_107_69230_20120703_133459_inLine +BABEL_BP_107_69230_20120703_133459_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_69368_20120211_170226_inLine +BABEL_BP_107_69368_20120211_170226_outLine +BABEL_BP_107_69446_20120416_020122_inLine +BABEL_BP_107_69446_20120416_020122_outLine +BABEL_BP_107_69473_20120605_230319_inLine 
+BABEL_BP_107_69473_20120605_230319_outLine +BABEL_BP_107_69548_20120213_023955_inLine +BABEL_BP_107_69548_20120213_023955_outLine +BABEL_BP_107_69621_20120213_130748_inLine +BABEL_BP_107_69621_20120213_130748_outLine +BABEL_BP_107_69650_20120323_023553_inLine +BABEL_BP_107_69650_20120323_023553_outLine +BABEL_BP_107_69764_20120324_234039_inLine +BABEL_BP_107_69764_20120324_234039_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_70680_20120201_144426_inLine +BABEL_BP_107_70680_20120201_144426_outLine +BABEL_BP_107_70965_20120506_175829_inLine +BABEL_BP_107_70965_20120506_175829_outLine +BABEL_BP_107_71160_20120616_001355_inLine +BABEL_BP_107_71160_20120616_001355_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_72141_20120322_223344_inLine +BABEL_BP_107_72141_20120322_223344_outLine +BABEL_BP_107_72234_20120511_134939_inLine +BABEL_BP_107_72234_20120511_134939_outLine +BABEL_BP_107_72234_20120511_140008_inLine +BABEL_BP_107_72234_20120511_140008_outLine +BABEL_BP_107_72746_20120429_003515_inLine +BABEL_BP_107_72746_20120429_003515_outLine +BABEL_BP_107_72799_20120425_133035_inLine +BABEL_BP_107_72799_20120425_133035_outLine +BABEL_BP_107_72907_20120505_105259_inLine +BABEL_BP_107_72907_20120505_105259_outLine +BABEL_BP_107_73050_20120426_114239_inLine +BABEL_BP_107_73050_20120426_114239_outLine +BABEL_BP_107_73059_20120425_012258_inLine +BABEL_BP_107_73059_20120425_012258_outLine +BABEL_BP_107_73072_20120322_141121_inLine +BABEL_BP_107_73072_20120322_141121_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_73170_20120322_151236_inLine +BABEL_BP_107_73170_20120322_151236_outLine +BABEL_BP_107_73780_20120613_200802_inLine +BABEL_BP_107_73780_20120613_200802_outLine +BABEL_BP_107_73786_20120323_222826_inLine +BABEL_BP_107_73786_20120323_222826_outLine +BABEL_BP_107_73923_20120118_183938_inLine +BABEL_BP_107_73923_20120118_183938_outLine +BABEL_BP_107_74368_20120424_185039_inLine +BABEL_BP_107_74368_20120424_185039_outLine +BABEL_BP_107_74508_20120418_002925_inLine +BABEL_BP_107_74508_20120418_002925_outLine +BABEL_BP_107_74607_20120426_001241_inLine +BABEL_BP_107_74607_20120426_001241_outLine +BABEL_BP_107_74884_20120323_135739_inLine +BABEL_BP_107_74884_20120323_135739_outLine +BABEL_BP_107_75151_20120611_195147_inLine +BABEL_BP_107_75151_20120611_195147_outLine +BABEL_BP_107_75354_20120506_150750_inLine +BABEL_BP_107_75354_20120506_150750_outLine +BABEL_BP_107_75740_20120216_215302_inLine +BABEL_BP_107_75740_20120216_215302_outLine +BABEL_BP_107_75871_20120214_025447_inLine +BABEL_BP_107_75871_20120214_025447_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine +BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76331_20120417_020306_inLine +BABEL_BP_107_76331_20120417_020306_outLine +BABEL_BP_107_76333_20120418_131111_inLine +BABEL_BP_107_76333_20120418_131111_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_77137_20120424_021726_inLine +BABEL_BP_107_77137_20120424_021726_outLine +BABEL_BP_107_77342_20120613_025311_inLine +BABEL_BP_107_77342_20120613_025311_outLine +BABEL_BP_107_77465_20120422_011705_inLine +BABEL_BP_107_77465_20120422_011705_outLine +BABEL_BP_107_77483_20120412_193453_inLine +BABEL_BP_107_77483_20120412_193453_outLine 
+BABEL_BP_107_77485_20120612_135036_inLine +BABEL_BP_107_77485_20120612_135036_outLine +BABEL_BP_107_77584_20120411_172119_inLine +BABEL_BP_107_77584_20120411_172119_outLine +BABEL_BP_107_77811_20120616_161504_inLine +BABEL_BP_107_77811_20120616_161504_outLine +BABEL_BP_107_77965_20120215_010556_inLine +BABEL_BP_107_77965_20120215_010556_outLine +BABEL_BP_107_78046_20120508_124043_inLine +BABEL_BP_107_78046_20120508_124043_outLine +BABEL_BP_107_78114_20120418_223932_inLine +BABEL_BP_107_78114_20120418_223932_outLine +BABEL_BP_107_78114_20120418_225258_inLine +BABEL_BP_107_78114_20120418_225258_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_78290_20120425_225137_inLine +BABEL_BP_107_78290_20120425_225137_outLine +BABEL_BP_107_78583_20120505_001318_inLine +BABEL_BP_107_78583_20120505_001318_outLine +BABEL_BP_107_78728_20120320_163004_inLine +BABEL_BP_107_78728_20120320_163004_outLine +BABEL_BP_107_78879_20120322_210341_inLine +BABEL_BP_107_78879_20120322_210341_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_79899_20120507_153432_inLine +BABEL_BP_107_79899_20120507_153432_outLine +BABEL_BP_107_80068_20120419_172811_inLine +BABEL_BP_107_80068_20120419_172811_outLine +BABEL_BP_107_80075_20120418_223142_inLine +BABEL_BP_107_80075_20120418_223142_outLine +BABEL_BP_107_80156_20120325_205810_inLine +BABEL_BP_107_80156_20120325_205810_outLine +BABEL_BP_107_80195_20120328_024036_inLine +BABEL_BP_107_80195_20120328_024036_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_80856_20120325_214845_inLine +BABEL_BP_107_80856_20120325_214845_outLine +BABEL_BP_107_81015_20120418_212020_inLine +BABEL_BP_107_81015_20120418_212020_outLine +BABEL_BP_107_81070_20120612_140617_inLine +BABEL_BP_107_81070_20120612_140617_outLine +BABEL_BP_107_81084_20120328_220200_inLine +BABEL_BP_107_81084_20120328_220200_outLine +BABEL_BP_107_81119_20120418_221853_inLine +BABEL_BP_107_81119_20120418_221853_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81587_20120429_185902_inLine +BABEL_BP_107_81587_20120429_185902_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81799_20120506_220843_inLine +BABEL_BP_107_81799_20120506_220843_outLine +BABEL_BP_107_81820_20120506_004426_inLine +BABEL_BP_107_81820_20120506_004426_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_82009_20120503_174403_inLine +BABEL_BP_107_82009_20120503_174403_outLine +BABEL_BP_107_82023_20120217_190453_inLine +BABEL_BP_107_82023_20120217_190453_outLine +BABEL_BP_107_82408_20120216_020857_inLine +BABEL_BP_107_82408_20120216_020857_outLine +BABEL_BP_107_82409_20120507_104757_inLine +BABEL_BP_107_82409_20120507_104757_outLine +BABEL_BP_107_82443_20120705_035534_inLine +BABEL_BP_107_82443_20120705_035535_outLine +BABEL_BP_107_82484_20120409_191254_inLine +BABEL_BP_107_82484_20120409_191254_outLine +BABEL_BP_107_82881_20120212_142555_inLine +BABEL_BP_107_82881_20120212_142555_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_83493_20120509_144229_inLine 
+BABEL_BP_107_83493_20120509_144229_outLine +BABEL_BP_107_83585_20120429_194403_inLine +BABEL_BP_107_83585_20120429_194403_outLine +BABEL_BP_107_83791_20120329_034633_inLine +BABEL_BP_107_83791_20120329_034633_outLine +BABEL_BP_107_84394_20120426_000543_inLine +BABEL_BP_107_84394_20120426_000543_outLine +BABEL_BP_107_84394_20120426_001306_inLine +BABEL_BP_107_84394_20120426_001306_outLine +BABEL_BP_107_84439_20120418_011204_inLine +BABEL_BP_107_84439_20120418_011204_outLine +BABEL_BP_107_84491_20120430_203802_inLine +BABEL_BP_107_84491_20120430_203802_outLine +BABEL_BP_107_84608_20120421_181859_inLine +BABEL_BP_107_84608_20120421_181859_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84865_20120618_002645_inLine +BABEL_BP_107_84865_20120618_002645_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine +BABEL_BP_107_84980_20120419_172354_inLine +BABEL_BP_107_84980_20120419_172354_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_85752_20120607_210520_inLine +BABEL_BP_107_85752_20120607_210520_outLine +BABEL_BP_107_85948_20120212_131910_inLine +BABEL_BP_107_85948_20120212_131910_outLine +BABEL_BP_107_86004_20120324_175639_inLine +BABEL_BP_107_86004_20120324_175639_outLine +BABEL_BP_107_86900_20120216_203256_inLine +BABEL_BP_107_86900_20120216_203256_outLine +BABEL_BP_107_86956_20120322_203435_inLine +BABEL_BP_107_86956_20120322_203435_outLine +BABEL_BP_107_87059_20120704_001703_inLine +BABEL_BP_107_87059_20120704_001703_outLine +BABEL_BP_107_87077_20120421_193746_inLine +BABEL_BP_107_87077_20120421_193746_outLine +BABEL_BP_107_87107_20120321_205615_inLine +BABEL_BP_107_87107_20120321_205615_outLine +BABEL_BP_107_87107_20120321_234308_inLine +BABEL_BP_107_87107_20120321_234308_outLine +BABEL_BP_107_87234_20120704_120118_inLine +BABEL_BP_107_87234_20120704_120118_outLine +BABEL_BP_107_87351_20120330_014139_inLine +BABEL_BP_107_87351_20120330_014139_outLine +BABEL_BP_107_87520_20120414_023319_inLine +BABEL_BP_107_87520_20120414_023319_outLine +BABEL_BP_107_87607_20120516_233058_inLine +BABEL_BP_107_87607_20120516_233058_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_87961_20120324_022603_inLine +BABEL_BP_107_87961_20120324_022603_outLine +BABEL_BP_107_88245_20120511_235523_inLine +BABEL_BP_107_88245_20120511_235523_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_88464_20120503_003553_inLine +BABEL_BP_107_88464_20120503_003553_outLine +BABEL_BP_107_88932_20120417_195406_inLine +BABEL_BP_107_88932_20120417_195406_outLine +BABEL_BP_107_88982_20120506_154243_inLine +BABEL_BP_107_88982_20120506_154243_outLine +BABEL_BP_107_89301_20120429_183901_inLine +BABEL_BP_107_89301_20120429_183901_outLine +BABEL_BP_107_89345_20120322_214445_inLine +BABEL_BP_107_89345_20120322_214445_outLine +BABEL_BP_107_89345_20120322_220001_inLine +BABEL_BP_107_89345_20120322_220001_outLine +BABEL_BP_107_89573_20120422_162720_inLine +BABEL_BP_107_89573_20120422_162720_outLine +BABEL_BP_107_89583_20120425_142134_inLine +BABEL_BP_107_89583_20120425_142134_outLine +BABEL_BP_107_89867_20120324_204851_inLine +BABEL_BP_107_89867_20120324_204851_outLine +BABEL_BP_107_90046_20120613_193455_inLine +BABEL_BP_107_90046_20120613_193455_outLine 
+BABEL_BP_107_90055_20120220_173056_inLine +BABEL_BP_107_90055_20120220_173056_outLine +BABEL_BP_107_90127_20120429_190926_inLine +BABEL_BP_107_90127_20120429_190926_outLine +BABEL_BP_107_90389_20120510_233725_inLine +BABEL_BP_107_90389_20120510_233725_outLine +BABEL_BP_107_90436_20120507_172546_inLine +BABEL_BP_107_90436_20120507_172546_outLine +BABEL_BP_107_90511_20120212_010634_inLine +BABEL_BP_107_90511_20120212_010634_outLine +BABEL_BP_107_90730_20120627_132153_inLine +BABEL_BP_107_90730_20120627_132153_outLine +BABEL_BP_107_90730_20120627_133239_inLine +BABEL_BP_107_90730_20120627_133239_outLine +BABEL_BP_107_90810_20120217_200922_inLine +BABEL_BP_107_90810_20120217_200922_outLine +BABEL_BP_107_90834_20120212_143912_inLine +BABEL_BP_107_90834_20120212_143912_outLine +BABEL_BP_107_91143_20120422_002758_inLine +BABEL_BP_107_91143_20120422_002758_outLine +BABEL_BP_107_91171_20120414_012621_inLine +BABEL_BP_107_91171_20120414_012621_outLine +BABEL_BP_107_91386_20120703_235839_inLine +BABEL_BP_107_91386_20120703_235839_outLine +BABEL_BP_107_91677_20120422_141358_inLine +BABEL_BP_107_91677_20120422_141358_outLine +BABEL_BP_107_91703_20120617_235231_inLine +BABEL_BP_107_92308_20120430_133906_inLine +BABEL_BP_107_92308_20120430_133906_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92752_20120421_184804_inLine +BABEL_BP_107_92752_20120421_184805_outLine +BABEL_BP_107_92820_20120617_124233_inLine +BABEL_BP_107_92820_20120617_124233_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93000_20120325_233431_inLine +BABEL_BP_107_93000_20120325_233431_outLine +BABEL_BP_107_93151_20120501_140536_inLine +BABEL_BP_107_93151_20120501_140536_outLine +BABEL_BP_107_93192_20120322_180400_inLine +BABEL_BP_107_93192_20120322_180400_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_93314_20120501_134510_inLine +BABEL_BP_107_93314_20120501_134510_outLine +BABEL_BP_107_93436_20120611_021137_inLine +BABEL_BP_107_93607_20120418_014651_inLine +BABEL_BP_107_93607_20120418_014651_outLine +BABEL_BP_107_93643_20120212_175939_inLine +BABEL_BP_107_93643_20120212_175939_outLine +BABEL_BP_107_93811_20120418_213351_inLine +BABEL_BP_107_93811_20120418_213351_outLine +BABEL_BP_107_94168_20120326_171855_inLine +BABEL_BP_107_94168_20120326_171855_outLine +BABEL_BP_107_94235_20120428_004200_inLine +BABEL_BP_107_94235_20120428_004200_outLine +BABEL_BP_107_94752_20120218_144213_inLine +BABEL_BP_107_94752_20120218_144213_outLine +BABEL_BP_107_95350_20120325_000241_inLine +BABEL_BP_107_95350_20120325_000241_outLine +BABEL_BP_107_95534_20120608_005148_inLine +BABEL_BP_107_95534_20120608_005148_outLine +BABEL_BP_107_95650_20120208_163126_inLine +BABEL_BP_107_95650_20120208_163126_outLine +BABEL_BP_107_95736_20120323_154852_inLine +BABEL_BP_107_95736_20120323_154852_outLine +BABEL_BP_107_95849_20120704_011515_inLine +BABEL_BP_107_95849_20120704_011515_outLine +BABEL_BP_107_95893_20120501_114843_inLine +BABEL_BP_107_95893_20120501_114843_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_96108_20120421_194651_inLine +BABEL_BP_107_96108_20120421_194651_outLine +BABEL_BP_107_96347_20120212_202200_inLine +BABEL_BP_107_96347_20120212_202200_outLine +BABEL_BP_107_96463_20120507_233133_inLine +BABEL_BP_107_96463_20120507_233133_outLine +BABEL_BP_107_96636_20120421_193514_inLine 
+BABEL_BP_107_96636_20120421_193514_outLine +BABEL_BP_107_96636_20120421_195252_inLine +BABEL_BP_107_96636_20120421_195252_outLine +BABEL_BP_107_96788_20120409_195914_inLine +BABEL_BP_107_96788_20120409_195914_outLine +BABEL_BP_107_97004_20120704_194048_inLine +BABEL_BP_107_97004_20120704_194048_outLine +BABEL_BP_107_97230_20120612_142451_inLine +BABEL_BP_107_97230_20120612_142451_outLine +BABEL_BP_107_97254_20120422_153600_inLine +BABEL_BP_107_97254_20120422_153600_outLine +BABEL_BP_107_97298_20120704_201748_inLine +BABEL_BP_107_97298_20120704_201748_outLine +BABEL_BP_107_97590_20120616_165917_inLine +BABEL_BP_107_97590_20120616_165917_outLine +BABEL_BP_107_97635_20120617_233435_inLine +BABEL_BP_107_97635_20120617_233435_outLine +BABEL_BP_107_97699_20120618_005543_inLine +BABEL_BP_107_97699_20120618_005543_outLine +BABEL_BP_107_97797_20120617_234645_inLine +BABEL_BP_107_97797_20120617_234645_outLine +BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99514_20120505_142249_inLine +BABEL_BP_107_99514_20120505_142249_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine +BABEL_BP_107_99709_20120510_011731_inLine +BABEL_BP_107_99709_20120510_011731_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list new file mode 100644 index 00000000000..81896827fbf --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list @@ -0,0 +1,194 @@ +BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_28754_20120417_233136_inLine +BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine 
+BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine +BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_61988_20120406_134336_inLine 
+BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine +BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list new file mode 100644 index 00000000000..522b95fc080 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list @@ -0,0 +1,1042 @@ +BABEL_BP_107_10033_20120208_180820_outLine +BABEL_BP_107_10066_20120428_121544_inLine 
+BABEL_BP_107_10066_20120428_121544_outLine +BABEL_BP_107_10190_20120424_023348_inLine +BABEL_BP_107_10190_20120425_012249_inLine +BABEL_BP_107_10211_20120323_013915_inLine +BABEL_BP_107_10211_20120323_013915_outLine +BABEL_BP_107_10545_20120424_184701_inLine +BABEL_BP_107_10697_20120516_194235_inLine +BABEL_BP_107_10732_20120328_172421_inLine +BABEL_BP_107_10732_20120328_172422_outLine +BABEL_BP_107_10900_20120322_022523_inLine +BABEL_BP_107_10900_20120322_022524_outLine +BABEL_BP_107_10945_20120322_222039_inLine +BABEL_BP_107_10945_20120322_222039_outLine +BABEL_BP_107_10973_20120404_233129_inLine +BABEL_BP_107_10973_20120404_233129_outLine +BABEL_BP_107_10985_20120502_123725_inLine +BABEL_BP_107_10985_20120502_123725_outLine +BABEL_BP_107_11022_20120422_013455_inLine +BABEL_BP_107_11022_20120422_013455_outLine +BABEL_BP_107_11422_20120208_160559_inLine +BABEL_BP_107_11422_20120208_160559_outLine +BABEL_BP_107_11479_20120212_011029_inLine +BABEL_BP_107_11479_20120212_011029_outLine +BABEL_BP_107_11827_20120322_205100_inLine +BABEL_BP_107_11827_20120322_205100_outLine +BABEL_BP_107_11949_20120704_001817_inLine +BABEL_BP_107_11949_20120704_001817_outLine +BABEL_BP_107_11982_20120217_004340_inLine +BABEL_BP_107_12486_20120424_174759_inLine +BABEL_BP_107_12552_20120503_152109_inLine +BABEL_BP_107_12569_20120609_190056_inLine +BABEL_BP_107_12569_20120609_190056_outLine +BABEL_BP_107_12587_20120322_230456_inLine +BABEL_BP_107_12587_20120322_230457_outLine +BABEL_BP_107_12643_20120704_185225_inLine +BABEL_BP_107_12643_20120704_185225_outLine +BABEL_BP_107_12897_20120413_195042_inLine +BABEL_BP_107_12897_20120413_195042_outLine +BABEL_BP_107_12897_20120413_200727_inLine +BABEL_BP_107_12897_20120413_200727_outLine +BABEL_BP_107_13065_20120425_034939_inLine +BABEL_BP_107_13065_20120425_034939_outLine +BABEL_BP_107_13229_20120417_201028_inLine +BABEL_BP_107_13229_20120417_201028_outLine +BABEL_BP_107_13272_20120320_141107_outLine +BABEL_BP_107_13272_20120320_142506_outLine +BABEL_BP_107_13389_20120406_141036_inLine +BABEL_BP_107_13389_20120406_141036_outLine +BABEL_BP_107_13419_20120218_213925_inLine +BABEL_BP_107_13419_20120218_214753_inLine +BABEL_BP_107_13781_20120516_204849_inLine +BABEL_BP_107_13781_20120516_204849_outLine +BABEL_BP_107_13795_20120418_190613_inLine +BABEL_BP_107_13795_20120418_190613_outLine +BABEL_BP_107_14075_20120507_004435_inLine +BABEL_BP_107_14294_20120328_010858_inLine +BABEL_BP_107_14294_20120328_010858_outLine +BABEL_BP_107_14468_20120321_003916_inLine +BABEL_BP_107_14468_20120321_003916_outLine +BABEL_BP_107_14475_20120704_204813_inLine +BABEL_BP_107_14475_20120704_204813_outLine +BABEL_BP_107_14500_20120429_194225_outLine +BABEL_BP_107_14707_20120429_004741_inLine +BABEL_BP_107_14707_20120429_004741_outLine +BABEL_BP_107_14707_20120429_005954_inLine +BABEL_BP_107_14891_20120118_195012_inLine +BABEL_BP_107_14707_20120429_005954_outLine +BABEL_BP_107_14729_20120429_200418_outLine +BABEL_BP_107_14836_20120507_235040_outLine +BABEL_BP_107_14891_20120118_195012_outLine +BABEL_BP_107_14936_20120405_224830_inLine +BABEL_BP_107_14936_20120405_224830_outLine +BABEL_BP_107_15073_20120417_011547_outLine +BABEL_BP_107_15142_20120322_132735_outLine +BABEL_BP_107_15353_20120504_193952_inLine +BABEL_BP_107_15353_20120504_193952_outLine +BABEL_BP_107_15460_20120426_224823_inLine +BABEL_BP_107_15460_20120426_224823_outLine +BABEL_BP_107_15473_20120217_231342_inLine +BABEL_BP_107_15696_20120328_010156_outLine +BABEL_BP_107_15719_20120612_122632_inLine 
+BABEL_BP_107_15719_20120612_122632_outLine +BABEL_BP_107_15744_20120608_123258_inLine +BABEL_BP_107_15873_20120405_224524_inLine +BABEL_BP_107_15873_20120405_224524_outLine +BABEL_BP_107_15881_20120322_233839_inLine +BABEL_BP_107_15940_20120424_221327_inLine +BABEL_BP_107_16406_20120324_011714_inLine +BABEL_BP_107_16406_20120324_011714_outLine +BABEL_BP_107_16617_20120228_014302_inLine +BABEL_BP_107_16646_20120418_130946_outLine +BABEL_BP_107_16660_20120210_231224_outLine +BABEL_BP_107_16669_20120208_140603_inLine +BABEL_BP_107_16801_20120418_121951_inLine +BABEL_BP_107_16801_20120418_203644_inLine +BABEL_BP_107_16875_20120704_133550_inLine +BABEL_BP_107_16875_20120704_133550_outLine +BABEL_BP_107_16883_20120501_194424_inLine +BABEL_BP_107_16883_20120501_194424_outLine +BABEL_BP_107_16950_20120704_155322_inLine +BABEL_BP_107_16950_20120704_155322_outLine +BABEL_BP_107_17013_20120501_002142_inLine +BABEL_BP_107_17013_20120501_002142_outLine +BABEL_BP_107_17018_20120322_220450_inLine +BABEL_BP_107_17018_20120322_220450_outLine +BABEL_BP_107_17093_20120501_202548_outLine +BABEL_BP_107_17203_20120212_220043_outLine +BABEL_BP_107_17353_20120617_133436_inLine +BABEL_BP_107_17353_20120617_133436_outLine +BABEL_BP_107_17933_20120421_134916_inLine +BABEL_BP_107_17933_20120421_134916_outLine +BABEL_BP_107_18187_20120608_125102_outLine +BABEL_BP_107_18209_20120420_004725_inLine +BABEL_BP_107_18234_20120210_230712_inLine +BABEL_BP_107_18495_20120618_003601_outLine +BABEL_BP_107_18534_20120504_132522_inLine +BABEL_BP_107_18534_20120504_132522_outLine +BABEL_BP_107_18858_20120209_004527_outLine +BABEL_BP_107_19012_20120503_215037_inLine +BABEL_BP_107_19012_20120503_215037_outLine +BABEL_BP_107_19248_20120508_210026_inLine +BABEL_BP_107_19248_20120508_210027_outLine +BABEL_BP_107_19290_20120421_141409_inLine +BABEL_BP_107_19290_20120421_141409_outLine +BABEL_BP_107_19404_20120321_171020_inLine +BABEL_BP_107_19404_20120321_171020_outLine +BABEL_BP_107_19479_20120407_014459_inLine +BABEL_BP_107_19479_20120407_014459_outLine +BABEL_BP_107_19731_20120506_011629_inLine +BABEL_BP_107_19731_20120515_001656_inLine +BABEL_BP_107_19869_20120608_012542_outLine +BABEL_BP_107_20320_20120212_214655_inLine +BABEL_BP_107_20332_20120426_010134_inLine +BABEL_BP_107_20332_20120426_010837_inLine +BABEL_BP_107_20332_20120426_010134_outLine +BABEL_BP_107_20332_20120426_010837_outLine +BABEL_BP_107_20483_20120416_171740_outLine +BABEL_BP_107_20518_20120418_211112_inLine +BABEL_BP_107_20582_20120322_220747_inLine +BABEL_BP_107_20582_20120322_220747_outLine +BABEL_BP_107_20740_20120427_193225_inLine +BABEL_BP_107_20740_20120427_193757_inLine +BABEL_BP_107_20741_20120325_181245_outLine +BABEL_BP_107_20799_20120515_010136_inLine +BABEL_BP_107_20799_20120515_010136_outLine +BABEL_BP_107_21052_20120415_204922_inLine +BABEL_BP_107_21139_20120425_192642_outLine +BABEL_BP_107_21258_20120418_145725_inLine +BABEL_BP_107_21367_20120629_140326_outLine +BABEL_BP_107_21430_20120608_003600_outLine +BABEL_BP_107_21477_20120323_185255_inLine +BABEL_BP_107_21477_20120323_185255_outLine +BABEL_BP_107_21518_20120501_152038_inLine +BABEL_BP_107_21518_20120501_152038_outLine +BABEL_BP_107_21584_20120217_004017_inLine +BABEL_BP_107_21584_20120217_004017_outLine +BABEL_BP_107_21758_20120407_010928_inLine +BABEL_BP_107_21758_20120407_010928_outLine +BABEL_BP_107_21758_20120407_011555_inLine +BABEL_BP_107_21758_20120407_011555_outLine +BABEL_BP_107_21929_20120323_015539_inLine +BABEL_BP_107_21929_20120323_022750_inLine 
+BABEL_BP_107_21946_20120507_015056_inLine +BABEL_BP_107_21946_20120507_015056_outLine +BABEL_BP_107_22010_20120608_182138_inLine +BABEL_BP_107_22010_20120608_182138_outLine +BABEL_BP_107_22272_20120511_232328_inLine +BABEL_BP_107_22272_20120511_232328_outLine +BABEL_BP_107_22494_20120613_122322_outLine +BABEL_BP_107_22898_20120322_144401_inLine +BABEL_BP_107_22898_20120322_144401_outLine +BABEL_BP_107_22910_20120214_213815_inLine +BABEL_BP_107_22910_20120214_213815_outLine +BABEL_BP_107_22979_20120505_000039_inLine +BABEL_BP_107_22979_20120505_000039_outLine +BABEL_BP_107_23167_20120217_212610_inLine +BABEL_BP_107_23167_20120217_212610_outLine +BABEL_BP_107_23629_20120501_173549_inLine +BABEL_BP_107_23629_20120501_173549_outLine +BABEL_BP_107_23930_20120506_214145_inLine +BABEL_BP_107_24014_20120618_010729_inLine +BABEL_BP_107_24014_20120618_010729_outLine +BABEL_BP_107_24094_20120421_134318_outLine +BABEL_BP_107_24569_20120507_123854_outLine +BABEL_BP_107_24608_20120208_170106_outLine +BABEL_BP_107_24638_20120504_004348_outLine +BABEL_BP_107_24642_20120505_201543_inLine +BABEL_BP_107_24642_20120505_201543_outLine +BABEL_BP_107_24799_20120508_232153_outLine +BABEL_BP_107_24817_20120422_203514_inLine +BABEL_BP_107_24833_20120218_171649_outLine +BABEL_BP_107_25035_20120214_230841_inLine +BABEL_BP_107_25072_20120429_144535_inLine +BABEL_BP_107_25479_20120506_161146_inLine +BABEL_BP_107_25479_20120506_161146_outLine +BABEL_BP_107_25576_20120321_222905_outLine +BABEL_BP_107_25866_20120426_193335_inLine +BABEL_BP_107_26348_20120508_100651_inLine +BABEL_BP_107_26348_20120508_102042_inLine +BABEL_BP_107_26350_20120209_004945_inLine +BABEL_BP_107_26350_20120209_004945_outLine +BABEL_BP_107_26350_20120209_012139_inLine +BABEL_BP_107_26350_20120209_012139_outLine +BABEL_BP_107_26598_20120425_143602_outLine +BABEL_BP_107_26684_20120530_155756_inLine +BABEL_BP_107_26786_20120423_191945_inLine +BABEL_BP_107_26786_20120423_191945_outLine +BABEL_BP_107_27064_20120222_210044_inLine +BABEL_BP_107_27064_20120222_210044_outLine +BABEL_BP_107_27503_20120212_221915_inLine +BABEL_BP_107_27619_20120328_023110_outLine +BABEL_BP_107_27698_20120212_005737_inLine +BABEL_BP_107_27698_20120212_005737_outLine +BABEL_BP_107_27724_20120407_130547_inLine +BABEL_BP_107_27724_20120407_130547_outLine +BABEL_BP_107_27890_20120428_235422_inLine +BABEL_BP_107_27890_20120428_235422_outLine +BABEL_BP_107_27916_20120607_114245_outLine +BABEL_BP_107_27916_20120607_115650_outLine +BABEL_BP_107_28016_20120405_222219_inLine +BABEL_BP_107_28016_20120405_222219_outLine +BABEL_BP_107_28107_20120208_142843_outLine +BABEL_BP_107_28107_20120208_144923_outLine +BABEL_BP_107_28132_20120405_152728_outLine +BABEL_BP_107_28260_20120212_153106_inLine +BABEL_BP_107_28557_20120507_001619_outLine +BABEL_BP_107_28675_20120607_231549_inLine +BABEL_BP_107_28675_20120607_231549_outLine +BABEL_BP_107_28675_20120607_233243_inLine +BABEL_BP_107_28675_20120607_233243_outLine +BABEL_BP_107_28740_20120212_150039_inLine +BABEL_BP_107_28740_20120212_150039_outLine +BABEL_BP_107_29280_20120607_184929_outLine +BABEL_BP_107_29280_20120607_190345_outLine +BABEL_BP_107_29290_20120415_102435_inLine +BABEL_BP_107_29335_20120424_013042_inLine +BABEL_BP_107_29335_20120424_013042_outLine +BABEL_BP_107_29407_20120607_132315_inLine +BABEL_BP_107_29407_20120607_135318_inLine +BABEL_BP_107_29444_20120322_191236_outLine +BABEL_BP_107_29771_20120504_010738_outLine +BABEL_BP_107_29959_20120418_001028_inLine +BABEL_BP_107_29959_20120418_001028_outLine 
+BABEL_BP_107_29988_20120516_233700_inLine +BABEL_BP_107_30210_20120427_140255_inLine +BABEL_BP_107_30210_20120502_202749_inLine +BABEL_BP_107_30554_20120617_231216_outLine +BABEL_BP_107_30583_20120212_210712_inLine +BABEL_BP_107_30722_20120505_103655_inLine +BABEL_BP_107_30722_20120505_103655_outLine +BABEL_BP_107_31031_20120501_205733_inLine +BABEL_BP_107_31031_20120501_210746_inLine +BABEL_BP_107_31298_20120322_125112_outLine +BABEL_BP_107_31393_20120325_171905_inLine +BABEL_BP_107_31460_20120325_193921_inLine +BABEL_BP_107_31606_20120607_131428_inLine +BABEL_BP_107_31738_20120704_101130_outLine +BABEL_BP_107_31902_20120417_015618_inLine +BABEL_BP_107_31902_20120417_015618_outLine +BABEL_BP_107_31917_20120501_202910_inLine +BABEL_BP_107_31917_20120501_202910_outLine +BABEL_BP_107_31980_20120212_174027_inLine +BABEL_BP_107_31980_20120212_174027_outLine +BABEL_BP_107_32132_20120418_211743_inLine +BABEL_BP_107_32274_20120324_011402_inLine +BABEL_BP_107_32295_20120617_141025_inLine +BABEL_BP_107_32295_20120617_141025_outLine +BABEL_BP_107_32334_20120429_005403_inLine +BABEL_BP_107_32334_20120429_005403_outLine +BABEL_BP_107_32400_20120426_000137_inLine +BABEL_BP_107_32400_20120426_000137_outLine +BABEL_BP_107_32710_20120418_215432_inLine +BABEL_BP_107_32710_20120418_215432_outLine +BABEL_BP_107_33012_20120611_155055_inLine +BABEL_BP_107_33364_20120617_011853_inLine +BABEL_BP_107_33364_20120617_011853_outLine +BABEL_BP_107_33577_20120704_152608_outLine +BABEL_BP_107_33671_20120330_001033_inLine +BABEL_BP_107_33671_20120330_001033_outLine +BABEL_BP_107_33742_20120608_143147_inLine +BABEL_BP_107_33742_20120608_143147_outLine +BABEL_BP_107_33817_20120423_130850_inLine +BABEL_BP_107_33817_20120423_130850_outLine +BABEL_BP_107_33969_20120429_214721_outLine +BABEL_BP_107_34235_20120218_205136_outLine +BABEL_BP_107_34480_20120608_151830_inLine +BABEL_BP_107_34498_20120429_140537_inLine +BABEL_BP_107_34498_20120429_140537_outLine +BABEL_BP_107_34590_20120323_134554_inLine +BABEL_BP_107_34590_20120323_134554_outLine +BABEL_BP_107_34857_20120419_235853_inLine +BABEL_BP_107_34961_20120212_223315_inLine +BABEL_BP_107_34961_20120212_223315_outLine +BABEL_BP_107_34961_20120212_224207_inLine +BABEL_BP_107_34961_20120212_224207_outLine +BABEL_BP_107_35011_20120321_223128_inLine +BABEL_BP_107_35011_20120321_223128_outLine +BABEL_BP_107_35016_20120611_185645_outLine +BABEL_BP_107_35074_20120608_164703_outLine +BABEL_BP_107_35179_20120414_153233_inLine +BABEL_BP_107_35179_20120414_153233_outLine +BABEL_BP_107_35188_20120614_131427_inLine +BABEL_BP_107_35305_20120422_120043_outLine +BABEL_BP_107_35357_20120614_212245_inLine +BABEL_BP_107_35357_20120614_212245_outLine +BABEL_BP_107_36037_20120616_153023_outLine +BABEL_BP_107_36196_20120608_110319_inLine +BABEL_BP_107_36196_20120608_111049_inLine +BABEL_BP_107_36268_20120406_211711_inLine +BABEL_BP_107_36268_20120406_211711_outLine +BABEL_BP_107_36356_20120211_173247_inLine +BABEL_BP_107_36356_20120211_173247_outLine +BABEL_BP_107_36383_20120416_225701_outLine +BABEL_BP_107_36391_20120505_171824_inLine +BABEL_BP_107_36424_20120421_130549_inLine +BABEL_BP_107_36424_20120421_130549_outLine +BABEL_BP_107_36424_20120421_133610_inLine +BABEL_BP_107_36424_20120421_133610_outLine +BABEL_BP_107_36502_20120617_145859_inLine +BABEL_BP_107_36502_20120617_145859_outLine +BABEL_BP_107_36711_20120325_230112_inLine +BABEL_BP_107_36711_20120325_230112_outLine +BABEL_BP_107_36722_20120627_122821_inLine +BABEL_BP_107_36722_20120627_122821_outLine 
+BABEL_BP_107_37110_20120209_002706_inLine +BABEL_BP_107_37110_20120209_002706_outLine +BABEL_BP_107_37210_20120322_205536_outLine +BABEL_BP_107_37285_20120325_000245_inLine +BABEL_BP_107_37285_20120325_000245_outLine +BABEL_BP_107_37335_20120616_150016_inLine +BABEL_BP_107_37335_20120616_150016_outLine +BABEL_BP_107_37374_20120418_185819_inLine +BABEL_BP_107_37940_20120424_004619_inLine +BABEL_BP_107_37940_20120424_004619_outLine +BABEL_BP_107_38464_20120422_105536_outLine +BABEL_BP_107_38592_20120704_150926_outLine +BABEL_BP_107_38640_20120215_030154_inLine +BABEL_BP_107_38640_20120215_030154_outLine +BABEL_BP_107_38698_20120322_213531_inLine +BABEL_BP_107_38698_20120322_213531_outLine +BABEL_BP_107_38879_20120406_150304_inLine +BABEL_BP_107_38879_20120406_150304_outLine +BABEL_BP_107_38912_20120414_160852_inLine +BABEL_BP_107_38912_20120414_160852_outLine +BABEL_BP_107_39246_20120613_202128_inLine +BABEL_BP_107_39246_20120613_202128_outLine +BABEL_BP_107_39264_20120417_191639_inLine +BABEL_BP_107_39264_20120417_191639_outLine +BABEL_BP_107_39296_20120705_025906_inLine +BABEL_BP_107_39384_20120324_010939_inLine +BABEL_BP_107_39384_20120324_010939_outLine +BABEL_BP_107_39384_20120324_011832_inLine +BABEL_BP_107_39384_20120324_011832_outLine +BABEL_BP_107_39430_20120325_015935_inLine +BABEL_BP_107_39430_20120325_015935_outLine +BABEL_BP_107_40002_20120502_174229_outLine +BABEL_BP_107_40123_20120505_191426_inLine +BABEL_BP_107_40123_20120505_191426_outLine +BABEL_BP_107_40385_20120704_143210_outLine +BABEL_BP_107_40477_20120323_194919_outLine +BABEL_BP_107_40510_20120426_153808_inLine +BABEL_BP_107_40510_20120426_153808_outLine +BABEL_BP_107_40980_20120416_233130_inLine +BABEL_BP_107_40980_20120416_233130_outLine +BABEL_BP_107_40980_20120417_001128_inLine +BABEL_BP_107_40980_20120417_001128_outLine +BABEL_BP_107_41146_20120211_162158_inLine +BABEL_BP_107_41170_20120201_205341_inLine +BABEL_BP_107_41170_20120201_205341_outLine +BABEL_BP_107_41590_20120610_162218_outLine +BABEL_BP_107_41797_20120420_003902_inLine +BABEL_BP_107_41797_20120420_003902_outLine +BABEL_BP_107_42145_20120418_131525_inLine +BABEL_BP_107_42266_20120407_182544_outLine +BABEL_BP_107_42309_20120608_215912_inLine +BABEL_BP_107_42309_20120608_215912_outLine +BABEL_BP_107_42651_20120211_192913_inLine +BABEL_BP_107_42651_20120211_192913_outLine +BABEL_BP_107_42910_20120212_154722_inLine +BABEL_BP_107_42910_20120212_154722_outLine +BABEL_BP_107_43017_20120322_170152_inLine +BABEL_BP_107_43017_20120322_170152_outLine +BABEL_BP_107_43306_20120409_184959_inLine +BABEL_BP_107_43306_20120409_184959_outLine +BABEL_BP_107_43423_20120504_001214_inLine +BABEL_BP_107_43423_20120504_010312_inLine +BABEL_BP_107_43426_20120426_183951_inLine +BABEL_BP_107_43426_20120426_183951_outLine +BABEL_BP_107_43587_20120506_182330_inLine +BABEL_BP_107_43652_20120416_175011_inLine +BABEL_BP_107_43652_20120418_093619_inLine +BABEL_BP_107_44129_20120512_023836_inLine +BABEL_BP_107_44129_20120512_023836_outLine +BABEL_BP_107_44369_20120504_024021_inLine +BABEL_BP_107_44369_20120504_024021_outLine +BABEL_BP_107_44403_20120322_214144_inLine +BABEL_BP_107_44403_20120322_214144_outLine +BABEL_BP_107_44756_20120426_155822_inLine +BABEL_BP_107_44756_20120426_155822_outLine +BABEL_BP_107_44829_20120404_224815_outLine +BABEL_BP_107_44836_20120417_003600_outLine +BABEL_BP_107_44943_20120506_191737_inLine +BABEL_BP_107_45227_20120210_223857_inLine +BABEL_BP_107_45511_20120212_170655_inLine +BABEL_BP_107_45511_20120212_170655_outLine 
+BABEL_BP_107_45570_20120509_151829_inLine +BABEL_BP_107_45570_20120509_151829_outLine +BABEL_BP_107_45793_20120211_040134_inLine +BABEL_BP_107_45793_20120211_040134_outLine +BABEL_BP_107_45929_20120418_215417_outLine +BABEL_BP_107_45931_20120322_143234_inLine +BABEL_BP_107_45931_20120322_143234_outLine +BABEL_BP_107_46243_20120210_233353_inLine +BABEL_BP_107_46243_20120210_233353_outLine +BABEL_BP_107_46332_20120418_002934_inLine +BABEL_BP_107_46332_20120418_002934_outLine +BABEL_BP_107_46603_20120421_113906_inLine +BABEL_BP_107_46756_20120429_195314_outLine +BABEL_BP_107_46977_20120426_015005_inLine +BABEL_BP_107_47263_20120422_150216_inLine +BABEL_BP_107_47433_20120210_185410_outLine +BABEL_BP_107_47618_20120502_004413_inLine +BABEL_BP_107_47618_20120502_004413_outLine +BABEL_BP_107_47661_20120216_224419_inLine +BABEL_BP_107_47661_20120216_224419_outLine +BABEL_BP_107_47794_20120514_175438_inLine +BABEL_BP_107_47794_20120514_175438_outLine +BABEL_BP_107_47823_20120516_204140_inLine +BABEL_BP_107_47845_20120613_004732_outLine +BABEL_BP_107_47906_20120415_224420_inLine +BABEL_BP_107_47906_20120415_224420_outLine +BABEL_BP_107_48188_20120422_150955_inLine +BABEL_BP_107_48188_20120422_150955_outLine +BABEL_BP_107_48418_20120421_163333_inLine +BABEL_BP_107_48511_20120322_145729_inLine +BABEL_BP_107_48511_20120322_145729_outLine +BABEL_BP_107_48559_20120502_201955_inLine +BABEL_BP_107_48559_20120502_201955_outLine +BABEL_BP_107_48607_20120607_215116_outLine +BABEL_BP_107_48733_20120418_142426_inLine +BABEL_BP_107_48733_20120418_142426_outLine +BABEL_BP_107_48753_20120426_134417_inLine +BABEL_BP_107_48753_20120426_134417_outLine +BABEL_BP_107_48812_20120323_162517_inLine +BABEL_BP_107_48812_20120324_182527_inLine +BABEL_BP_107_48976_20120220_152013_inLine +BABEL_BP_107_48976_20120220_152013_outLine +BABEL_BP_107_49186_20120704_180724_inLine +BABEL_BP_107_49186_20120704_180724_outLine +BABEL_BP_107_49192_20120421_190503_outLine +BABEL_BP_107_49239_20120429_144119_inLine +BABEL_BP_107_49346_20120611_192752_outLine +BABEL_BP_107_49351_20120614_132223_inLine +BABEL_BP_107_49351_20120614_132223_outLine +BABEL_BP_107_49371_20120608_002052_inLine +BABEL_BP_107_49541_20120325_223621_inLine +BABEL_BP_107_49541_20120325_223621_outLine +BABEL_BP_107_49552_20120614_140129_inLine +BABEL_BP_107_49689_20120415_163537_inLine +BABEL_BP_107_49689_20120415_163537_outLine +BABEL_BP_107_49714_20120509_113627_outLine +BABEL_BP_107_49773_20120211_151308_inLine +BABEL_BP_107_49773_20120211_151308_outLine +BABEL_BP_107_50028_20120704_192522_inLine +BABEL_BP_107_50028_20120704_192522_outLine +BABEL_BP_107_50141_20120505_233033_inLine +BABEL_BP_107_50141_20120505_233033_outLine +BABEL_BP_107_50201_20120216_001139_inLine +BABEL_BP_107_50201_20120216_001139_outLine +BABEL_BP_107_50267_20120421_135338_inLine +BABEL_BP_107_50267_20120421_135338_outLine +BABEL_BP_107_50298_20120507_152508_outLine +BABEL_BP_107_50409_20120608_205803_inLine +BABEL_BP_107_50468_20120420_114108_inLine +BABEL_BP_107_50468_20120420_114108_outLine +BABEL_BP_107_50468_20120420_115203_inLine +BABEL_BP_107_50468_20120420_115203_outLine +BABEL_BP_107_50476_20120430_225248_inLine +BABEL_BP_107_50476_20120430_225248_outLine +BABEL_BP_107_50718_20120321_125943_inLine +BABEL_BP_107_50752_20120421_202932_inLine +BABEL_BP_107_50752_20120421_202932_outLine +BABEL_BP_107_50883_20120328_013430_inLine +BABEL_BP_107_50883_20120328_013430_outLine +BABEL_BP_107_51052_20120424_004427_inLine +BABEL_BP_107_51052_20120424_004427_outLine 
+BABEL_BP_107_51073_20120216_010300_outLine +BABEL_BP_107_51117_20120211_034844_inLine +BABEL_BP_107_51117_20120211_034844_outLine +BABEL_BP_107_51136_20120405_142910_inLine +BABEL_BP_107_51136_20120405_142910_outLine +BABEL_BP_107_51446_20120417_221307_inLine +BABEL_BP_107_51446_20120417_221307_outLine +BABEL_BP_107_51448_20120608_170641_inLine +BABEL_BP_107_51448_20120608_171219_inLine +BABEL_BP_107_51663_20120506_160921_inLine +BABEL_BP_107_51727_20120424_225602_inLine +BABEL_BP_107_51727_20120424_225602_outLine +BABEL_BP_107_52154_20120503_203816_inLine +BABEL_BP_107_52219_20120417_113120_inLine +BABEL_BP_107_52219_20120417_113120_outLine +BABEL_BP_107_52807_20120608_171526_inLine +BABEL_BP_107_52807_20120608_171526_outLine +BABEL_BP_107_52902_20120421_150627_outLine +BABEL_BP_107_53181_20120211_163316_inLine +BABEL_BP_107_53181_20120211_163316_outLine +BABEL_BP_107_53463_20120421_150635_inLine +BABEL_BP_107_53463_20120421_150635_outLine +BABEL_BP_107_53463_20120421_152028_inLine +BABEL_BP_107_53463_20120421_152028_outLine +BABEL_BP_107_53649_20120611_193416_outLine +BABEL_BP_107_53653_20120607_150151_outLine +BABEL_BP_107_53703_20120502_153540_outLine +BABEL_BP_107_53824_20120503_223532_inLine +BABEL_BP_107_53824_20120503_223532_outLine +BABEL_BP_107_53824_20120503_225007_inLine +BABEL_BP_107_53824_20120503_225007_outLine +BABEL_BP_107_53982_20120509_013004_outLine +BABEL_BP_107_53994_20120501_161638_outLine +BABEL_BP_107_54199_20120607_200253_inLine +BABEL_BP_107_54199_20120607_202722_inLine +BABEL_BP_107_54199_20120607_202722_outLine +BABEL_BP_107_54241_20120324_013254_inLine +BABEL_BP_107_54241_20120324_013254_outLine +BABEL_BP_107_54332_20120608_182424_inLine +BABEL_BP_107_54332_20120608_183219_inLine +BABEL_BP_107_54518_20120608_120238_inLine +BABEL_BP_107_54621_20120421_132410_inLine +BABEL_BP_107_54621_20120421_132410_outLine +BABEL_BP_107_54785_20120602_195720_inLine +BABEL_BP_107_54787_20120405_202915_inLine +BABEL_BP_107_54787_20120405_202915_outLine +BABEL_BP_107_55182_20120209_015206_inLine +BABEL_BP_107_55355_20120608_155709_inLine +BABEL_BP_107_55355_20120612_142521_inLine +BABEL_BP_107_55396_20120321_141254_outLine +BABEL_BP_107_55470_20120421_134215_outLine +BABEL_BP_107_55777_20120421_234307_inLine +BABEL_BP_107_55777_20120421_234307_outLine +BABEL_BP_107_55874_20120504_184342_inLine +BABEL_BP_107_55874_20120504_184343_outLine +BABEL_BP_107_56039_20120516_215649_inLine +BABEL_BP_107_56039_20120516_215649_outLine +BABEL_BP_107_56070_20120220_174719_inLine +BABEL_BP_107_57148_20120217_014955_inLine +BABEL_BP_107_57148_20120217_014955_outLine +BABEL_BP_107_57148_20120217_024257_inLine +BABEL_BP_107_57148_20120217_024257_outLine +BABEL_BP_107_57422_20120508_014547_inLine +BABEL_BP_107_57422_20120508_014547_outLine +BABEL_BP_107_57457_20120617_193611_inLine +BABEL_BP_107_57457_20120617_193611_outLine +BABEL_BP_107_57619_20120505_151800_inLine +BABEL_BP_107_58108_20120509_141003_inLine +BABEL_BP_107_58108_20120509_141003_outLine +BABEL_BP_107_58137_20120421_185042_inLine +BABEL_BP_107_58137_20120421_185042_outLine +BABEL_BP_107_58190_20120506_195510_outLine +BABEL_BP_107_58232_20120501_122112_inLine +BABEL_BP_107_58232_20120501_122112_outLine +BABEL_BP_107_58357_20120507_125021_inLine +BABEL_BP_107_58357_20120507_125021_outLine +BABEL_BP_107_58536_20120501_013825_inLine +BABEL_BP_107_58536_20120501_013825_outLine +BABEL_BP_107_58746_20120614_181729_inLine +BABEL_BP_107_58746_20120614_181729_outLine +BABEL_BP_107_58863_20120218_011117_inLine 
+BABEL_BP_107_58863_20120218_011117_outLine +BABEL_BP_107_58863_20120218_012806_inLine +BABEL_BP_107_58863_20120218_012806_outLine +BABEL_BP_107_59071_20120423_184821_inLine +BABEL_BP_107_59175_20120212_225712_inLine +BABEL_BP_107_59175_20120212_225712_outLine +BABEL_BP_107_59383_20120502_205353_inLine +BABEL_BP_107_59383_20120502_205353_outLine +BABEL_BP_107_59628_20120428_215033_inLine +BABEL_BP_107_59764_20120524_205913_inLine +BABEL_BP_107_59924_20120417_194534_inLine +BABEL_BP_107_59924_20120417_194534_outLine +BABEL_BP_107_59961_20120218_211136_inLine +BABEL_BP_107_60106_20120211_003229_inLine +BABEL_BP_107_60106_20120211_003229_outLine +BABEL_BP_107_60183_20120428_164103_inLine +BABEL_BP_107_60183_20120428_164103_outLine +BABEL_BP_107_60193_20120328_014042_inLine +BABEL_BP_107_60238_20120506_132025_outLine +BABEL_BP_107_60338_20120505_131543_inLine +BABEL_BP_107_60338_20120505_131543_outLine +BABEL_BP_107_60605_20120506_215948_inLine +BABEL_BP_107_60677_20120415_145311_inLine +BABEL_BP_107_60677_20120415_145311_outLine +BABEL_BP_107_60677_20120415_150336_inLine +BABEL_BP_107_60677_20120415_150336_outLine +BABEL_BP_107_60826_20120424_235431_inLine +BABEL_BP_107_60826_20120424_235432_outLine +BABEL_BP_107_60842_20120617_190839_inLine +BABEL_BP_107_60842_20120617_190839_outLine +BABEL_BP_107_61073_20120322_193656_inLine +BABEL_BP_107_61073_20120322_193656_outLine +BABEL_BP_107_61408_20120628_141349_outLine +BABEL_BP_107_61449_20120421_232700_inLine +BABEL_BP_107_61449_20120421_232700_outLine +BABEL_BP_107_61906_20120414_201744_inLine +BABEL_BP_107_61906_20120414_201744_outLine +BABEL_BP_107_62132_20120506_160034_inLine +BABEL_BP_107_62160_20120323_180702_outLine +BABEL_BP_107_62163_20120628_180945_inLine +BABEL_BP_107_62163_20120628_182002_inLine +BABEL_BP_107_62177_20120323_001326_inLine +BABEL_BP_107_62255_20120506_204123_inLine +BABEL_BP_107_62255_20120506_204123_outLine +BABEL_BP_107_62277_20120504_173047_inLine +BABEL_BP_107_62696_20120508_135942_outLine +BABEL_BP_107_62696_20120509_100233_outLine +BABEL_BP_107_62923_20120322_163015_inLine +BABEL_BP_107_62923_20120322_163015_outLine +BABEL_BP_107_62993_20120608_130210_inLine +BABEL_BP_107_62993_20120608_130210_outLine +BABEL_BP_107_63076_20120704_011318_inLine +BABEL_BP_107_63116_20120419_163443_inLine +BABEL_BP_107_63233_20120323_003312_inLine +BABEL_BP_107_63352_20120421_222544_inLine +BABEL_BP_107_63368_20120418_215232_inLine +BABEL_BP_107_63368_20120418_215232_outLine +BABEL_BP_107_63368_20120418_220224_inLine +BABEL_BP_107_63368_20120418_220224_outLine +BABEL_BP_107_63368_20120418_222134_inLine +BABEL_BP_107_63368_20120418_222134_outLine +BABEL_BP_107_63369_20120614_191919_inLine +BABEL_BP_107_63711_20120212_183127_inLine +BABEL_BP_107_63711_20120212_183127_outLine +BABEL_BP_107_64205_20120428_020155_inLine +BABEL_BP_107_64351_20120513_193703_outLine +BABEL_BP_107_64724_20120503_155446_inLine +BABEL_BP_107_64724_20120503_155446_outLine +BABEL_BP_107_64889_20120503_174229_inLine +BABEL_BP_107_64889_20120503_174229_outLine +BABEL_BP_107_65414_20120608_131726_inLine +BABEL_BP_107_65743_20120404_191932_inLine +BABEL_BP_107_65743_20120404_191932_outLine +BABEL_BP_107_65989_20120419_141422_inLine +BABEL_BP_107_66346_20120703_161130_inLine +BABEL_BP_107_66346_20120703_161130_outLine +BABEL_BP_107_66419_20120505_205757_inLine +BABEL_BP_107_66419_20120505_205757_outLine +BABEL_BP_107_66451_20120214_215503_inLine +BABEL_BP_107_66451_20120214_215503_outLine +BABEL_BP_107_66499_20120610_220818_inLine 
+BABEL_BP_107_66559_20120421_185343_inLine +BABEL_BP_107_66709_20120617_152656_outLine +BABEL_BP_107_66709_20120617_153822_outLine +BABEL_BP_107_66790_20120421_182115_inLine +BABEL_BP_107_66839_20120613_192022_inLine +BABEL_BP_107_66839_20120613_192022_outLine +BABEL_BP_107_66866_20120418_001946_inLine +BABEL_BP_107_66866_20120418_001946_outLine +BABEL_BP_107_66903_20120210_183320_inLine +BABEL_BP_107_66903_20120210_183320_outLine +BABEL_BP_107_66964_20120419_205513_inLine +BABEL_BP_107_66964_20120419_205513_outLine +BABEL_BP_107_67304_20120523_201027_inLine +BABEL_BP_107_67304_20120523_201027_outLine +BABEL_BP_107_67555_20120323_130439_outLine +BABEL_BP_107_67628_20120418_215117_inLine +BABEL_BP_107_67798_20120627_141236_inLine +BABEL_BP_107_68009_20120608_112155_inLine +BABEL_BP_107_68129_20120611_013309_outLine +BABEL_BP_107_68191_20120428_114953_outLine +BABEL_BP_107_68295_20120506_210459_outLine +BABEL_BP_107_68362_20120503_194813_outLine +BABEL_BP_107_68545_20120421_220606_inLine +BABEL_BP_107_68545_20120421_220606_outLine +BABEL_BP_107_68671_20120608_205710_inLine +BABEL_BP_107_68671_20120608_205710_outLine +BABEL_BP_107_68767_20120214_214534_inLine +BABEL_BP_107_68767_20120214_214534_outLine +BABEL_BP_107_69028_20120430_132441_inLine +BABEL_BP_107_69049_20120322_215956_inLine +BABEL_BP_107_69137_20120424_183202_inLine +BABEL_BP_107_69137_20120424_183202_outLine +BABEL_BP_107_69275_20120608_210354_inLine +BABEL_BP_107_69295_20120501_154139_inLine +BABEL_BP_107_70000_20120618_004254_inLine +BABEL_BP_107_70000_20120618_004254_outLine +BABEL_BP_107_70077_20120428_170417_inLine +BABEL_BP_107_70120_20120418_213104_inLine +BABEL_BP_107_70120_20120418_213104_outLine +BABEL_BP_107_70285_20120212_214056_inLine +BABEL_BP_107_70323_20120617_122402_outLine +BABEL_BP_107_70441_20120704_163546_inLine +BABEL_BP_107_70441_20120704_163546_outLine +BABEL_BP_107_70511_20120618_124928_outLine +BABEL_BP_107_70615_20120208_233912_inLine +BABEL_BP_107_70615_20120208_233912_outLine +BABEL_BP_107_70975_20120407_011601_inLine +BABEL_BP_107_70975_20120407_011601_outLine +BABEL_BP_107_71176_20120418_195323_inLine +BABEL_BP_107_71176_20120418_195323_outLine +BABEL_BP_107_71739_20120430_125259_inLine +BABEL_BP_107_71741_20120211_231000_inLine +BABEL_BP_107_71741_20120211_231000_outLine +BABEL_BP_107_71778_20120427_132527_inLine +BABEL_BP_107_71778_20120427_132527_outLine +BABEL_BP_107_71844_20120212_180004_inLine +BABEL_BP_107_71927_20120516_204724_inLine +BABEL_BP_107_72269_20120416_010327_inLine +BABEL_BP_107_72269_20120416_010327_outLine +BABEL_BP_107_72297_20120608_185443_inLine +BABEL_BP_107_72297_20120608_185443_outLine +BABEL_BP_107_72297_20120608_190156_inLine +BABEL_BP_107_72297_20120608_190156_outLine +BABEL_BP_107_72647_20120614_125725_inLine +BABEL_BP_107_72718_20120505_025006_inLine +BABEL_BP_107_72718_20120505_025006_outLine +BABEL_BP_107_72879_20120403_141911_inLine +BABEL_BP_107_72879_20120403_141911_outLine +BABEL_BP_107_73205_20120211_191427_outLine +BABEL_BP_107_73438_20120502_201055_inLine +BABEL_BP_107_73438_20120502_201055_outLine +BABEL_BP_107_73440_20120416_172035_inLine +BABEL_BP_107_73452_20120504_170508_inLine +BABEL_BP_107_73452_20120504_170508_outLine +BABEL_BP_107_73752_20120610_174558_inLine +BABEL_BP_107_73761_20120424_154013_inLine +BABEL_BP_107_73761_20120424_154013_outLine +BABEL_BP_107_73911_20120215_175351_inLine +BABEL_BP_107_73911_20120215_175351_outLine +BABEL_BP_107_73921_20120501_000425_outLine +BABEL_BP_107_74043_20120323_014301_outLine 
+BABEL_BP_107_74234_20120328_020415_inLine +BABEL_BP_107_74234_20120328_020415_outLine +BABEL_BP_107_74317_20120502_225211_inLine +BABEL_BP_107_74317_20120502_225211_outLine +BABEL_BP_107_74395_20120418_140703_inLine +BABEL_BP_107_74395_20120418_140703_outLine +BABEL_BP_107_74404_20120212_134850_outLine +BABEL_BP_107_74625_20120425_234344_inLine +BABEL_BP_107_74700_20120610_233419_inLine +BABEL_BP_107_74823_20120217_022832_inLine +BABEL_BP_107_74823_20120217_022832_outLine +BABEL_BP_107_74935_20120616_144642_inLine +BABEL_BP_107_74974_20120617_143904_inLine +BABEL_BP_107_74974_20120617_143904_outLine +BABEL_BP_107_74986_20120416_011008_inLine +BABEL_BP_107_74986_20120416_011008_outLine +BABEL_BP_107_74986_20120416_011927_inLine +BABEL_BP_107_74986_20120416_011927_outLine +BABEL_BP_107_75036_20120325_233130_inLine +BABEL_BP_107_75036_20120325_233130_outLine +BABEL_BP_107_75333_20120505_200116_inLine +BABEL_BP_107_75333_20120505_200116_outLine +BABEL_BP_107_75498_20120506_171232_inLine +BABEL_BP_107_75498_20120506_171232_outLine +BABEL_BP_107_75680_20120704_175114_inLine +BABEL_BP_107_75680_20120704_175114_outLine +BABEL_BP_107_75799_20120429_140233_inLine +BABEL_BP_107_75799_20120429_140233_outLine +BABEL_BP_107_75815_20120217_141539_inLine +BABEL_BP_107_75815_20120217_141539_outLine +BABEL_BP_107_76252_20120705_003603_outLine +BABEL_BP_107_76341_20120215_201638_inLine +BABEL_BP_107_76341_20120215_201638_outLine +BABEL_BP_107_76661_20120405_132625_inLine +BABEL_BP_107_76691_20120501_002016_inLine +BABEL_BP_107_76716_20120418_215649_outLine +BABEL_BP_107_76733_20120424_181359_inLine +BABEL_BP_107_76733_20120424_181359_outLine +BABEL_BP_107_76733_20120424_183605_inLine +BABEL_BP_107_76733_20120424_183605_outLine +BABEL_BP_107_76748_20120504_181420_inLine +BABEL_BP_107_76919_20120507_010805_outLine +BABEL_BP_107_76925_20120407_015139_inLine +BABEL_BP_107_76944_20120505_000745_inLine +BABEL_BP_107_76944_20120505_000745_outLine +BABEL_BP_107_76993_20120501_125118_inLine +BABEL_BP_107_76993_20120501_125118_outLine +BABEL_BP_107_77238_20120322_211133_outLine +BABEL_BP_107_77244_20120429_164842_inLine +BABEL_BP_107_77244_20120429_164842_outLine +BABEL_BP_107_77315_20120527_222821_outLine +BABEL_BP_107_77338_20120617_171454_inLine +BABEL_BP_107_77338_20120617_171454_outLine +BABEL_BP_107_77473_20120610_000112_inLine +BABEL_BP_107_77886_20120326_191938_inLine +BABEL_BP_107_77886_20120326_191938_outLine +BABEL_BP_107_78094_20120212_205141_inLine +BABEL_BP_107_78094_20120212_205141_outLine +BABEL_BP_107_78487_20120430_133108_inLine +BABEL_BP_107_78487_20120430_133108_outLine +BABEL_BP_107_78514_20120617_131155_outLine +BABEL_BP_107_79284_20120511_180310_inLine +BABEL_BP_107_79284_20120511_180310_outLine +BABEL_BP_107_79495_20120222_195716_inLine +BABEL_BP_107_79619_20120420_115502_inLine +BABEL_BP_107_79619_20120420_115502_outLine +BABEL_BP_107_79632_20120428_182831_inLine +BABEL_BP_107_79632_20120428_182831_outLine +BABEL_BP_107_79860_20120328_023545_inLine +BABEL_BP_107_79944_20120424_213833_inLine +BABEL_BP_107_79970_20120418_214316_inLine +BABEL_BP_107_80008_20120218_225347_inLine +BABEL_BP_107_80008_20120218_225347_outLine +BABEL_BP_107_80282_20120627_190514_inLine +BABEL_BP_107_80282_20120627_190935_inLine +BABEL_BP_107_80290_20120501_134226_inLine +BABEL_BP_107_80290_20120501_134226_outLine +BABEL_BP_107_80337_20120608_000801_inLine +BABEL_BP_107_80337_20120608_000801_outLine +BABEL_BP_107_80638_20120501_223037_inLine +BABEL_BP_107_80638_20120501_223037_outLine 
+BABEL_BP_107_80786_20120212_204918_inLine +BABEL_BP_107_80786_20120212_204918_outLine +BABEL_BP_107_81056_20120502_155358_inLine +BABEL_BP_107_81056_20120502_155358_outLine +BABEL_BP_107_81096_20120418_221604_inLine +BABEL_BP_107_81096_20120418_221604_outLine +BABEL_BP_107_81321_20120329_030424_outLine +BABEL_BP_107_81486_20120213_035232_inLine +BABEL_BP_107_81486_20120213_040319_inLine +BABEL_BP_107_81535_20120421_151505_inLine +BABEL_BP_107_81535_20120421_151505_outLine +BABEL_BP_107_81611_20120616_154507_outLine +BABEL_BP_107_81717_20120426_185608_inLine +BABEL_BP_107_81771_20120615_224609_inLine +BABEL_BP_107_81771_20120615_224609_outLine +BABEL_BP_107_82006_20120417_133143_outLine +BABEL_BP_107_82025_20120325_012956_inLine +BABEL_BP_107_82103_20120326_172335_inLine +BABEL_BP_107_82103_20120326_172335_outLine +BABEL_BP_107_82131_20120704_135728_inLine +BABEL_BP_107_82131_20120704_211005_inLine +BABEL_BP_107_82591_20120407_185008_outLine +BABEL_BP_107_82599_20120608_140933_outLine +BABEL_BP_107_82766_20120627_112435_outLine +BABEL_BP_107_82880_20120705_001819_inLine +BABEL_BP_107_82880_20120705_001819_outLine +BABEL_BP_107_82947_20120426_103950_inLine +BABEL_BP_107_82947_20120426_103950_outLine +BABEL_BP_107_82947_20120509_202553_inLine +BABEL_BP_107_82947_20120509_202553_outLine +BABEL_BP_107_83017_20120608_125136_inLine +BABEL_BP_107_83053_20120426_184045_inLine +BABEL_BP_107_83256_20120212_162557_outLine +BABEL_BP_107_83360_20120418_000230_inLine +BABEL_BP_107_83360_20120418_000230_outLine +BABEL_BP_107_83529_20120608_152238_outLine +BABEL_BP_107_83700_20120427_121525_inLine +BABEL_BP_107_83700_20120427_121525_outLine +BABEL_BP_107_83702_20120418_010601_inLine +BABEL_BP_107_83702_20120418_010601_outLine +BABEL_BP_107_83982_20120704_125429_outLine +BABEL_BP_107_83982_20120704_125430_inLine +BABEL_BP_107_83982_20120704_131324_inLine +BABEL_BP_107_83982_20120704_131324_outLine +BABEL_BP_107_84171_20120504_185725_inLine +BABEL_BP_107_84335_20120418_002843_inLine +BABEL_BP_107_84397_20120608_080802_outLine +BABEL_BP_107_84532_20120703_171302_inLine +BABEL_BP_107_84540_20120328_205952_outLine +BABEL_BP_107_84543_20120503_005623_inLine +BABEL_BP_107_84543_20120503_005623_outLine +BABEL_BP_107_84943_20120405_134459_inLine +BABEL_BP_107_85083_20120425_024151_inLine +BABEL_BP_107_85354_20120704_145327_inLine +BABEL_BP_107_85354_20120704_145327_outLine +BABEL_BP_107_85573_20120208_152239_inLine +BABEL_BP_107_85617_20120415_171620_inLine +BABEL_BP_107_85617_20120415_171620_outLine +BABEL_BP_107_85686_20120627_180412_inLine +BABEL_BP_107_85686_20120627_180413_outLine +BABEL_BP_107_85716_20120330_201512_outLine +BABEL_BP_107_85716_20120330_202652_outLine +BABEL_BP_107_85819_20120705_030943_inLine +BABEL_BP_107_85819_20120705_030944_outLine +BABEL_BP_107_86016_20120417_225748_inLine +BABEL_BP_107_86029_20120212_235447_inLine +BABEL_BP_107_86419_20120209_010052_inLine +BABEL_BP_107_86419_20120209_010052_outLine +BABEL_BP_107_86801_20120429_211031_inLine +BABEL_BP_107_86801_20120429_211031_outLine +BABEL_BP_107_86890_20120322_202435_inLine +BABEL_BP_107_87167_20120211_230800_outLine +BABEL_BP_107_87481_20120513_191237_inLine +BABEL_BP_107_87481_20120513_191237_outLine +BABEL_BP_107_87539_20120418_225114_inLine +BABEL_BP_107_87539_20120418_225114_outLine +BABEL_BP_107_87671_20120218_011104_inLine +BABEL_BP_107_87857_20120325_000202_inLine +BABEL_BP_107_88243_20120322_210747_inLine +BABEL_BP_107_88243_20120322_210747_outLine +BABEL_BP_107_88253_20120511_165340_inLine 
+BABEL_BP_107_88253_20120511_165340_outLine +BABEL_BP_107_88294_20120322_163142_outLine +BABEL_BP_107_88506_20120503_191321_inLine +BABEL_BP_107_88506_20120503_191321_outLine +BABEL_BP_107_88532_20120416_012644_inLine +BABEL_BP_107_89619_20120217_174102_inLine +BABEL_BP_107_89619_20120217_174102_outLine +BABEL_BP_107_89644_20120501_170949_inLine +BABEL_BP_107_89644_20120501_170949_outLine +BABEL_BP_107_89657_20120610_213215_inLine +BABEL_BP_107_89657_20120610_213215_outLine +BABEL_BP_107_89674_20120212_162158_inLine +BABEL_BP_107_89674_20120212_162158_outLine +BABEL_BP_107_89965_20120505_003121_inLine +BABEL_BP_107_89965_20120505_003121_outLine +BABEL_BP_107_90313_20120325_200742_inLine +BABEL_BP_107_90393_20120417_220816_inLine +BABEL_BP_107_90393_20120417_220817_outLine +BABEL_BP_107_90559_20120608_184439_inLine +BABEL_BP_107_90559_20120608_184439_outLine +BABEL_BP_107_90577_20120118_141830_inLine +BABEL_BP_107_90577_20120118_141830_outLine +BABEL_BP_107_90609_20120216_194251_inLine +BABEL_BP_107_90764_20120418_004231_outLine +BABEL_BP_107_90975_20120428_231848_inLine +BABEL_BP_107_90975_20120428_231848_outLine +BABEL_BP_107_91000_20120529_151028_inLine +BABEL_BP_107_91002_20120429_192712_inLine +BABEL_BP_107_91002_20120429_192712_outLine +BABEL_BP_107_91007_20120612_144506_inLine +BABEL_BP_107_91040_20120618_152624_outLine +BABEL_BP_107_91136_20120427_122059_inLine +BABEL_BP_107_91401_20120213_010307_inLine +BABEL_BP_107_91401_20120213_010307_outLine +BABEL_BP_107_91406_20120429_193057_inLine +BABEL_BP_107_91406_20120429_193057_outLine +BABEL_BP_107_91409_20120520_225023_outLine +BABEL_BP_107_91409_20120520_231205_outLine +BABEL_BP_107_91660_20120510_181954_inLine +BABEL_BP_107_91660_20120510_181954_outLine +BABEL_BP_107_91660_20120510_182853_inLine +BABEL_BP_107_91660_20120510_182853_outLine +BABEL_BP_107_91660_20120510_184146_inLine +BABEL_BP_107_91660_20120510_184146_outLine +BABEL_BP_107_91723_20120323_144335_outLine +BABEL_BP_107_91865_20120429_214728_inLine +BABEL_BP_107_91865_20120429_214728_outLine +BABEL_BP_107_91905_20120504_210602_inLine +BABEL_BP_107_91905_20120504_210602_outLine +BABEL_BP_107_91975_20120703_173220_inLine +BABEL_BP_107_91975_20120703_173220_outLine +BABEL_BP_107_91979_20120209_000610_inLine +BABEL_BP_107_92002_20120418_214926_outLine +BABEL_BP_107_92407_20120210_183713_inLine +BABEL_BP_107_92407_20120210_183713_outLine +BABEL_BP_107_92436_20120213_013131_inLine +BABEL_BP_107_92436_20120213_013131_outLine +BABEL_BP_107_92591_20120505_140206_outLine +BABEL_BP_107_92602_20120216_214746_inLine +BABEL_BP_107_92602_20120216_215738_inLine +BABEL_BP_107_92603_20120416_011244_inLine +BABEL_BP_107_92603_20120416_011244_outLine +BABEL_BP_107_92628_20120323_014512_inLine +BABEL_BP_107_92628_20120323_014512_outLine +BABEL_BP_107_92643_20120608_122156_inLine +BABEL_BP_107_92643_20120608_123106_inLine +BABEL_BP_107_92735_20120413_181602_inLine +BABEL_BP_107_92789_20120416_165856_inLine +BABEL_BP_107_92800_20120412_013211_outLine +BABEL_BP_107_93044_20120607_140719_inLine +BABEL_BP_107_93044_20120607_140719_outLine +BABEL_BP_107_93509_20120321_230219_inLine +BABEL_BP_107_93509_20120321_230219_outLine +BABEL_BP_107_93804_20120703_232729_inLine +BABEL_BP_107_93804_20120703_233401_inLine +BABEL_BP_107_93974_20120627_184419_inLine +BABEL_BP_107_93974_20120627_184419_outLine +BABEL_BP_107_93979_20120422_134735_inLine +BABEL_BP_107_93979_20120422_134735_outLine +BABEL_BP_107_94149_20120405_220033_outLine +BABEL_BP_107_94162_20120425_235433_inLine 
+BABEL_BP_107_94223_20120215_204525_inLine +BABEL_BP_107_94514_20120417_001615_inLine +BABEL_BP_107_94514_20120417_001615_outLine +BABEL_BP_107_94514_20120417_003504_inLine +BABEL_BP_107_94514_20120417_003504_outLine +BABEL_BP_107_94541_20120705_024032_outLine +BABEL_BP_107_94542_20120512_223011_inLine +BABEL_BP_107_94542_20120512_223011_outLine +BABEL_BP_107_94694_20120508_120203_inLine +BABEL_BP_107_94694_20120508_120203_outLine +BABEL_BP_107_94696_20120608_185951_inLine +BABEL_BP_107_94696_20120608_185951_outLine +BABEL_BP_107_94814_20120501_130313_inLine +BABEL_BP_107_94814_20120501_130313_outLine +BABEL_BP_107_94989_20120627_120236_outLine +BABEL_BP_107_95121_20120628_123304_inLine +BABEL_BP_107_95423_20120415_201523_inLine +BABEL_BP_107_95423_20120415_201523_outLine +BABEL_BP_107_95533_20120505_005928_inLine +BABEL_BP_107_95533_20120505_005928_outLine +BABEL_BP_107_95542_20120502_223446_inLine +BABEL_BP_107_95542_20120502_223446_outLine +BABEL_BP_107_95566_20120505_162738_inLine +BABEL_BP_107_95572_20120406_151856_inLine +BABEL_BP_107_95572_20120406_151856_outLine +BABEL_BP_107_95589_20120419_162645_inLine +BABEL_BP_107_95589_20120419_162645_outLine +BABEL_BP_107_95815_20120322_160344_inLine +BABEL_BP_107_95815_20120322_160344_outLine +BABEL_BP_107_95996_20120324_230119_inLine +BABEL_BP_107_96302_20120510_023815_inLine +BABEL_BP_107_96302_20120510_023815_outLine +BABEL_BP_107_96322_20120218_202407_inLine +BABEL_BP_107_96322_20120218_202407_outLine +BABEL_BP_107_96667_20120426_182837_inLine +BABEL_BP_107_96667_20120426_182837_outLine +BABEL_BP_107_96959_20120505_014233_inLine +BABEL_BP_107_96959_20120505_014233_outLine +BABEL_BP_107_97260_20120324_012659_outLine +BABEL_BP_107_97318_20120608_183537_inLine +BABEL_BP_107_97318_20120608_183537_outLine +BABEL_BP_107_97629_20120420_202833_inLine +BABEL_BP_107_97629_20120420_202833_outLine +BABEL_BP_107_97946_20120411_213631_outLine +BABEL_BP_107_98086_20120609_185014_inLine +BABEL_BP_107_98086_20120609_185014_outLine +BABEL_BP_107_98099_20120618_120506_outLine +BABEL_BP_107_98219_20120512_202308_inLine +BABEL_BP_107_98219_20120512_202308_outLine +BABEL_BP_107_98219_20120512_203451_inLine +BABEL_BP_107_98219_20120512_203451_outLine +BABEL_BP_107_98402_20120421_162435_inLine +BABEL_BP_107_98402_20120421_162435_outLine +BABEL_BP_107_98640_20120425_213908_outLine +BABEL_BP_107_98675_20120419_225133_inLine +BABEL_BP_107_98675_20120419_225133_outLine +BABEL_BP_107_99414_20120430_200633_inLine +BABEL_BP_107_99414_20120430_200633_outLine +BABEL_BP_107_99567_20120405_154443_outLine +BABEL_BP_107_99571_20120322_165034_inLine +BABEL_BP_107_99571_20120322_165034_outLine +BABEL_BP_107_99694_20120322_165823_inLine +BABEL_BP_107_99694_20120322_165823_outLine +BABEL_BP_107_99731_20120618_005616_outLine +BABEL_BP_107_99764_20120415_202745_inLine +BABEL_BP_107_99823_20120511_002213_inLine +BABEL_BP_107_99823_20120511_002213_outLine +BABEL_BP_107_99929_20120612_143030_inLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list new file mode 100644 index 00000000000..a47debb4917 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list @@ -0,0 +1,126 @@ +BABEL_BP_107_12643_20120704_185225_inLine +BABEL_BP_107_12643_20120704_185225_outLine +BABEL_BP_107_13065_20120425_034939_inLine +BABEL_BP_107_13065_20120425_034939_outLine +BABEL_BP_107_13389_20120406_141036_inLine +BABEL_BP_107_13389_20120406_141036_outLine +BABEL_BP_107_14468_20120321_003916_inLine 
+BABEL_BP_107_14468_20120321_003916_outLine +BABEL_BP_107_14475_20120704_204813_inLine +BABEL_BP_107_14475_20120704_204813_outLine +BABEL_BP_107_14891_20120118_195012_inLine +BABEL_BP_107_14891_20120118_195012_outLine +BABEL_BP_107_17933_20120421_134916_inLine +BABEL_BP_107_17933_20120421_134916_outLine +BABEL_BP_107_19479_20120407_014459_inLine +BABEL_BP_107_19479_20120407_014459_outLine +BABEL_BP_107_21477_20120323_185255_inLine +BABEL_BP_107_21477_20120323_185255_outLine +BABEL_BP_107_21518_20120501_152038_inLine +BABEL_BP_107_21518_20120501_152038_outLine +BABEL_BP_107_22010_20120608_182138_inLine +BABEL_BP_107_22010_20120608_182138_outLine +BABEL_BP_107_22272_20120511_232328_inLine +BABEL_BP_107_22272_20120511_232328_outLine +BABEL_BP_107_22979_20120505_000039_inLine +BABEL_BP_107_22979_20120505_000039_outLine +BABEL_BP_107_23629_20120501_173549_inLine +BABEL_BP_107_23629_20120501_173549_outLine +BABEL_BP_107_27724_20120407_130547_inLine +BABEL_BP_107_27724_20120407_130547_outLine +BABEL_BP_107_31980_20120212_174027_inLine +BABEL_BP_107_31980_20120212_174027_outLine +BABEL_BP_107_34590_20120323_134554_inLine +BABEL_BP_107_34590_20120323_134554_outLine +BABEL_BP_107_36722_20120627_122821_inLine +BABEL_BP_107_36722_20120627_122821_outLine +BABEL_BP_107_38912_20120414_160852_inLine +BABEL_BP_107_38912_20120414_160852_outLine +BABEL_BP_107_41170_20120201_205341_inLine +BABEL_BP_107_41170_20120201_205341_outLine +BABEL_BP_107_42309_20120608_215912_inLine +BABEL_BP_107_42309_20120608_215912_outLine +BABEL_BP_107_42651_20120211_192913_inLine +BABEL_BP_107_42651_20120211_192913_outLine +BABEL_BP_107_42910_20120212_154722_inLine +BABEL_BP_107_42910_20120212_154722_outLine +BABEL_BP_107_43306_20120409_184959_inLine +BABEL_BP_107_43306_20120409_184959_outLine +BABEL_BP_107_44369_20120504_024021_inLine +BABEL_BP_107_44369_20120504_024021_outLine +BABEL_BP_107_44403_20120322_214144_inLine +BABEL_BP_107_44403_20120322_214144_outLine +BABEL_BP_107_44756_20120426_155822_inLine +BABEL_BP_107_44756_20120426_155822_outLine +BABEL_BP_107_49186_20120704_180724_inLine +BABEL_BP_107_49186_20120704_180724_outLine +BABEL_BP_107_49552_20120614_140129_inLine +BABEL_BP_107_50267_20120421_135338_inLine +BABEL_BP_107_50267_20120421_135338_outLine +BABEL_BP_107_50883_20120328_013430_inLine +BABEL_BP_107_50883_20120328_013430_outLine +BABEL_BP_107_52219_20120417_113120_inLine +BABEL_BP_107_52219_20120417_113120_outLine +BABEL_BP_107_53181_20120211_163316_inLine +BABEL_BP_107_53181_20120211_163316_outLine +BABEL_BP_107_54199_20120607_200253_inLine +BABEL_BP_107_54199_20120607_202722_inLine +BABEL_BP_107_54199_20120607_202722_outLine +BABEL_BP_107_54621_20120421_132410_inLine +BABEL_BP_107_54621_20120421_132410_outLine +BABEL_BP_107_55777_20120421_234307_inLine +BABEL_BP_107_55777_20120421_234307_outLine +BABEL_BP_107_58357_20120507_125021_inLine +BABEL_BP_107_58357_20120507_125021_outLine +BABEL_BP_107_59175_20120212_225712_inLine +BABEL_BP_107_59175_20120212_225712_outLine +BABEL_BP_107_60677_20120415_145311_inLine +BABEL_BP_107_60677_20120415_145311_outLine +BABEL_BP_107_60677_20120415_150336_inLine +BABEL_BP_107_60677_20120415_150336_outLine +BABEL_BP_107_61073_20120322_193656_inLine +BABEL_BP_107_61073_20120322_193656_outLine +BABEL_BP_107_62923_20120322_163015_inLine +BABEL_BP_107_62923_20120322_163015_outLine +BABEL_BP_107_63711_20120212_183127_inLine +BABEL_BP_107_63711_20120212_183127_outLine +BABEL_BP_107_66346_20120703_161130_inLine +BABEL_BP_107_66346_20120703_161130_outLine 
+BABEL_BP_107_66419_20120505_205757_inLine +BABEL_BP_107_66419_20120505_205757_outLine +BABEL_BP_107_66903_20120210_183320_inLine +BABEL_BP_107_66903_20120210_183320_outLine +BABEL_BP_107_67304_20120523_201027_inLine +BABEL_BP_107_67304_20120523_201027_outLine +BABEL_BP_107_71778_20120427_132527_inLine +BABEL_BP_107_71778_20120427_132527_outLine +BABEL_BP_107_73452_20120504_170508_inLine +BABEL_BP_107_73452_20120504_170508_outLine +BABEL_BP_107_73752_20120610_174558_inLine +BABEL_BP_107_73911_20120215_175351_inLine +BABEL_BP_107_73911_20120215_175351_outLine +BABEL_BP_107_74234_20120328_020415_inLine +BABEL_BP_107_74234_20120328_020415_outLine +BABEL_BP_107_75680_20120704_175114_inLine +BABEL_BP_107_75680_20120704_175114_outLine +BABEL_BP_107_80786_20120212_204918_inLine +BABEL_BP_107_80786_20120212_204918_outLine +BABEL_BP_107_81096_20120418_221604_inLine +BABEL_BP_107_81096_20120418_221604_outLine +BABEL_BP_107_81771_20120615_224609_inLine +BABEL_BP_107_81771_20120615_224609_outLine +BABEL_BP_107_82947_20120426_103950_inLine +BABEL_BP_107_82947_20120426_103950_outLine +BABEL_BP_107_82947_20120509_202553_inLine +BABEL_BP_107_82947_20120509_202553_outLine +BABEL_BP_107_84397_20120608_080802_outLine +BABEL_BP_107_85617_20120415_171620_inLine +BABEL_BP_107_85617_20120415_171620_outLine +BABEL_BP_107_86801_20120429_211031_inLine +BABEL_BP_107_86801_20120429_211031_outLine +BABEL_BP_107_90559_20120608_184439_inLine +BABEL_BP_107_90559_20120608_184439_outLine +BABEL_BP_107_90975_20120428_231848_inLine +BABEL_BP_107_90975_20120428_231848_outLine +BABEL_BP_107_96322_20120218_202407_inLine +BABEL_BP_107_96322_20120218_202407_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..4379937a74f --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list @@ -0,0 +1,916 @@ +BABEL_BP_107_10033_20120208_180820_outLine +BABEL_BP_107_10066_20120428_121544_inLine +BABEL_BP_107_10066_20120428_121544_outLine +BABEL_BP_107_10190_20120424_023348_inLine +BABEL_BP_107_10190_20120425_012249_inLine +BABEL_BP_107_10211_20120323_013915_inLine +BABEL_BP_107_10211_20120323_013915_outLine +BABEL_BP_107_10545_20120424_184701_inLine +BABEL_BP_107_10697_20120516_194235_inLine +BABEL_BP_107_10732_20120328_172421_inLine +BABEL_BP_107_10732_20120328_172422_outLine +BABEL_BP_107_10900_20120322_022523_inLine +BABEL_BP_107_10900_20120322_022524_outLine +BABEL_BP_107_10945_20120322_222039_inLine +BABEL_BP_107_10945_20120322_222039_outLine +BABEL_BP_107_10973_20120404_233129_inLine +BABEL_BP_107_10973_20120404_233129_outLine +BABEL_BP_107_10985_20120502_123725_inLine +BABEL_BP_107_10985_20120502_123725_outLine +BABEL_BP_107_11022_20120422_013455_inLine +BABEL_BP_107_11022_20120422_013455_outLine +BABEL_BP_107_11422_20120208_160559_inLine +BABEL_BP_107_11422_20120208_160559_outLine +BABEL_BP_107_11479_20120212_011029_inLine +BABEL_BP_107_11479_20120212_011029_outLine +BABEL_BP_107_11827_20120322_205100_inLine +BABEL_BP_107_11827_20120322_205100_outLine +BABEL_BP_107_11949_20120704_001817_inLine +BABEL_BP_107_11949_20120704_001817_outLine +BABEL_BP_107_11982_20120217_004340_inLine +BABEL_BP_107_12486_20120424_174759_inLine +BABEL_BP_107_12552_20120503_152109_inLine +BABEL_BP_107_12569_20120609_190056_inLine +BABEL_BP_107_12569_20120609_190056_outLine +BABEL_BP_107_12587_20120322_230456_inLine +BABEL_BP_107_12587_20120322_230457_outLine 
+BABEL_BP_107_12897_20120413_195042_inLine +BABEL_BP_107_12897_20120413_195042_outLine +BABEL_BP_107_12897_20120413_200727_inLine +BABEL_BP_107_12897_20120413_200727_outLine +BABEL_BP_107_13229_20120417_201028_inLine +BABEL_BP_107_13229_20120417_201028_outLine +BABEL_BP_107_13272_20120320_141107_outLine +BABEL_BP_107_13272_20120320_142506_outLine +BABEL_BP_107_13419_20120218_213925_inLine +BABEL_BP_107_13419_20120218_214753_inLine +BABEL_BP_107_13781_20120516_204849_inLine +BABEL_BP_107_13781_20120516_204849_outLine +BABEL_BP_107_13795_20120418_190613_inLine +BABEL_BP_107_13795_20120418_190613_outLine +BABEL_BP_107_14075_20120507_004435_inLine +BABEL_BP_107_14294_20120328_010858_inLine +BABEL_BP_107_14294_20120328_010858_outLine +BABEL_BP_107_14500_20120429_194225_outLine +BABEL_BP_107_14707_20120429_004741_inLine +BABEL_BP_107_14707_20120429_004741_outLine +BABEL_BP_107_14707_20120429_005954_inLine +BABEL_BP_107_14707_20120429_005954_outLine +BABEL_BP_107_14729_20120429_200418_outLine +BABEL_BP_107_14836_20120507_235040_outLine +BABEL_BP_107_14936_20120405_224830_inLine +BABEL_BP_107_14936_20120405_224830_outLine +BABEL_BP_107_15073_20120417_011547_outLine +BABEL_BP_107_15142_20120322_132735_outLine +BABEL_BP_107_15353_20120504_193952_inLine +BABEL_BP_107_15353_20120504_193952_outLine +BABEL_BP_107_15460_20120426_224823_inLine +BABEL_BP_107_15460_20120426_224823_outLine +BABEL_BP_107_15473_20120217_231342_inLine +BABEL_BP_107_15696_20120328_010156_outLine +BABEL_BP_107_15719_20120612_122632_inLine +BABEL_BP_107_15719_20120612_122632_outLine +BABEL_BP_107_15744_20120608_123258_inLine +BABEL_BP_107_15873_20120405_224524_inLine +BABEL_BP_107_15873_20120405_224524_outLine +BABEL_BP_107_15881_20120322_233839_inLine +BABEL_BP_107_15940_20120424_221327_inLine +BABEL_BP_107_16406_20120324_011714_inLine +BABEL_BP_107_16406_20120324_011714_outLine +BABEL_BP_107_16617_20120228_014302_inLine +BABEL_BP_107_16646_20120418_130946_outLine +BABEL_BP_107_16660_20120210_231224_outLine +BABEL_BP_107_16669_20120208_140603_inLine +BABEL_BP_107_16801_20120418_121951_inLine +BABEL_BP_107_16801_20120418_203644_inLine +BABEL_BP_107_16875_20120704_133550_inLine +BABEL_BP_107_16875_20120704_133550_outLine +BABEL_BP_107_16883_20120501_194424_inLine +BABEL_BP_107_16883_20120501_194424_outLine +BABEL_BP_107_16950_20120704_155322_inLine +BABEL_BP_107_16950_20120704_155322_outLine +BABEL_BP_107_17013_20120501_002142_inLine +BABEL_BP_107_17013_20120501_002142_outLine +BABEL_BP_107_17018_20120322_220450_inLine +BABEL_BP_107_17018_20120322_220450_outLine +BABEL_BP_107_17093_20120501_202548_outLine +BABEL_BP_107_17203_20120212_220043_outLine +BABEL_BP_107_17353_20120617_133436_inLine +BABEL_BP_107_17353_20120617_133436_outLine +BABEL_BP_107_18187_20120608_125102_outLine +BABEL_BP_107_18209_20120420_004725_inLine +BABEL_BP_107_18234_20120210_230712_inLine +BABEL_BP_107_18495_20120618_003601_outLine +BABEL_BP_107_18534_20120504_132522_inLine +BABEL_BP_107_18534_20120504_132522_outLine +BABEL_BP_107_18858_20120209_004527_outLine +BABEL_BP_107_19012_20120503_215037_inLine +BABEL_BP_107_19012_20120503_215037_outLine +BABEL_BP_107_19248_20120508_210026_inLine +BABEL_BP_107_19248_20120508_210027_outLine +BABEL_BP_107_19290_20120421_141409_inLine +BABEL_BP_107_19290_20120421_141409_outLine +BABEL_BP_107_19404_20120321_171020_inLine +BABEL_BP_107_19404_20120321_171020_outLine +BABEL_BP_107_19731_20120506_011629_inLine +BABEL_BP_107_19731_20120515_001656_inLine +BABEL_BP_107_19869_20120608_012542_outLine 
+BABEL_BP_107_20320_20120212_214655_inLine +BABEL_BP_107_20332_20120426_010134_inLine +BABEL_BP_107_20332_20120426_010837_inLine +BABEL_BP_107_20332_20120426_010134_outLine +BABEL_BP_107_20332_20120426_010837_outLine +BABEL_BP_107_20483_20120416_171740_outLine +BABEL_BP_107_20518_20120418_211112_inLine +BABEL_BP_107_20582_20120322_220747_inLine +BABEL_BP_107_20582_20120322_220747_outLine +BABEL_BP_107_20740_20120427_193225_inLine +BABEL_BP_107_20740_20120427_193757_inLine +BABEL_BP_107_20741_20120325_181245_outLine +BABEL_BP_107_20799_20120515_010136_inLine +BABEL_BP_107_20799_20120515_010136_outLine +BABEL_BP_107_21052_20120415_204922_inLine +BABEL_BP_107_21139_20120425_192642_outLine +BABEL_BP_107_21258_20120418_145725_inLine +BABEL_BP_107_21367_20120629_140326_outLine +BABEL_BP_107_21430_20120608_003600_outLine +BABEL_BP_107_21584_20120217_004017_inLine +BABEL_BP_107_21584_20120217_004017_outLine +BABEL_BP_107_21758_20120407_010928_inLine +BABEL_BP_107_21758_20120407_010928_outLine +BABEL_BP_107_21758_20120407_011555_inLine +BABEL_BP_107_21758_20120407_011555_outLine +BABEL_BP_107_21929_20120323_015539_inLine +BABEL_BP_107_21929_20120323_022750_inLine +BABEL_BP_107_21946_20120507_015056_inLine +BABEL_BP_107_21946_20120507_015056_outLine +BABEL_BP_107_22494_20120613_122322_outLine +BABEL_BP_107_22898_20120322_144401_inLine +BABEL_BP_107_22898_20120322_144401_outLine +BABEL_BP_107_22910_20120214_213815_inLine +BABEL_BP_107_22910_20120214_213815_outLine +BABEL_BP_107_23167_20120217_212610_inLine +BABEL_BP_107_23167_20120217_212610_outLine +BABEL_BP_107_23930_20120506_214145_inLine +BABEL_BP_107_24014_20120618_010729_inLine +BABEL_BP_107_24014_20120618_010729_outLine +BABEL_BP_107_24094_20120421_134318_outLine +BABEL_BP_107_24569_20120507_123854_outLine +BABEL_BP_107_24608_20120208_170106_outLine +BABEL_BP_107_24638_20120504_004348_outLine +BABEL_BP_107_24642_20120505_201543_inLine +BABEL_BP_107_24642_20120505_201543_outLine +BABEL_BP_107_24799_20120508_232153_outLine +BABEL_BP_107_24817_20120422_203514_inLine +BABEL_BP_107_24833_20120218_171649_outLine +BABEL_BP_107_25035_20120214_230841_inLine +BABEL_BP_107_25072_20120429_144535_inLine +BABEL_BP_107_25479_20120506_161146_inLine +BABEL_BP_107_25479_20120506_161146_outLine +BABEL_BP_107_25576_20120321_222905_outLine +BABEL_BP_107_25866_20120426_193335_inLine +BABEL_BP_107_26348_20120508_100651_inLine +BABEL_BP_107_26348_20120508_102042_inLine +BABEL_BP_107_26350_20120209_004945_inLine +BABEL_BP_107_26350_20120209_004945_outLine +BABEL_BP_107_26350_20120209_012139_inLine +BABEL_BP_107_26350_20120209_012139_outLine +BABEL_BP_107_26598_20120425_143602_outLine +BABEL_BP_107_26684_20120530_155756_inLine +BABEL_BP_107_26786_20120423_191945_inLine +BABEL_BP_107_26786_20120423_191945_outLine +BABEL_BP_107_27064_20120222_210044_inLine +BABEL_BP_107_27064_20120222_210044_outLine +BABEL_BP_107_27503_20120212_221915_inLine +BABEL_BP_107_27619_20120328_023110_outLine +BABEL_BP_107_27698_20120212_005737_inLine +BABEL_BP_107_27698_20120212_005737_outLine +BABEL_BP_107_27890_20120428_235422_inLine +BABEL_BP_107_27890_20120428_235422_outLine +BABEL_BP_107_27916_20120607_114245_outLine +BABEL_BP_107_27916_20120607_115650_outLine +BABEL_BP_107_28016_20120405_222219_inLine +BABEL_BP_107_28016_20120405_222219_outLine +BABEL_BP_107_28107_20120208_142843_outLine +BABEL_BP_107_28107_20120208_144923_outLine +BABEL_BP_107_28132_20120405_152728_outLine +BABEL_BP_107_28260_20120212_153106_inLine +BABEL_BP_107_28557_20120507_001619_outLine 
+BABEL_BP_107_28675_20120607_231549_inLine
+BABEL_BP_107_28675_20120607_231549_outLine
+BABEL_BP_107_28675_20120607_233243_inLine
+BABEL_BP_107_28675_20120607_233243_outLine
+BABEL_BP_107_28740_20120212_150039_inLine
+BABEL_BP_107_28740_20120212_150039_outLine
+BABEL_BP_107_29280_20120607_184929_outLine
+BABEL_BP_107_29280_20120607_190345_outLine
+BABEL_BP_107_29290_20120415_102435_inLine
+BABEL_BP_107_29335_20120424_013042_inLine
+BABEL_BP_107_29335_20120424_013042_outLine
+BABEL_BP_107_29407_20120607_132315_inLine
+BABEL_BP_107_29407_20120607_135318_inLine
+BABEL_BP_107_29444_20120322_191236_outLine
+BABEL_BP_107_29771_20120504_010738_outLine
+BABEL_BP_107_29959_20120418_001028_inLine
+BABEL_BP_107_29959_20120418_001028_outLine
+BABEL_BP_107_29988_20120516_233700_inLine
+BABEL_BP_107_30210_20120427_140255_inLine
+BABEL_BP_107_30210_20120502_202749_inLine
+BABEL_BP_107_30554_20120617_231216_outLine
+BABEL_BP_107_30583_20120212_210712_inLine
+BABEL_BP_107_30722_20120505_103655_inLine
+BABEL_BP_107_30722_20120505_103655_outLine
+BABEL_BP_107_31031_20120501_205733_inLine
+BABEL_BP_107_31031_20120501_210746_inLine
+BABEL_BP_107_31298_20120322_125112_outLine
+BABEL_BP_107_31393_20120325_171905_inLine
+BABEL_BP_107_31460_20120325_193921_inLine
+BABEL_BP_107_31606_20120607_131428_inLine
+BABEL_BP_107_31738_20120704_101130_outLine
+BABEL_BP_107_31902_20120417_015618_inLine
+BABEL_BP_107_31902_20120417_015618_outLine
+BABEL_BP_107_31917_20120501_202910_inLine
+BABEL_BP_107_31917_20120501_202910_outLine
+BABEL_BP_107_32132_20120418_211743_inLine
+BABEL_BP_107_32274_20120324_011402_inLine
+BABEL_BP_107_32295_20120617_141025_inLine
+BABEL_BP_107_32295_20120617_141025_outLine
+BABEL_BP_107_32334_20120429_005403_inLine
+BABEL_BP_107_32334_20120429_005403_outLine
+BABEL_BP_107_32400_20120426_000137_inLine
+BABEL_BP_107_32400_20120426_000137_outLine
+BABEL_BP_107_32710_20120418_215432_inLine
+BABEL_BP_107_32710_20120418_215432_outLine
+BABEL_BP_107_33012_20120611_155055_inLine
+BABEL_BP_107_33364_20120617_011853_inLine
+BABEL_BP_107_33364_20120617_011853_outLine
+BABEL_BP_107_33577_20120704_152608_outLine
+BABEL_BP_107_33671_20120330_001033_inLine
+BABEL_BP_107_33671_20120330_001033_outLine
+BABEL_BP_107_33742_20120608_143147_inLine
+BABEL_BP_107_33742_20120608_143147_outLine
+BABEL_BP_107_33817_20120423_130850_inLine
+BABEL_BP_107_33817_20120423_130850_outLine
+BABEL_BP_107_33969_20120429_214721_outLine
+BABEL_BP_107_34235_20120218_205136_outLine
+BABEL_BP_107_34480_20120608_151830_inLine
+BABEL_BP_107_34498_20120429_140537_inLine
+BABEL_BP_107_34498_20120429_140537_outLine
+BABEL_BP_107_34857_20120419_235853_inLine
+BABEL_BP_107_34961_20120212_223315_inLine
+BABEL_BP_107_34961_20120212_223315_outLine
+BABEL_BP_107_34961_20120212_224207_inLine
+BABEL_BP_107_34961_20120212_224207_outLine
+BABEL_BP_107_35011_20120321_223128_inLine
+BABEL_BP_107_35011_20120321_223128_outLine
+BABEL_BP_107_35016_20120611_185645_outLine
+BABEL_BP_107_35074_20120608_164703_outLine
+BABEL_BP_107_35179_20120414_153233_inLine
+BABEL_BP_107_35179_20120414_153233_outLine
+BABEL_BP_107_35188_20120614_131427_inLine
+BABEL_BP_107_35305_20120422_120043_outLine
+BABEL_BP_107_35357_20120614_212245_inLine
+BABEL_BP_107_35357_20120614_212245_outLine
+BABEL_BP_107_36037_20120616_153023_outLine
+BABEL_BP_107_36196_20120608_110319_inLine
+BABEL_BP_107_36196_20120608_111049_inLine
+BABEL_BP_107_36268_20120406_211711_inLine
+BABEL_BP_107_36268_20120406_211711_outLine
+BABEL_BP_107_36356_20120211_173247_inLine
+BABEL_BP_107_36356_20120211_173247_outLine
+BABEL_BP_107_36383_20120416_225701_outLine
+BABEL_BP_107_36391_20120505_171824_inLine
+BABEL_BP_107_36424_20120421_130549_inLine
+BABEL_BP_107_36424_20120421_130549_outLine
+BABEL_BP_107_36424_20120421_133610_inLine
+BABEL_BP_107_36424_20120421_133610_outLine
+BABEL_BP_107_36502_20120617_145859_inLine
+BABEL_BP_107_36502_20120617_145859_outLine
+BABEL_BP_107_36711_20120325_230112_inLine
+BABEL_BP_107_36711_20120325_230112_outLine
+BABEL_BP_107_37110_20120209_002706_inLine
+BABEL_BP_107_37110_20120209_002706_outLine
+BABEL_BP_107_37210_20120322_205536_outLine
+BABEL_BP_107_37285_20120325_000245_inLine
+BABEL_BP_107_37285_20120325_000245_outLine
+BABEL_BP_107_37335_20120616_150016_inLine
+BABEL_BP_107_37335_20120616_150016_outLine
+BABEL_BP_107_37374_20120418_185819_inLine
+BABEL_BP_107_37940_20120424_004619_inLine
+BABEL_BP_107_37940_20120424_004619_outLine
+BABEL_BP_107_38464_20120422_105536_outLine
+BABEL_BP_107_38592_20120704_150926_outLine
+BABEL_BP_107_38640_20120215_030154_inLine
+BABEL_BP_107_38640_20120215_030154_outLine
+BABEL_BP_107_38698_20120322_213531_inLine
+BABEL_BP_107_38698_20120322_213531_outLine
+BABEL_BP_107_38879_20120406_150304_inLine
+BABEL_BP_107_38879_20120406_150304_outLine
+BABEL_BP_107_39246_20120613_202128_inLine
+BABEL_BP_107_39246_20120613_202128_outLine
+BABEL_BP_107_39264_20120417_191639_inLine
+BABEL_BP_107_39264_20120417_191639_outLine
+BABEL_BP_107_39296_20120705_025906_inLine
+BABEL_BP_107_39384_20120324_010939_inLine
+BABEL_BP_107_39384_20120324_010939_outLine
+BABEL_BP_107_39384_20120324_011832_inLine
+BABEL_BP_107_39384_20120324_011832_outLine
+BABEL_BP_107_39430_20120325_015935_inLine
+BABEL_BP_107_39430_20120325_015935_outLine
+BABEL_BP_107_40002_20120502_174229_outLine
+BABEL_BP_107_40123_20120505_191426_inLine
+BABEL_BP_107_40123_20120505_191426_outLine
+BABEL_BP_107_40385_20120704_143210_outLine
+BABEL_BP_107_40477_20120323_194919_outLine
+BABEL_BP_107_40510_20120426_153808_inLine
+BABEL_BP_107_40510_20120426_153808_outLine
+BABEL_BP_107_40980_20120416_233130_inLine
+BABEL_BP_107_40980_20120416_233130_outLine
+BABEL_BP_107_40980_20120417_001128_inLine
+BABEL_BP_107_40980_20120417_001128_outLine
+BABEL_BP_107_41146_20120211_162158_inLine
+BABEL_BP_107_41590_20120610_162218_outLine
+BABEL_BP_107_41797_20120420_003902_inLine
+BABEL_BP_107_41797_20120420_003902_outLine
+BABEL_BP_107_42145_20120418_131525_inLine
+BABEL_BP_107_42266_20120407_182544_outLine
+BABEL_BP_107_43017_20120322_170152_inLine
+BABEL_BP_107_43017_20120322_170152_outLine
+BABEL_BP_107_43423_20120504_001214_inLine
+BABEL_BP_107_43423_20120504_010312_inLine
+BABEL_BP_107_43426_20120426_183951_inLine
+BABEL_BP_107_43426_20120426_183951_outLine
+BABEL_BP_107_43587_20120506_182330_inLine
+BABEL_BP_107_43652_20120416_175011_inLine
+BABEL_BP_107_43652_20120418_093619_inLine
+BABEL_BP_107_44129_20120512_023836_inLine
+BABEL_BP_107_44129_20120512_023836_outLine
+BABEL_BP_107_44829_20120404_224815_outLine
+BABEL_BP_107_44836_20120417_003600_outLine
+BABEL_BP_107_44943_20120506_191737_inLine
+BABEL_BP_107_45227_20120210_223857_inLine
+BABEL_BP_107_45511_20120212_170655_inLine
+BABEL_BP_107_45511_20120212_170655_outLine
+BABEL_BP_107_45570_20120509_151829_inLine
+BABEL_BP_107_45570_20120509_151829_outLine
+BABEL_BP_107_45793_20120211_040134_inLine
+BABEL_BP_107_45793_20120211_040134_outLine
+BABEL_BP_107_45929_20120418_215417_outLine
+BABEL_BP_107_45931_20120322_143234_inLine
+BABEL_BP_107_45931_20120322_143234_outLine
+BABEL_BP_107_46243_20120210_233353_inLine
+BABEL_BP_107_46243_20120210_233353_outLine
+BABEL_BP_107_46332_20120418_002934_inLine
+BABEL_BP_107_46332_20120418_002934_outLine
+BABEL_BP_107_46603_20120421_113906_inLine
+BABEL_BP_107_46756_20120429_195314_outLine
+BABEL_BP_107_46977_20120426_015005_inLine
+BABEL_BP_107_47263_20120422_150216_inLine
+BABEL_BP_107_47433_20120210_185410_outLine
+BABEL_BP_107_47618_20120502_004413_inLine
+BABEL_BP_107_47618_20120502_004413_outLine
+BABEL_BP_107_47661_20120216_224419_inLine
+BABEL_BP_107_47661_20120216_224419_outLine
+BABEL_BP_107_47794_20120514_175438_inLine
+BABEL_BP_107_47794_20120514_175438_outLine
+BABEL_BP_107_47823_20120516_204140_inLine
+BABEL_BP_107_47845_20120613_004732_outLine
+BABEL_BP_107_47906_20120415_224420_inLine
+BABEL_BP_107_47906_20120415_224420_outLine
+BABEL_BP_107_48188_20120422_150955_inLine
+BABEL_BP_107_48188_20120422_150955_outLine
+BABEL_BP_107_48418_20120421_163333_inLine
+BABEL_BP_107_48511_20120322_145729_inLine
+BABEL_BP_107_48511_20120322_145729_outLine
+BABEL_BP_107_48559_20120502_201955_inLine
+BABEL_BP_107_48559_20120502_201955_outLine
+BABEL_BP_107_48607_20120607_215116_outLine
+BABEL_BP_107_48733_20120418_142426_inLine
+BABEL_BP_107_48733_20120418_142426_outLine
+BABEL_BP_107_48753_20120426_134417_inLine
+BABEL_BP_107_48753_20120426_134417_outLine
+BABEL_BP_107_48812_20120323_162517_inLine
+BABEL_BP_107_48812_20120324_182527_inLine
+BABEL_BP_107_48976_20120220_152013_inLine
+BABEL_BP_107_48976_20120220_152013_outLine
+BABEL_BP_107_49192_20120421_190503_outLine
+BABEL_BP_107_49239_20120429_144119_inLine
+BABEL_BP_107_49346_20120611_192752_outLine
+BABEL_BP_107_49351_20120614_132223_inLine
+BABEL_BP_107_49351_20120614_132223_outLine
+BABEL_BP_107_49371_20120608_002052_inLine
+BABEL_BP_107_49541_20120325_223621_inLine
+BABEL_BP_107_49541_20120325_223621_outLine
+BABEL_BP_107_49689_20120415_163537_inLine
+BABEL_BP_107_49689_20120415_163537_outLine
+BABEL_BP_107_49714_20120509_113627_outLine
+BABEL_BP_107_49773_20120211_151308_inLine
+BABEL_BP_107_49773_20120211_151308_outLine
+BABEL_BP_107_50028_20120704_192522_inLine
+BABEL_BP_107_50028_20120704_192522_outLine
+BABEL_BP_107_50141_20120505_233033_inLine
+BABEL_BP_107_50141_20120505_233033_outLine
+BABEL_BP_107_50201_20120216_001139_inLine
+BABEL_BP_107_50201_20120216_001139_outLine
+BABEL_BP_107_50298_20120507_152508_outLine
+BABEL_BP_107_50409_20120608_205803_inLine
+BABEL_BP_107_50468_20120420_114108_inLine
+BABEL_BP_107_50468_20120420_114108_outLine
+BABEL_BP_107_50468_20120420_115203_inLine
+BABEL_BP_107_50468_20120420_115203_outLine
+BABEL_BP_107_50476_20120430_225248_inLine
+BABEL_BP_107_50476_20120430_225248_outLine
+BABEL_BP_107_50718_20120321_125943_inLine
+BABEL_BP_107_50752_20120421_202932_inLine
+BABEL_BP_107_50752_20120421_202932_outLine
+BABEL_BP_107_51052_20120424_004427_inLine
+BABEL_BP_107_51052_20120424_004427_outLine
+BABEL_BP_107_51073_20120216_010300_outLine
+BABEL_BP_107_51117_20120211_034844_inLine
+BABEL_BP_107_51117_20120211_034844_outLine
+BABEL_BP_107_51136_20120405_142910_inLine
+BABEL_BP_107_51136_20120405_142910_outLine
+BABEL_BP_107_51446_20120417_221307_inLine
+BABEL_BP_107_51446_20120417_221307_outLine
+BABEL_BP_107_51448_20120608_170641_inLine
+BABEL_BP_107_51448_20120608_171219_inLine
+BABEL_BP_107_51663_20120506_160921_inLine
+BABEL_BP_107_51727_20120424_225602_inLine
+BABEL_BP_107_51727_20120424_225602_outLine
+BABEL_BP_107_52154_20120503_203816_inLine
+BABEL_BP_107_52807_20120608_171526_inLine
+BABEL_BP_107_52807_20120608_171526_outLine
+BABEL_BP_107_52902_20120421_150627_outLine
+BABEL_BP_107_53463_20120421_150635_inLine
+BABEL_BP_107_53463_20120421_150635_outLine
+BABEL_BP_107_53463_20120421_152028_inLine
+BABEL_BP_107_53463_20120421_152028_outLine
+BABEL_BP_107_53649_20120611_193416_outLine
+BABEL_BP_107_53653_20120607_150151_outLine
+BABEL_BP_107_53703_20120502_153540_outLine
+BABEL_BP_107_53824_20120503_223532_inLine
+BABEL_BP_107_53824_20120503_223532_outLine
+BABEL_BP_107_53824_20120503_225007_inLine
+BABEL_BP_107_53824_20120503_225007_outLine
+BABEL_BP_107_53982_20120509_013004_outLine
+BABEL_BP_107_53994_20120501_161638_outLine
+BABEL_BP_107_54241_20120324_013254_inLine
+BABEL_BP_107_54241_20120324_013254_outLine
+BABEL_BP_107_54332_20120608_182424_inLine
+BABEL_BP_107_54332_20120608_183219_inLine
+BABEL_BP_107_54518_20120608_120238_inLine
+BABEL_BP_107_54785_20120602_195720_inLine
+BABEL_BP_107_54787_20120405_202915_inLine
+BABEL_BP_107_54787_20120405_202915_outLine
+BABEL_BP_107_55182_20120209_015206_inLine
+BABEL_BP_107_55355_20120608_155709_inLine
+BABEL_BP_107_55355_20120612_142521_inLine
+BABEL_BP_107_55396_20120321_141254_outLine
+BABEL_BP_107_55470_20120421_134215_outLine
+BABEL_BP_107_55874_20120504_184342_inLine
+BABEL_BP_107_55874_20120504_184343_outLine
+BABEL_BP_107_56039_20120516_215649_inLine
+BABEL_BP_107_56039_20120516_215649_outLine
+BABEL_BP_107_56070_20120220_174719_inLine
+BABEL_BP_107_57148_20120217_014955_inLine
+BABEL_BP_107_57148_20120217_014955_outLine
+BABEL_BP_107_57148_20120217_024257_inLine
+BABEL_BP_107_57148_20120217_024257_outLine
+BABEL_BP_107_57422_20120508_014547_inLine
+BABEL_BP_107_57422_20120508_014547_outLine
+BABEL_BP_107_57457_20120617_193611_inLine
+BABEL_BP_107_57457_20120617_193611_outLine
+BABEL_BP_107_57619_20120505_151800_inLine
+BABEL_BP_107_58108_20120509_141003_inLine
+BABEL_BP_107_58108_20120509_141003_outLine
+BABEL_BP_107_58137_20120421_185042_inLine
+BABEL_BP_107_58137_20120421_185042_outLine
+BABEL_BP_107_58190_20120506_195510_outLine
+BABEL_BP_107_58232_20120501_122112_inLine
+BABEL_BP_107_58232_20120501_122112_outLine
+BABEL_BP_107_58536_20120501_013825_inLine
+BABEL_BP_107_58536_20120501_013825_outLine
+BABEL_BP_107_58746_20120614_181729_inLine
+BABEL_BP_107_58746_20120614_181729_outLine
+BABEL_BP_107_58863_20120218_011117_inLine
+BABEL_BP_107_58863_20120218_011117_outLine
+BABEL_BP_107_58863_20120218_012806_inLine
+BABEL_BP_107_58863_20120218_012806_outLine
+BABEL_BP_107_59071_20120423_184821_inLine
+BABEL_BP_107_59383_20120502_205353_inLine
+BABEL_BP_107_59383_20120502_205353_outLine
+BABEL_BP_107_59628_20120428_215033_inLine
+BABEL_BP_107_59764_20120524_205913_inLine
+BABEL_BP_107_59924_20120417_194534_inLine
+BABEL_BP_107_59924_20120417_194534_outLine
+BABEL_BP_107_59961_20120218_211136_inLine
+BABEL_BP_107_60106_20120211_003229_inLine
+BABEL_BP_107_60106_20120211_003229_outLine
+BABEL_BP_107_60183_20120428_164103_inLine
+BABEL_BP_107_60183_20120428_164103_outLine
+BABEL_BP_107_60193_20120328_014042_inLine
+BABEL_BP_107_60238_20120506_132025_outLine
+BABEL_BP_107_60338_20120505_131543_inLine
+BABEL_BP_107_60338_20120505_131543_outLine
+BABEL_BP_107_60605_20120506_215948_inLine
+BABEL_BP_107_60826_20120424_235431_inLine
+BABEL_BP_107_60826_20120424_235432_outLine
+BABEL_BP_107_60842_20120617_190839_inLine
+BABEL_BP_107_60842_20120617_190839_outLine
+BABEL_BP_107_61408_20120628_141349_outLine
+BABEL_BP_107_61449_20120421_232700_inLine
+BABEL_BP_107_61449_20120421_232700_outLine
+BABEL_BP_107_61906_20120414_201744_inLine
+BABEL_BP_107_61906_20120414_201744_outLine
+BABEL_BP_107_62132_20120506_160034_inLine
+BABEL_BP_107_62160_20120323_180702_outLine
+BABEL_BP_107_62163_20120628_180945_inLine
+BABEL_BP_107_62163_20120628_182002_inLine
+BABEL_BP_107_62177_20120323_001326_inLine
+BABEL_BP_107_62255_20120506_204123_inLine
+BABEL_BP_107_62255_20120506_204123_outLine
+BABEL_BP_107_62277_20120504_173047_inLine
+BABEL_BP_107_62696_20120508_135942_outLine
+BABEL_BP_107_62696_20120509_100233_outLine
+BABEL_BP_107_62993_20120608_130210_inLine
+BABEL_BP_107_62993_20120608_130210_outLine
+BABEL_BP_107_63076_20120704_011318_inLine
+BABEL_BP_107_63116_20120419_163443_inLine
+BABEL_BP_107_63233_20120323_003312_inLine
+BABEL_BP_107_63352_20120421_222544_inLine
+BABEL_BP_107_63368_20120418_215232_inLine
+BABEL_BP_107_63368_20120418_215232_outLine
+BABEL_BP_107_63368_20120418_220224_inLine
+BABEL_BP_107_63368_20120418_220224_outLine
+BABEL_BP_107_63368_20120418_222134_inLine
+BABEL_BP_107_63368_20120418_222134_outLine
+BABEL_BP_107_63369_20120614_191919_inLine
+BABEL_BP_107_64205_20120428_020155_inLine
+BABEL_BP_107_64351_20120513_193703_outLine
+BABEL_BP_107_64724_20120503_155446_inLine
+BABEL_BP_107_64724_20120503_155446_outLine
+BABEL_BP_107_64889_20120503_174229_inLine
+BABEL_BP_107_64889_20120503_174229_outLine
+BABEL_BP_107_65414_20120608_131726_inLine
+BABEL_BP_107_65743_20120404_191932_inLine
+BABEL_BP_107_65743_20120404_191932_outLine
+BABEL_BP_107_65989_20120419_141422_inLine
+BABEL_BP_107_66451_20120214_215503_inLine
+BABEL_BP_107_66451_20120214_215503_outLine
+BABEL_BP_107_66499_20120610_220818_inLine
+BABEL_BP_107_66559_20120421_185343_inLine
+BABEL_BP_107_66709_20120617_152656_outLine
+BABEL_BP_107_66709_20120617_153822_outLine
+BABEL_BP_107_66790_20120421_182115_inLine
+BABEL_BP_107_66839_20120613_192022_inLine
+BABEL_BP_107_66839_20120613_192022_outLine
+BABEL_BP_107_66866_20120418_001946_inLine
+BABEL_BP_107_66866_20120418_001946_outLine
+BABEL_BP_107_66964_20120419_205513_inLine
+BABEL_BP_107_66964_20120419_205513_outLine
+BABEL_BP_107_67555_20120323_130439_outLine
+BABEL_BP_107_67628_20120418_215117_inLine
+BABEL_BP_107_67798_20120627_141236_inLine
+BABEL_BP_107_68009_20120608_112155_inLine
+BABEL_BP_107_68129_20120611_013309_outLine
+BABEL_BP_107_68191_20120428_114953_outLine
+BABEL_BP_107_68295_20120506_210459_outLine
+BABEL_BP_107_68362_20120503_194813_outLine
+BABEL_BP_107_68545_20120421_220606_inLine
+BABEL_BP_107_68545_20120421_220606_outLine
+BABEL_BP_107_68671_20120608_205710_inLine
+BABEL_BP_107_68671_20120608_205710_outLine
+BABEL_BP_107_68767_20120214_214534_inLine
+BABEL_BP_107_68767_20120214_214534_outLine
+BABEL_BP_107_69028_20120430_132441_inLine
+BABEL_BP_107_69049_20120322_215956_inLine
+BABEL_BP_107_69137_20120424_183202_inLine
+BABEL_BP_107_69137_20120424_183202_outLine
+BABEL_BP_107_69275_20120608_210354_inLine
+BABEL_BP_107_69295_20120501_154139_inLine
+BABEL_BP_107_70000_20120618_004254_inLine
+BABEL_BP_107_70000_20120618_004254_outLine
+BABEL_BP_107_70077_20120428_170417_inLine
+BABEL_BP_107_70120_20120418_213104_inLine
+BABEL_BP_107_70120_20120418_213104_outLine
+BABEL_BP_107_70285_20120212_214056_inLine
+BABEL_BP_107_70323_20120617_122402_outLine
+BABEL_BP_107_70441_20120704_163546_inLine
+BABEL_BP_107_70441_20120704_163546_outLine
+BABEL_BP_107_70511_20120618_124928_outLine
+BABEL_BP_107_70615_20120208_233912_inLine
+BABEL_BP_107_70615_20120208_233912_outLine
+BABEL_BP_107_70975_20120407_011601_inLine
+BABEL_BP_107_70975_20120407_011601_outLine
+BABEL_BP_107_71176_20120418_195323_inLine
+BABEL_BP_107_71176_20120418_195323_outLine
+BABEL_BP_107_71739_20120430_125259_inLine
+BABEL_BP_107_71741_20120211_231000_inLine
+BABEL_BP_107_71741_20120211_231000_outLine
+BABEL_BP_107_71844_20120212_180004_inLine
+BABEL_BP_107_71927_20120516_204724_inLine
+BABEL_BP_107_72269_20120416_010327_inLine
+BABEL_BP_107_72269_20120416_010327_outLine
+BABEL_BP_107_72297_20120608_185443_inLine
+BABEL_BP_107_72297_20120608_185443_outLine
+BABEL_BP_107_72297_20120608_190156_inLine
+BABEL_BP_107_72297_20120608_190156_outLine
+BABEL_BP_107_72647_20120614_125725_inLine
+BABEL_BP_107_72718_20120505_025006_inLine
+BABEL_BP_107_72718_20120505_025006_outLine
+BABEL_BP_107_72879_20120403_141911_inLine
+BABEL_BP_107_72879_20120403_141911_outLine
+BABEL_BP_107_73205_20120211_191427_outLine
+BABEL_BP_107_73438_20120502_201055_inLine
+BABEL_BP_107_73438_20120502_201055_outLine
+BABEL_BP_107_73440_20120416_172035_inLine
+BABEL_BP_107_73761_20120424_154013_inLine
+BABEL_BP_107_73761_20120424_154013_outLine
+BABEL_BP_107_73921_20120501_000425_outLine
+BABEL_BP_107_74043_20120323_014301_outLine
+BABEL_BP_107_74317_20120502_225211_inLine
+BABEL_BP_107_74317_20120502_225211_outLine
+BABEL_BP_107_74395_20120418_140703_inLine
+BABEL_BP_107_74395_20120418_140703_outLine
+BABEL_BP_107_74404_20120212_134850_outLine
+BABEL_BP_107_74625_20120425_234344_inLine
+BABEL_BP_107_74700_20120610_233419_inLine
+BABEL_BP_107_74823_20120217_022832_inLine
+BABEL_BP_107_74823_20120217_022832_outLine
+BABEL_BP_107_74935_20120616_144642_inLine
+BABEL_BP_107_74974_20120617_143904_inLine
+BABEL_BP_107_74974_20120617_143904_outLine
+BABEL_BP_107_74986_20120416_011008_inLine
+BABEL_BP_107_74986_20120416_011008_outLine
+BABEL_BP_107_74986_20120416_011927_inLine
+BABEL_BP_107_74986_20120416_011927_outLine
+BABEL_BP_107_75036_20120325_233130_inLine
+BABEL_BP_107_75036_20120325_233130_outLine
+BABEL_BP_107_75333_20120505_200116_inLine
+BABEL_BP_107_75333_20120505_200116_outLine
+BABEL_BP_107_75498_20120506_171232_inLine
+BABEL_BP_107_75498_20120506_171232_outLine
+BABEL_BP_107_75799_20120429_140233_inLine
+BABEL_BP_107_75799_20120429_140233_outLine
+BABEL_BP_107_75815_20120217_141539_inLine
+BABEL_BP_107_75815_20120217_141539_outLine
+BABEL_BP_107_76252_20120705_003603_outLine
+BABEL_BP_107_76341_20120215_201638_inLine
+BABEL_BP_107_76341_20120215_201638_outLine
+BABEL_BP_107_76661_20120405_132625_inLine
+BABEL_BP_107_76691_20120501_002016_inLine
+BABEL_BP_107_76716_20120418_215649_outLine
+BABEL_BP_107_76733_20120424_181359_inLine
+BABEL_BP_107_76733_20120424_181359_outLine
+BABEL_BP_107_76733_20120424_183605_inLine
+BABEL_BP_107_76733_20120424_183605_outLine
+BABEL_BP_107_76748_20120504_181420_inLine
+BABEL_BP_107_76919_20120507_010805_outLine
+BABEL_BP_107_76925_20120407_015139_inLine
+BABEL_BP_107_76944_20120505_000745_inLine
+BABEL_BP_107_76944_20120505_000745_outLine
+BABEL_BP_107_76993_20120501_125118_inLine
+BABEL_BP_107_76993_20120501_125118_outLine
+BABEL_BP_107_77238_20120322_211133_outLine
+BABEL_BP_107_77244_20120429_164842_inLine
+BABEL_BP_107_77244_20120429_164842_outLine
+BABEL_BP_107_77315_20120527_222821_outLine
+BABEL_BP_107_77338_20120617_171454_inLine
+BABEL_BP_107_77338_20120617_171454_outLine
+BABEL_BP_107_77473_20120610_000112_inLine
+BABEL_BP_107_77886_20120326_191938_inLine
+BABEL_BP_107_77886_20120326_191938_outLine
+BABEL_BP_107_78094_20120212_205141_inLine
+BABEL_BP_107_78094_20120212_205141_outLine
+BABEL_BP_107_78487_20120430_133108_inLine
+BABEL_BP_107_78487_20120430_133108_outLine
+BABEL_BP_107_78514_20120617_131155_outLine
+BABEL_BP_107_79284_20120511_180310_inLine
+BABEL_BP_107_79284_20120511_180310_outLine
+BABEL_BP_107_79495_20120222_195716_inLine
+BABEL_BP_107_79619_20120420_115502_inLine
+BABEL_BP_107_79619_20120420_115502_outLine
+BABEL_BP_107_79632_20120428_182831_inLine
+BABEL_BP_107_79632_20120428_182831_outLine
+BABEL_BP_107_79860_20120328_023545_inLine
+BABEL_BP_107_79944_20120424_213833_inLine
+BABEL_BP_107_79970_20120418_214316_inLine
+BABEL_BP_107_80008_20120218_225347_inLine
+BABEL_BP_107_80008_20120218_225347_outLine
+BABEL_BP_107_80282_20120627_190514_inLine
+BABEL_BP_107_80282_20120627_190935_inLine
+BABEL_BP_107_80290_20120501_134226_inLine
+BABEL_BP_107_80290_20120501_134226_outLine
+BABEL_BP_107_80337_20120608_000801_inLine
+BABEL_BP_107_80337_20120608_000801_outLine
+BABEL_BP_107_80638_20120501_223037_inLine
+BABEL_BP_107_80638_20120501_223037_outLine
+BABEL_BP_107_81056_20120502_155358_inLine
+BABEL_BP_107_81056_20120502_155358_outLine
+BABEL_BP_107_81321_20120329_030424_outLine
+BABEL_BP_107_81486_20120213_035232_inLine
+BABEL_BP_107_81486_20120213_040319_inLine
+BABEL_BP_107_81535_20120421_151505_inLine
+BABEL_BP_107_81535_20120421_151505_outLine
+BABEL_BP_107_81611_20120616_154507_outLine
+BABEL_BP_107_81717_20120426_185608_inLine
+BABEL_BP_107_82006_20120417_133143_outLine
+BABEL_BP_107_82025_20120325_012956_inLine
+BABEL_BP_107_82103_20120326_172335_inLine
+BABEL_BP_107_82103_20120326_172335_outLine
+BABEL_BP_107_82131_20120704_135728_inLine
+BABEL_BP_107_82131_20120704_211005_inLine
+BABEL_BP_107_82591_20120407_185008_outLine
+BABEL_BP_107_82599_20120608_140933_outLine
+BABEL_BP_107_82766_20120627_112435_outLine
+BABEL_BP_107_82880_20120705_001819_inLine
+BABEL_BP_107_82880_20120705_001819_outLine
+BABEL_BP_107_83017_20120608_125136_inLine
+BABEL_BP_107_83053_20120426_184045_inLine
+BABEL_BP_107_83256_20120212_162557_outLine
+BABEL_BP_107_83360_20120418_000230_inLine
+BABEL_BP_107_83360_20120418_000230_outLine
+BABEL_BP_107_83529_20120608_152238_outLine
+BABEL_BP_107_83700_20120427_121525_inLine
+BABEL_BP_107_83700_20120427_121525_outLine
+BABEL_BP_107_83702_20120418_010601_inLine
+BABEL_BP_107_83702_20120418_010601_outLine
+BABEL_BP_107_83982_20120704_125429_outLine
+BABEL_BP_107_83982_20120704_125430_inLine
+BABEL_BP_107_83982_20120704_131324_inLine
+BABEL_BP_107_83982_20120704_131324_outLine
+BABEL_BP_107_84171_20120504_185725_inLine
+BABEL_BP_107_84335_20120418_002843_inLine
+BABEL_BP_107_84532_20120703_171302_inLine
+BABEL_BP_107_84540_20120328_205952_outLine
+BABEL_BP_107_84543_20120503_005623_inLine
+BABEL_BP_107_84543_20120503_005623_outLine
+BABEL_BP_107_84943_20120405_134459_inLine
+BABEL_BP_107_85083_20120425_024151_inLine
+BABEL_BP_107_85354_20120704_145327_inLine
+BABEL_BP_107_85354_20120704_145327_outLine
+BABEL_BP_107_85573_20120208_152239_inLine
+BABEL_BP_107_85686_20120627_180412_inLine
+BABEL_BP_107_85686_20120627_180413_outLine
+BABEL_BP_107_85716_20120330_201512_outLine
+BABEL_BP_107_85716_20120330_202652_outLine
+BABEL_BP_107_85819_20120705_030943_inLine
+BABEL_BP_107_85819_20120705_030944_outLine
+BABEL_BP_107_86016_20120417_225748_inLine
+BABEL_BP_107_86029_20120212_235447_inLine
+BABEL_BP_107_86419_20120209_010052_inLine
+BABEL_BP_107_86419_20120209_010052_outLine
+BABEL_BP_107_86890_20120322_202435_inLine
+BABEL_BP_107_87167_20120211_230800_outLine
+BABEL_BP_107_87481_20120513_191237_inLine
+BABEL_BP_107_87481_20120513_191237_outLine
+BABEL_BP_107_87539_20120418_225114_inLine
+BABEL_BP_107_87539_20120418_225114_outLine
+BABEL_BP_107_87671_20120218_011104_inLine
+BABEL_BP_107_87857_20120325_000202_inLine
+BABEL_BP_107_88243_20120322_210747_inLine
+BABEL_BP_107_88243_20120322_210747_outLine
+BABEL_BP_107_88253_20120511_165340_inLine
+BABEL_BP_107_88253_20120511_165340_outLine
+BABEL_BP_107_88294_20120322_163142_outLine
+BABEL_BP_107_88506_20120503_191321_inLine
+BABEL_BP_107_88506_20120503_191321_outLine
+BABEL_BP_107_88532_20120416_012644_inLine
+BABEL_BP_107_89619_20120217_174102_inLine
+BABEL_BP_107_89619_20120217_174102_outLine
+BABEL_BP_107_89644_20120501_170949_inLine
+BABEL_BP_107_89644_20120501_170949_outLine
+BABEL_BP_107_89657_20120610_213215_inLine
+BABEL_BP_107_89657_20120610_213215_outLine
+BABEL_BP_107_89674_20120212_162158_inLine
+BABEL_BP_107_89674_20120212_162158_outLine
+BABEL_BP_107_89965_20120505_003121_inLine
+BABEL_BP_107_89965_20120505_003121_outLine
+BABEL_BP_107_90313_20120325_200742_inLine
+BABEL_BP_107_90393_20120417_220816_inLine
+BABEL_BP_107_90393_20120417_220817_outLine
+BABEL_BP_107_90577_20120118_141830_inLine
+BABEL_BP_107_90577_20120118_141830_outLine
+BABEL_BP_107_90609_20120216_194251_inLine
+BABEL_BP_107_90764_20120418_004231_outLine
+BABEL_BP_107_91000_20120529_151028_inLine
+BABEL_BP_107_91002_20120429_192712_inLine
+BABEL_BP_107_91002_20120429_192712_outLine
+BABEL_BP_107_91007_20120612_144506_inLine
+BABEL_BP_107_91040_20120618_152624_outLine
+BABEL_BP_107_91136_20120427_122059_inLine
+BABEL_BP_107_91401_20120213_010307_inLine
+BABEL_BP_107_91401_20120213_010307_outLine
+BABEL_BP_107_91406_20120429_193057_inLine
+BABEL_BP_107_91406_20120429_193057_outLine
+BABEL_BP_107_91409_20120520_225023_outLine
+BABEL_BP_107_91409_20120520_231205_outLine
+BABEL_BP_107_91660_20120510_181954_inLine
+BABEL_BP_107_91660_20120510_181954_outLine
+BABEL_BP_107_91660_20120510_182853_inLine
+BABEL_BP_107_91660_20120510_182853_outLine
+BABEL_BP_107_91660_20120510_184146_inLine
+BABEL_BP_107_91660_20120510_184146_outLine
+BABEL_BP_107_91723_20120323_144335_outLine
+BABEL_BP_107_91865_20120429_214728_inLine
+BABEL_BP_107_91865_20120429_214728_outLine
+BABEL_BP_107_91905_20120504_210602_inLine
+BABEL_BP_107_91905_20120504_210602_outLine
+BABEL_BP_107_91975_20120703_173220_inLine
+BABEL_BP_107_91975_20120703_173220_outLine
+BABEL_BP_107_91979_20120209_000610_inLine
+BABEL_BP_107_92002_20120418_214926_outLine
+BABEL_BP_107_92407_20120210_183713_inLine
+BABEL_BP_107_92407_20120210_183713_outLine
+BABEL_BP_107_92436_20120213_013131_inLine
+BABEL_BP_107_92436_20120213_013131_outLine
+BABEL_BP_107_92591_20120505_140206_outLine
+BABEL_BP_107_92602_20120216_214746_inLine
+BABEL_BP_107_92602_20120216_215738_inLine
+BABEL_BP_107_92603_20120416_011244_inLine
+BABEL_BP_107_92603_20120416_011244_outLine
+BABEL_BP_107_92628_20120323_014512_inLine
+BABEL_BP_107_92628_20120323_014512_outLine
+BABEL_BP_107_92643_20120608_122156_inLine
+BABEL_BP_107_92643_20120608_123106_inLine
+BABEL_BP_107_92735_20120413_181602_inLine
+BABEL_BP_107_92789_20120416_165856_inLine
+BABEL_BP_107_92800_20120412_013211_outLine
+BABEL_BP_107_93044_20120607_140719_inLine
+BABEL_BP_107_93044_20120607_140719_outLine
+BABEL_BP_107_93509_20120321_230219_inLine
+BABEL_BP_107_93509_20120321_230219_outLine
+BABEL_BP_107_93804_20120703_232729_inLine
+BABEL_BP_107_93804_20120703_233401_inLine
+BABEL_BP_107_93974_20120627_184419_inLine
+BABEL_BP_107_93974_20120627_184419_outLine
+BABEL_BP_107_93979_20120422_134735_inLine
+BABEL_BP_107_93979_20120422_134735_outLine
+BABEL_BP_107_94149_20120405_220033_outLine
+BABEL_BP_107_94162_20120425_235433_inLine
+BABEL_BP_107_94223_20120215_204525_inLine
+BABEL_BP_107_94514_20120417_001615_inLine
+BABEL_BP_107_94514_20120417_001615_outLine
+BABEL_BP_107_94514_20120417_003504_inLine
+BABEL_BP_107_94514_20120417_003504_outLine
+BABEL_BP_107_94541_20120705_024032_outLine
+BABEL_BP_107_94542_20120512_223011_inLine
+BABEL_BP_107_94542_20120512_223011_outLine
+BABEL_BP_107_94694_20120508_120203_inLine
+BABEL_BP_107_94694_20120508_120203_outLine
+BABEL_BP_107_94696_20120608_185951_inLine
+BABEL_BP_107_94696_20120608_185951_outLine
+BABEL_BP_107_94814_20120501_130313_inLine
+BABEL_BP_107_94814_20120501_130313_outLine
+BABEL_BP_107_94989_20120627_120236_outLine
+BABEL_BP_107_95121_20120628_123304_inLine
+BABEL_BP_107_95423_20120415_201523_inLine
+BABEL_BP_107_95423_20120415_201523_outLine
+BABEL_BP_107_95533_20120505_005928_inLine
+BABEL_BP_107_95533_20120505_005928_outLine
+BABEL_BP_107_95542_20120502_223446_inLine
+BABEL_BP_107_95542_20120502_223446_outLine
+BABEL_BP_107_95566_20120505_162738_inLine
+BABEL_BP_107_95572_20120406_151856_inLine
+BABEL_BP_107_95572_20120406_151856_outLine
+BABEL_BP_107_95589_20120419_162645_inLine
+BABEL_BP_107_95589_20120419_162645_outLine
+BABEL_BP_107_95815_20120322_160344_inLine
+BABEL_BP_107_95815_20120322_160344_outLine
+BABEL_BP_107_95996_20120324_230119_inLine
+BABEL_BP_107_96302_20120510_023815_inLine
+BABEL_BP_107_96302_20120510_023815_outLine
+BABEL_BP_107_96667_20120426_182837_inLine
+BABEL_BP_107_96667_20120426_182837_outLine
+BABEL_BP_107_96959_20120505_014233_inLine
+BABEL_BP_107_96959_20120505_014233_outLine
+BABEL_BP_107_97260_20120324_012659_outLine
+BABEL_BP_107_97318_20120608_183537_inLine
+BABEL_BP_107_97318_20120608_183537_outLine
+BABEL_BP_107_97629_20120420_202833_inLine
+BABEL_BP_107_97629_20120420_202833_outLine
+BABEL_BP_107_97946_20120411_213631_outLine
+BABEL_BP_107_98086_20120609_185014_inLine
+BABEL_BP_107_98086_20120609_185014_outLine
+BABEL_BP_107_98099_20120618_120506_outLine
+BABEL_BP_107_98219_20120512_202308_inLine
+BABEL_BP_107_98219_20120512_202308_outLine
+BABEL_BP_107_98219_20120512_203451_inLine
+BABEL_BP_107_98219_20120512_203451_outLine
+BABEL_BP_107_98402_20120421_162435_inLine
+BABEL_BP_107_98402_20120421_162435_outLine
+BABEL_BP_107_98640_20120425_213908_outLine
+BABEL_BP_107_98675_20120419_225133_inLine
+BABEL_BP_107_98675_20120419_225133_outLine
+BABEL_BP_107_99414_20120430_200633_inLine
+BABEL_BP_107_99414_20120430_200633_outLine
+BABEL_BP_107_99567_20120405_154443_outLine
+BABEL_BP_107_99571_20120322_165034_inLine
+BABEL_BP_107_99571_20120322_165034_outLine
+BABEL_BP_107_99694_20120322_165823_inLine
+BABEL_BP_107_99694_20120322_165823_outLine
+BABEL_BP_107_99731_20120618_005616_outLine
+BABEL_BP_107_99764_20120415_202745_inLine
+BABEL_BP_107_99823_20120511_002213_inLine
+BABEL_BP_107_99823_20120511_002213_outLine
+BABEL_BP_107_99929_20120612_143030_inLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/dev.list b/egs/babel/s5d/conf/lists/201-haitian/dev.list
new file mode 100644
index 00000000000..208f92ee9cb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/dev.list
@@ -0,0 +1,126 @@
+BABEL_OP1_201_10019_20130527_022947_inLine
+BABEL_OP1_201_10019_20130527_022947_outLine
+BABEL_OP1_201_10319_20130306_021244_inLine
+BABEL_OP1_201_10319_20130306_021244_outLine
+BABEL_OP1_201_14440_20130302_012105_inLine
+BABEL_OP1_201_14440_20130302_012105_outLine
+BABEL_OP1_201_15324_20130228_031225_inLine
+BABEL_OP1_201_15324_20130228_031225_outLine
+BABEL_OP1_201_15535_20130305_062354_inLine
+BABEL_OP1_201_15535_20130305_062354_outLine
+BABEL_OP1_201_15638_20130305_060156_inLine
+BABEL_OP1_201_15638_20130305_060156_outLine
+BABEL_OP1_201_16184_20130305_081912_inLine
+BABEL_OP1_201_16184_20130305_081912_outLine
+BABEL_OP1_201_21029_20130529_114410_inLine
+BABEL_OP1_201_21029_20130529_114410_outLine
+BABEL_OP1_201_21029_20130529_115127_inLine
+BABEL_OP1_201_21029_20130529_115127_outLine
+BABEL_OP1_201_21109_20130414_085917_inLine
+BABEL_OP1_201_21109_20130414_085917_outLine
+BABEL_OP1_201_21393_20130501_071647_inLine
+BABEL_OP1_201_21393_20130501_071647_outLine
+BABEL_OP1_201_23151_20130428_054353_inLine
+BABEL_OP1_201_23151_20130428_054353_outLine
+BABEL_OP1_201_23983_20130503_023139_inLine
+BABEL_OP1_201_23983_20130503_023139_outLine
+BABEL_OP1_201_23983_20130503_023729_inLine
+BABEL_OP1_201_23983_20130503_023729_outLine
+BABEL_OP1_201_26074_20130522_003756_inLine
+BABEL_OP1_201_26074_20130522_003756_outLine
+BABEL_OP1_201_26206_20130302_073520_inLine
+BABEL_OP1_201_26206_20130302_073520_outLine
+BABEL_OP1_201_32832_20130430_060411_inLine
+BABEL_OP1_201_32832_20130430_060411_outLine
+BABEL_OP1_201_32861_20130429_111248_inLine
+BABEL_OP1_201_32861_20130429_111248_outLine
+BABEL_OP1_201_32998_20130531_000201_inLine
+BABEL_OP1_201_32998_20130531_000201_outLine
+BABEL_OP1_201_35583_20130429_033957_inLine
+BABEL_OP1_201_35583_20130429_033957_outLine
+BABEL_OP1_201_41400_20130430_094739_inLine
+BABEL_OP1_201_41400_20130430_094739_outLine
+BABEL_OP1_201_41609_20130404_034518_inLine
+BABEL_OP1_201_41609_20130404_034518_outLine
+BABEL_OP1_201_45843_20130227_092425_inLine
+BABEL_OP1_201_45843_20130227_092425_outLine
+BABEL_OP1_201_45843_20130227_095551_inLine
+BABEL_OP1_201_45843_20130227_095551_outLine
+BABEL_OP1_201_46315_20130302_045420_inLine
+BABEL_OP1_201_46315_20130302_045420_outLine
+BABEL_OP1_201_47877_20130429_092603_inLine
+BABEL_OP1_201_47877_20130429_092603_outLine
+BABEL_OP1_201_49197_20130529_061436_inLine
+BABEL_OP1_201_49197_20130529_061436_outLine
+BABEL_OP1_201_49287_20130227_083257_inLine
+BABEL_OP1_201_49287_20130227_083257_outLine
+BABEL_OP1_201_51858_20130224_055705_inLine
+BABEL_OP1_201_51858_20130224_055705_outLine
+BABEL_OP1_201_52025_20130226_082606_inLine
+BABEL_OP1_201_52025_20130226_082606_outLine
+BABEL_OP1_201_52694_20130518_050051_inLine
+BABEL_OP1_201_52694_20130518_050051_outLine
+BABEL_OP1_201_54162_20130508_044116_inLine
+BABEL_OP1_201_54162_20130508_044116_outLine
+BABEL_OP1_201_59898_20130223_041449_inLine
+BABEL_OP1_201_59898_20130223_041449_outLine
+BABEL_OP1_201_61011_20130228_062832_inLine
+BABEL_OP1_201_61011_20130228_062832_outLine
+BABEL_OP1_201_61357_20130602_030259_inLine
+BABEL_OP1_201_61357_20130602_030259_outLine
+BABEL_OP1_201_62456_20130521_040629_inLine
+BABEL_OP1_201_62456_20130521_040629_outLine
+BABEL_OP1_201_63757_20130531_014819_inLine
+BABEL_OP1_201_63757_20130531_014819_outLine
+BABEL_OP1_201_65252_20130503_025634_inLine
+BABEL_OP1_201_65252_20130503_025634_outLine
+BABEL_OP1_201_65640_20130429_103434_inLine
+BABEL_OP1_201_65640_20130429_103434_outLine
+BABEL_OP1_201_67085_20130503_043953_inLine
+BABEL_OP1_201_67085_20130503_043953_outLine
+BABEL_OP1_201_67842_20130528_081111_inLine
+BABEL_OP1_201_67842_20130528_081111_outLine
+BABEL_OP1_201_70110_20130224_022802_inLine
+BABEL_OP1_201_70110_20130224_022802_outLine
+BABEL_OP1_201_70716_20130503_015538_inLine
+BABEL_OP1_201_70716_20130503_015538_outLine
+BABEL_OP1_201_70986_20130307_075426_inLine
+BABEL_OP1_201_70986_20130307_075426_outLine
+BABEL_OP1_201_71263_20130602_021725_inLine
+BABEL_OP1_201_71263_20130602_021725_outLine
+BABEL_OP1_201_72654_20130510_063658_inLine
+BABEL_OP1_201_72654_20130510_063658_outLine
+BABEL_OP1_201_74226_20130303_125222_inLine
+BABEL_OP1_201_74226_20130303_125222_outLine
+BABEL_OP1_201_75223_20130221_024906_inLine
+BABEL_OP1_201_75223_20130221_024906_outLine
+BABEL_OP1_201_77112_20130528_050544_inLine
+BABEL_OP1_201_77112_20130528_050544_outLine
+BABEL_OP1_201_78194_20121206_064008_inLine
+BABEL_OP1_201_78194_20121206_064008_outLine
+BABEL_OP1_201_78360_20130430_101414_inLine
+BABEL_OP1_201_78360_20130430_101414_outLine
+BABEL_OP1_201_78454_20130531_032436_inLine
+BABEL_OP1_201_78454_20130531_032436_outLine
+BABEL_OP1_201_79571_20130302_074959_inLine
+BABEL_OP1_201_79571_20130302_074959_outLine
+BABEL_OP1_201_80881_20130220_022131_inLine
+BABEL_OP1_201_80881_20130220_022131_outLine
+BABEL_OP1_201_81553_20130430_095301_inLine
+BABEL_OP1_201_81553_20130430_095301_outLine
+BABEL_OP1_201_82035_20130601_052036_inLine
+BABEL_OP1_201_82035_20130601_052036_outLine
+BABEL_OP1_201_84125_20130227_022410_inLine
+BABEL_OP1_201_84125_20130227_022410_outLine
+BABEL_OP1_201_85439_20130503_071053_inLine
+BABEL_OP1_201_85439_20130503_071053_outLine
+BABEL_OP1_201_88982_20130512_060722_inLine
+BABEL_OP1_201_88982_20130512_060722_outLine
+BABEL_OP1_201_96584_20130427_001740_inLine
+BABEL_OP1_201_96584_20130427_001740_outLine
+BABEL_OP1_201_96842_20130503_081834_inLine
+BABEL_OP1_201_96842_20130503_081834_outLine
+BABEL_OP1_201_96985_20130313_031020_inLine
+BABEL_OP1_201_96985_20130313_031020_outLine
+BABEL_OP1_201_99813_20130514_080612_inLine
+BABEL_OP1_201_99813_20130514_080612_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/eval.list b/egs/babel/s5d/conf/lists/201-haitian/eval.list
new file mode 100644
index 00000000000..d9a4445b43d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/eval.list
@@ -0,0 +1,194 @@
+BABEL_OP1_201_10188_20121207_034031_inLine
+BABEL_OP1_201_10188_20121207_034031_outLine
+BABEL_OP1_201_11581_20130524_035647_inLine
+BABEL_OP1_201_11581_20130524_035647_outLine
+BABEL_OP1_201_13427_20130517_044959_inLine
+BABEL_OP1_201_13427_20130517_044959_outLine
+BABEL_OP1_201_14228_20130312_063112_inLine
+BABEL_OP1_201_14228_20130312_063112_outLine
+BABEL_OP1_201_14537_20130604_084139_inLine
+BABEL_OP1_201_14537_20130604_084139_outLine
+BABEL_OP1_201_15926_20130302_065808_inLine
+BABEL_OP1_201_15926_20130302_065808_outLine
+BABEL_OP1_201_16056_20130328_050018_inLine
+BABEL_OP1_201_16056_20130328_050018_outLine
+BABEL_OP1_201_17165_20130509_020154_inLine
+BABEL_OP1_201_17165_20130509_020154_outLine
+BABEL_OP1_201_18242_20130603_023106_inLine
+BABEL_OP1_201_18242_20130603_023106_outLine
+BABEL_OP1_201_19101_20130521_032103_inLine
+BABEL_OP1_201_19101_20130521_032103_outLine
+BABEL_OP1_201_19545_20130517_060948_inLine
+BABEL_OP1_201_19545_20130517_060948_outLine
+BABEL_OP1_201_19621_20130517_031837_inLine
+BABEL_OP1_201_19621_20130517_031837_outLine
+BABEL_OP1_201_19672_20130301_110157_inLine
+BABEL_OP1_201_19672_20130301_110157_outLine
+BABEL_OP1_201_22641_20130222_024500_inLine
+BABEL_OP1_201_22641_20130222_024500_outLine
+BABEL_OP1_201_23260_20130502_085418_inLine
+BABEL_OP1_201_23260_20130502_085418_outLine
+BABEL_OP1_201_23395_20130521_052906_inLine
+BABEL_OP1_201_23395_20130521_052906_outLine
+BABEL_OP1_201_23628_20130528_052627_inLine
+BABEL_OP1_201_23628_20130528_052627_outLine
+BABEL_OP1_201_23731_20130517_014107_inLine
+BABEL_OP1_201_23731_20130517_014107_outLine
+BABEL_OP1_201_25412_20130531_050830_inLine
+BABEL_OP1_201_25412_20130531_050830_outLine
+BABEL_OP1_201_31484_20130304_060634_inLine
+BABEL_OP1_201_31484_20130304_060634_outLine
+BABEL_OP1_201_31583_20130630_090026_inLine
+BABEL_OP1_201_31583_20130630_090026_outLine
+BABEL_OP1_201_34019_20130224_123823_inLine
+BABEL_OP1_201_34019_20130224_123823_outLine
+BABEL_OP1_201_34688_20130226_033106_inLine
+BABEL_OP1_201_34688_20130226_033106_outLine
+BABEL_OP1_201_35202_20130228_143257_inLine
+BABEL_OP1_201_35202_20130228_143257_outLine
+BABEL_OP1_201_35202_20130228_144257_inLine
+BABEL_OP1_201_35202_20130228_144257_outLine
+BABEL_OP1_201_37064_20130528_095008_inLine
+BABEL_OP1_201_37064_20130528_095008_outLine
+BABEL_OP1_201_37290_20130602_070243_inLine
+BABEL_OP1_201_37290_20130602_070243_outLine
+BABEL_OP1_201_39159_20130226_043216_inLine
+BABEL_OP1_201_39159_20130226_043216_outLine
+BABEL_OP1_201_39744_20130226_025333_inLine
+BABEL_OP1_201_39744_20130226_025333_outLine
+BABEL_OP1_201_41038_20130301_095640_inLine
+BABEL_OP1_201_41038_20130301_095640_outLine
+BABEL_OP1_201_41745_20130530_021647_inLine
+BABEL_OP1_201_41745_20130530_021647_outLine
+BABEL_OP1_201_43285_20130303_112216_inLine
+BABEL_OP1_201_43285_20130303_112216_outLine
+BABEL_OP1_201_44255_20130427_232421_inLine
+BABEL_OP1_201_44255_20130427_232421_outLine
+BABEL_OP1_201_44255_20130427_233501_inLine
+BABEL_OP1_201_44255_20130427_233501_outLine
+BABEL_OP1_201_44847_20130228_021744_inLine
+BABEL_OP1_201_44847_20130228_021744_outLine
+BABEL_OP1_201_44847_20130301_014421_inLine
+BABEL_OP1_201_44847_20130301_014421_outLine
+BABEL_OP1_201_44868_20130301_094502_inLine
+BABEL_OP1_201_44868_20130301_094502_outLine
+BABEL_OP1_201_44868_20130301_095004_inLine
+BABEL_OP1_201_44868_20130301_095004_outLine
+BABEL_OP1_201_45106_20130228_110111_inLine
+BABEL_OP1_201_45106_20130228_110111_outLine
+BABEL_OP1_201_46202_20130301_041831_inLine
+BABEL_OP1_201_46202_20130301_041831_outLine
+BABEL_OP1_201_46712_20130527_095034_inLine
+BABEL_OP1_201_46712_20130527_095034_outLine
+BABEL_OP1_201_46974_20130305_032251_inLine
+BABEL_OP1_201_46974_20130305_032251_outLine
+BABEL_OP1_201_49775_20130312_061709_inLine
+BABEL_OP1_201_49775_20130312_061709_outLine
+BABEL_OP1_201_52222_20130221_115458_inLine
+BABEL_OP1_201_52222_20130221_115458_outLine
+BABEL_OP1_201_52442_20130511_033818_inLine
+BABEL_OP1_201_52442_20130511_033818_outLine
+BABEL_OP1_201_54405_20130512_043326_inLine
+BABEL_OP1_201_54405_20130512_043326_outLine
+BABEL_OP1_201_56523_20130530_035306_inLine
+BABEL_OP1_201_56523_20130530_035306_outLine
+BABEL_OP1_201_56720_20130305_084355_inLine
+BABEL_OP1_201_56720_20130305_084355_outLine
+BABEL_OP1_201_57609_20130519_003542_inLine
+BABEL_OP1_201_57609_20130519_003542_outLine
+BABEL_OP1_201_57922_20130601_024619_inLine
+BABEL_OP1_201_57922_20130601_024619_outLine
+BABEL_OP1_201_57935_20130522_034918_inLine
+BABEL_OP1_201_57935_20130522_034918_outLine
+BABEL_OP1_201_59645_20130510_022401_inLine
+BABEL_OP1_201_59645_20130510_022401_outLine
+BABEL_OP1_201_60352_20130301_071549_inLine
+BABEL_OP1_201_60352_20130301_071549_outLine
+BABEL_OP1_201_60352_20130301_072624_inLine
+BABEL_OP1_201_60352_20130301_072624_outLine
+BABEL_OP1_201_60508_20130221_023139_inLine
+BABEL_OP1_201_60508_20130221_023139_outLine
+BABEL_OP1_201_61440_20130602_061805_inLine
+BABEL_OP1_201_61440_20130602_061805_outLine
+BABEL_OP1_201_61963_20130430_084852_inLine
+BABEL_OP1_201_61963_20130430_084852_outLine
+BABEL_OP1_201_62155_20130507_055437_inLine
+BABEL_OP1_201_62155_20130507_055437_outLine
+BABEL_OP1_201_63309_20130214_111801_inLine
+BABEL_OP1_201_63309_20130214_111801_outLine
+BABEL_OP1_201_63481_20130306_031400_inLine
+BABEL_OP1_201_63481_20130306_031400_outLine
+BABEL_OP1_201_63511_20130704_101544_inLine
+BABEL_OP1_201_63511_20130704_101544_outLine
+BABEL_OP1_201_64638_20130228_015923_inLine
+BABEL_OP1_201_64638_20130228_015923_outLine
+BABEL_OP1_201_64870_20130521_011614_inLine
+BABEL_OP1_201_64870_20130521_011614_outLine
+BABEL_OP1_201_66967_20130223_042440_inLine
+BABEL_OP1_201_66967_20130223_042440_outLine
+BABEL_OP1_201_67552_20130302_031450_inLine
+BABEL_OP1_201_67552_20130302_031450_outLine
+BABEL_OP1_201_67592_20130413_085928_inLine
+BABEL_OP1_201_67592_20130413_085928_outLine
+BABEL_OP1_201_67794_20130528_054900_inLine
+BABEL_OP1_201_67794_20130528_054900_outLine
+BABEL_OP1_201_67794_20130528_060329_inLine
+BABEL_OP1_201_67794_20130528_060329_outLine
+BABEL_OP1_201_68059_20130514_015440_inLine
+BABEL_OP1_201_68059_20130514_015440_outLine
+BABEL_OP1_201_69633_20130302_015041_inLine
+BABEL_OP1_201_69633_20130302_015041_outLine
+BABEL_OP1_201_73757_20130510_051523_inLine
+BABEL_OP1_201_73757_20130510_051523_outLine
+BABEL_OP1_201_75359_20130502_024157_inLine
+BABEL_OP1_201_75359_20130502_024157_outLine
+BABEL_OP1_201_76773_20130529_015651_inLine
+BABEL_OP1_201_76773_20130529_015651_outLine
+BABEL_OP1_201_77139_20130221_080959_inLine
+BABEL_OP1_201_77139_20130221_080959_outLine
+BABEL_OP1_201_77391_20130529_083139_inLine
+BABEL_OP1_201_77391_20130529_083139_outLine
+BABEL_OP1_201_77567_20130305_071815_inLine
+BABEL_OP1_201_77567_20130305_071815_outLine
+BABEL_OP1_201_78630_20130604_103056_inLine
+BABEL_OP1_201_78630_20130604_103056_outLine
+BABEL_OP1_201_80897_20130602_013830_inLine
+BABEL_OP1_201_80897_20130602_013830_outLine
+BABEL_OP1_201_81229_20130529_053302_inLine
+BABEL_OP1_201_81229_20130529_053302_outLine
+BABEL_OP1_201_81404_20130528_042634_inLine
+BABEL_OP1_201_81404_20130528_042634_outLine
+BABEL_OP1_201_82030_20130704_095440_inLine
+BABEL_OP1_201_82030_20130704_095440_outLine
+BABEL_OP1_201_82030_20130704_100506_inLine
+BABEL_OP1_201_82030_20130704_100506_outLine
+BABEL_OP1_201_83366_20130228_065600_inLine
+BABEL_OP1_201_83366_20130228_065600_outLine
+BABEL_OP1_201_83783_20130524_015629_inLine
+BABEL_OP1_201_83783_20130524_015629_outLine
+BABEL_OP1_201_84327_20130305_092405_inLine
+BABEL_OP1_201_84327_20130305_092405_outLine
+BABEL_OP1_201_84583_20130518_020910_inLine
+BABEL_OP1_201_84583_20130518_020910_outLine
+BABEL_OP1_201_86748_20130428_024819_inLine
+BABEL_OP1_201_86748_20130428_024819_outLine
+BABEL_OP1_201_89045_20130307_055651_inLine
+BABEL_OP1_201_89045_20130307_055651_outLine
+BABEL_OP1_201_91930_20130429_004949_inLine
+BABEL_OP1_201_91930_20130429_004949_outLine
+BABEL_OP1_201_91930_20130429_005907_inLine
+BABEL_OP1_201_91930_20130429_005907_outLine
+BABEL_OP1_201_92060_20130502_110221_inLine
+BABEL_OP1_201_92060_20130502_110221_outLine
+BABEL_OP1_201_92698_20130510_005433_inLine
+BABEL_OP1_201_92698_20130510_005433_outLine
+BABEL_OP1_201_93861_20130512_005008_inLine
+BABEL_OP1_201_93861_20130512_005008_outLine
+BABEL_OP1_201_94141_20130430_122007_inLine
+BABEL_OP1_201_94141_20130430_122007_outLine
+BABEL_OP1_201_94166_20130429_044116_inLine
+BABEL_OP1_201_94166_20130429_044116_outLine
+BABEL_OP1_201_94587_20130305_100125_inLine
+BABEL_OP1_201_94587_20130305_100125_outLine
+BABEL_OP1_201_94745_20130301_131752_inLine
+BABEL_OP1_201_94745_20130301_131752_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
new file mode 100644
index 00000000000..0b771a04457
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
@@ -0,0 +1,64 @@
+BABEL_OP1_201_15926_20130302_065808_inLine
+BABEL_OP1_201_15926_20130302_065808_outLine
+BABEL_OP1_201_19545_20130517_060948_inLine
+BABEL_OP1_201_19545_20130517_060948_outLine
+BABEL_OP1_201_23395_20130521_052906_inLine
+BABEL_OP1_201_23395_20130521_052906_outLine
+BABEL_OP1_201_23628_20130528_052627_inLine
+BABEL_OP1_201_23628_20130528_052627_outLine
+BABEL_OP1_201_23731_20130517_014107_inLine
+BABEL_OP1_201_23731_20130517_014107_outLine
+BABEL_OP1_201_31583_20130630_090026_inLine
+BABEL_OP1_201_31583_20130630_090026_outLine
+BABEL_OP1_201_39159_20130226_043216_inLine
+BABEL_OP1_201_39159_20130226_043216_outLine
+BABEL_OP1_201_41745_20130530_021647_inLine
+BABEL_OP1_201_41745_20130530_021647_outLine
+BABEL_OP1_201_44255_20130427_232421_inLine
+BABEL_OP1_201_44255_20130427_232421_outLine
+BABEL_OP1_201_44255_20130427_233501_inLine
+BABEL_OP1_201_44255_20130427_233501_outLine
+BABEL_OP1_201_44868_20130301_094502_inLine
+BABEL_OP1_201_44868_20130301_094502_outLine
+BABEL_OP1_201_44868_20130301_095004_inLine
+BABEL_OP1_201_44868_20130301_095004_outLine
+BABEL_OP1_201_45106_20130228_110111_inLine
+BABEL_OP1_201_45106_20130228_110111_outLine
+BABEL_OP1_201_46712_20130527_095034_inLine
+BABEL_OP1_201_46712_20130527_095034_outLine
+BABEL_OP1_201_49775_20130312_061709_inLine
+BABEL_OP1_201_49775_20130312_061709_outLine
+BABEL_OP1_201_57922_20130601_024619_inLine
+BABEL_OP1_201_57922_20130601_024619_outLine
+BABEL_OP1_201_60508_20130221_023139_inLine
+BABEL_OP1_201_60508_20130221_023139_outLine
+BABEL_OP1_201_62155_20130507_055437_inLine
+BABEL_OP1_201_62155_20130507_055437_outLine
+BABEL_OP1_201_63481_20130306_031400_inLine
+BABEL_OP1_201_63481_20130306_031400_outLine
+BABEL_OP1_201_63511_20130704_101544_inLine
+BABEL_OP1_201_63511_20130704_101544_outLine
+BABEL_OP1_201_64638_20130228_015923_inLine
+BABEL_OP1_201_64638_20130228_015923_outLine
+BABEL_OP1_201_75359_20130502_024157_inLine
+BABEL_OP1_201_75359_20130502_024157_outLine
+BABEL_OP1_201_76773_20130529_015651_inLine
+BABEL_OP1_201_76773_20130529_015651_outLine
+BABEL_OP1_201_77139_20130221_080959_inLine
+BABEL_OP1_201_77139_20130221_080959_outLine
+BABEL_OP1_201_77567_20130305_071815_inLine
+BABEL_OP1_201_77567_20130305_071815_outLine
+BABEL_OP1_201_78630_20130604_103056_inLine
+BABEL_OP1_201_78630_20130604_103056_outLine
+BABEL_OP1_201_80897_20130602_013830_inLine
+BABEL_OP1_201_80897_20130602_013830_outLine
+BABEL_OP1_201_81229_20130529_053302_inLine
+BABEL_OP1_201_81229_20130529_053302_outLine
+BABEL_OP1_201_83366_20130228_065600_inLine
+BABEL_OP1_201_83366_20130228_065600_outLine
+BABEL_OP1_201_83783_20130524_015629_inLine
+BABEL_OP1_201_83783_20130524_015629_outLine
+BABEL_OP1_201_86748_20130428_024819_inLine
+BABEL_OP1_201_86748_20130428_024819_outLine
+BABEL_OP1_201_94141_20130430_122007_inLine
+BABEL_OP1_201_94141_20130430_122007_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
new file mode 100644
index 00000000000..751c6ca4652
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
@@ -0,0 +1,760 @@
+BABEL_OP1_201_10002_20130212_152853_inLine
+BABEL_OP1_201_10002_20130212_152853_outLine
+BABEL_OP1_201_10036_20130528_005502_inLine
+BABEL_OP1_201_10036_20130528_005502_outLine
+BABEL_OP1_201_10482_20130305_105317_inLine
+BABEL_OP1_201_10482_20130305_105317_outLine
+BABEL_OP1_201_10647_20130428_045536_inLine
+BABEL_OP1_201_10647_20130428_045536_outLine
+BABEL_OP1_201_10901_20130529_031421_inLine
+BABEL_OP1_201_10901_20130529_031421_outLine
+BABEL_OP1_201_11096_20130603_043221_inLine
+BABEL_OP1_201_11096_20130603_043221_outLine
+BABEL_OP1_201_11663_20130601_002903_inLine
+BABEL_OP1_201_11663_20130601_002903_outLine
+BABEL_OP1_201_11673_20130226_015822_inLine
+BABEL_OP1_201_11673_20130226_015822_outLine
+BABEL_OP1_201_11797_20130328_033102_inLine
+BABEL_OP1_201_11797_20130328_033102_outLine
+BABEL_OP1_201_12220_20130528_051622_inLine
+BABEL_OP1_201_12220_20130528_051622_outLine
+BABEL_OP1_201_12242_20130603_033446_inLine
+BABEL_OP1_201_12242_20130603_033446_outLine
+BABEL_OP1_201_12606_20130429_120351_inLine
+BABEL_OP1_201_12606_20130429_120351_outLine
+BABEL_OP1_201_12606_20130429_121040_inLine
+BABEL_OP1_201_12606_20130429_121040_outLine
+BABEL_OP1_201_12635_20130429_040127_inLine
+BABEL_OP1_201_12635_20130429_040127_outLine
+BABEL_OP1_201_12767_20130509_005500_inLine
+BABEL_OP1_201_12767_20130509_005500_outLine
+BABEL_OP1_201_13178_20130301_043649_inLine
+BABEL_OP1_201_13178_20130301_043649_outLine
+BABEL_OP1_201_13324_20130529_035029_inLine
+BABEL_OP1_201_13324_20130529_035029_outLine
+BABEL_OP1_201_13483_20130306_062423_inLine
+BABEL_OP1_201_13483_20130306_062423_outLine
+BABEL_OP1_201_13490_20130508_033252_inLine
+BABEL_OP1_201_13490_20130508_033252_outLine
+BABEL_OP1_201_13664_20130117_073343_inLine
+BABEL_OP1_201_13664_20130117_073343_outLine
+BABEL_OP1_201_14179_20130303_111502_inLine
+BABEL_OP1_201_14179_20130303_111502_outLine
+BABEL_OP1_201_14229_20130528_023254_inLine
+BABEL_OP1_201_14229_20130528_023254_outLine
+BABEL_OP1_201_14539_20130501_223201_inLine
+BABEL_OP1_201_14539_20130501_223201_outLine
+BABEL_OP1_201_14560_20130301_065543_inLine
+BABEL_OP1_201_14560_20130301_065543_outLine
+BABEL_OP1_201_14807_20130522_012156_inLine
+BABEL_OP1_201_14807_20130522_012156_outLine
+BABEL_OP1_201_14899_20130301_035636_inLine
+BABEL_OP1_201_14899_20130301_035636_outLine
+BABEL_OP1_201_14972_20130518_025852_inLine
+BABEL_OP1_201_14972_20130518_025852_outLine
+BABEL_OP1_201_15216_20130503_005405_inLine
+BABEL_OP1_201_15216_20130503_005405_outLine
+BABEL_OP1_201_15322_20130701_030436_inLine
+BABEL_OP1_201_15322_20130701_030436_outLine
+BABEL_OP1_201_15382_20130228_050819_inLine
+BABEL_OP1_201_15382_20130228_050819_outLine
+BABEL_OP1_201_15702_20130301_041117_inLine
+BABEL_OP1_201_15702_20130301_041117_outLine
+BABEL_OP1_201_15730_20130305_034450_inLine
+BABEL_OP1_201_15730_20130305_034450_outLine
+BABEL_OP1_201_15848_20130130_070404_inLine
+BABEL_OP1_201_15848_20130130_070404_outLine
+BABEL_OP1_201_15902_20130323_005824_inLine
+BABEL_OP1_201_15902_20130323_005824_outLine
+BABEL_OP1_201_16149_20130322_021647_inLine
+BABEL_OP1_201_16149_20130322_021647_outLine
+BABEL_OP1_201_16467_20130704_025921_inLine
+BABEL_OP1_201_16467_20130704_025921_outLine
+BABEL_OP1_201_16800_20130702_085158_inLine
+BABEL_OP1_201_16800_20130702_085158_outLine
+BABEL_OP1_201_16924_20130301_032937_inLine
+BABEL_OP1_201_16924_20130301_032937_outLine
+BABEL_OP1_201_16938_20130514_072820_inLine
+BABEL_OP1_201_16938_20130514_072820_outLine
+BABEL_OP1_201_17032_20130306_103506_inLine
+BABEL_OP1_201_17032_20130306_103506_outLine
+BABEL_OP1_201_17113_20130519_093427_inLine
+BABEL_OP1_201_17113_20130519_093427_outLine
+BABEL_OP1_201_17472_20130311_075957_inLine
+BABEL_OP1_201_17472_20130311_075957_outLine
+BABEL_OP1_201_17496_20130301_030157_inLine
+BABEL_OP1_201_17496_20130301_030157_outLine
+BABEL_OP1_201_17520_20130518_012147_inLine
+BABEL_OP1_201_17520_20130518_012147_outLine
+BABEL_OP1_201_17567_20130512_065938_inLine
+BABEL_OP1_201_17567_20130512_065938_outLine
+BABEL_OP1_201_17881_20130429_230318_inLine
+BABEL_OP1_201_17881_20130429_230318_outLine
+BABEL_OP1_201_17923_20130529_021211_inLine
+BABEL_OP1_201_17923_20130529_021211_outLine
+BABEL_OP1_201_18118_20130501_084131_inLine
+BABEL_OP1_201_18118_20130501_084131_outLine
+BABEL_OP1_201_18766_20130502_102418_inLine
+BABEL_OP1_201_18766_20130502_102418_outLine
+BABEL_OP1_201_19134_20130601_040621_inLine
+BABEL_OP1_201_19134_20130601_040621_outLine
+BABEL_OP1_201_19589_20130502_093932_inLine
+BABEL_OP1_201_19589_20130502_093932_outLine
+BABEL_OP1_201_19722_20130425_005348_inLine
+BABEL_OP1_201_19722_20130425_005348_outLine
+BABEL_OP1_201_19749_20130429_090621_inLine
+BABEL_OP1_201_19749_20130429_090621_outLine
+BABEL_OP1_201_19767_20130502_130900_inLine
+BABEL_OP1_201_19767_20130502_130900_outLine
+BABEL_OP1_201_19877_20130502_085421_inLine
+BABEL_OP1_201_19877_20130502_085421_outLine
+BABEL_OP1_201_20330_20130429_035418_inLine
+BABEL_OP1_201_20330_20130429_035418_outLine
+BABEL_OP1_201_20437_20130216_094002_inLine
+BABEL_OP1_201_20437_20130216_094002_outLine
+BABEL_OP1_201_20768_20130701_035344_inLine
+BABEL_OP1_201_20768_20130701_035344_outLine
+BABEL_OP1_201_20800_20130529_035944_inLine
+BABEL_OP1_201_20800_20130529_035944_outLine
+BABEL_OP1_201_20972_20130603_035417_inLine
+BABEL_OP1_201_20972_20130603_035417_outLine
+BABEL_OP1_201_21244_20130602_073304_inLine
+BABEL_OP1_201_21244_20130602_073304_outLine
+BABEL_OP1_201_21807_20130522_042858_inLine
+BABEL_OP1_201_21807_20130522_042858_outLine
+BABEL_OP1_201_21892_20130430_033520_inLine
+BABEL_OP1_201_21892_20130430_033520_outLine
+BABEL_OP1_201_22466_20121206_070403_inLine
+BABEL_OP1_201_22466_20121206_070403_outLine
+BABEL_OP1_201_22494_20130305_052405_inLine
+BABEL_OP1_201_22494_20130305_052405_outLine
+BABEL_OP1_201_22624_20130305_121723_inLine
+BABEL_OP1_201_22624_20130305_121723_outLine
+BABEL_OP1_201_23046_20130527_110737_inLine
+BABEL_OP1_201_23046_20130527_110737_outLine
+BABEL_OP1_201_23119_20130321_054320_inLine
+BABEL_OP1_201_23119_20130321_054320_outLine
+BABEL_OP1_201_23190_20130603_224243_inLine
+BABEL_OP1_201_23190_20130603_224243_outLine
+BABEL_OP1_201_23195_20130227_050013_inLine
+BABEL_OP1_201_23195_20130227_050013_outLine
+BABEL_OP1_201_23239_20130305_093734_inLine
+BABEL_OP1_201_23239_20130305_093734_outLine
+BABEL_OP1_201_23893_20130430_080021_inLine
+BABEL_OP1_201_23893_20130430_080021_outLine
+BABEL_OP1_201_24231_20130502_123747_inLine
+BABEL_OP1_201_24231_20130502_123747_outLine
+BABEL_OP1_201_24239_20130703_230221_inLine
+BABEL_OP1_201_24239_20130703_230221_outLine
+BABEL_OP1_201_24270_20130530_020630_inLine
+BABEL_OP1_201_24270_20130530_020630_outLine
+BABEL_OP1_201_24290_20130703_074550_inLine
+BABEL_OP1_201_24290_20130703_074550_outLine
+BABEL_OP1_201_24470_20130531_024204_inLine
+BABEL_OP1_201_24470_20130531_024204_outLine
+BABEL_OP1_201_24501_20130429_102945_inLine
+BABEL_OP1_201_24501_20130429_102945_outLine
+BABEL_OP1_201_24532_20130307_060030_inLine
+BABEL_OP1_201_24532_20130307_060030_outLine
+BABEL_OP1_201_24586_20130430_025349_inLine
+BABEL_OP1_201_24586_20130430_032300_inLine
+BABEL_OP1_201_24586_20130430_032300_outLine
+BABEL_OP1_201_24586_20130430_033306_inLine
+BABEL_OP1_201_24586_20130430_033306_outLine
+BABEL_OP1_201_24589_20130529_111014_inLine
+BABEL_OP1_201_24589_20130529_111014_outLine
+BABEL_OP1_201_24679_20130222_072407_inLine
+BABEL_OP1_201_24679_20130222_072407_outLine
+BABEL_OP1_201_24982_20130529_044009_inLine
+BABEL_OP1_201_24982_20130529_044009_outLine
+BABEL_OP1_201_25015_20130501_223825_inLine
+BABEL_OP1_201_25015_20130501_223825_outLine
+BABEL_OP1_201_25961_20130223_033405_inLine
+BABEL_OP1_201_25961_20130223_033405_outLine
+BABEL_OP1_201_26072_20130429_011940_inLine
+BABEL_OP1_201_26072_20130429_011940_outLine
+BABEL_OP1_201_26388_20130528_030259_inLine
+BABEL_OP1_201_26388_20130528_030259_outLine
+BABEL_OP1_201_26836_20130528_100100_inLine
+BABEL_OP1_201_26836_20130528_100100_outLine
+BABEL_OP1_201_26836_20130528_101331_inLine
+BABEL_OP1_201_26836_20130528_101331_outLine
+BABEL_OP1_201_26999_20130228_090136_inLine
+BABEL_OP1_201_26999_20130228_090136_outLine
+BABEL_OP1_201_27042_20130701_075011_inLine
+BABEL_OP1_201_27042_20130701_075011_outLine
+BABEL_OP1_201_27203_20130602_005950_inLine
+BABEL_OP1_201_27203_20130602_005950_outLine
+BABEL_OP1_201_27590_20130304_072243_inLine
+BABEL_OP1_201_27590_20130304_072243_outLine
+BABEL_OP1_201_28419_20130528_035005_inLine
+BABEL_OP1_201_28419_20130528_035005_outLine
+BABEL_OP1_201_28522_20130303_104614_inLine
+BABEL_OP1_201_28522_20130303_104614_outLine
+BABEL_OP1_201_28600_20130701_051100_inLine
+BABEL_OP1_201_28600_20130701_051100_outLine
+BABEL_OP1_201_28606_20130305_101646_inLine
+BABEL_OP1_201_28606_20130305_101646_outLine
+BABEL_OP1_201_28775_20130529_005204_inLine
+BABEL_OP1_201_28775_20130529_005204_outLine
+BABEL_OP1_201_28814_20130704_000405_inLine
+BABEL_OP1_201_28814_20130704_000405_outLine
+BABEL_OP1_201_28871_20121207_015933_inLine
+BABEL_OP1_201_28871_20121207_015933_outLine
+BABEL_OP1_201_28945_20130528_094913_inLine
+BABEL_OP1_201_28945_20130528_094913_outLine
+BABEL_OP1_201_29023_20130530_024701_inLine
+BABEL_OP1_201_29023_20130530_024701_outLine
+BABEL_OP1_201_29072_20130304_052508_inLine
+BABEL_OP1_201_29072_20130304_052508_outLine
+BABEL_OP1_201_29168_20130222_015942_inLine
+BABEL_OP1_201_29168_20130222_015942_outLine
+BABEL_OP1_201_30180_20130528_033242_inLine
+BABEL_OP1_201_30180_20130528_033242_outLine
+BABEL_OP1_201_30395_20130529_034626_inLine
+BABEL_OP1_201_30395_20130529_034626_outLine
+BABEL_OP1_201_30432_20130227_084229_inLine
+BABEL_OP1_201_30432_20130227_084229_outLine
+BABEL_OP1_201_30576_20130527_002801_inLine
+BABEL_OP1_201_30576_20130527_002801_outLine
+BABEL_OP1_201_31109_20130510_030741_inLine
+BABEL_OP1_201_31109_20130510_030741_outLine
+BABEL_OP1_201_31628_20130301_081256_inLine
+BABEL_OP1_201_31628_20130301_081256_outLine
+BABEL_OP1_201_32097_20130130_021717_inLine
+BABEL_OP1_201_32097_20130130_021717_outLine
+BABEL_OP1_201_32122_20130529_070011_inLine
+BABEL_OP1_201_32122_20130529_070011_outLine
+BABEL_OP1_201_32171_20130220_084632_inLine
+BABEL_OP1_201_32171_20130220_084632_outLine
+BABEL_OP1_201_32708_20130528_093343_inLine
+BABEL_OP1_201_32708_20130528_093343_outLine
+BABEL_OP1_201_33229_20130429_025144_inLine
+BABEL_OP1_201_33229_20130429_025144_outLine
+BABEL_OP1_201_33659_20130214_000335_inLine
+BABEL_OP1_201_33659_20130214_000335_outLine
+BABEL_OP1_201_33806_20130630_224040_inLine
+BABEL_OP1_201_33806_20130630_224040_outLine
+BABEL_OP1_201_34106_20130305_032650_inLine +BABEL_OP1_201_34106_20130305_032650_outLine +BABEL_OP1_201_34145_20130301_033324_inLine +BABEL_OP1_201_34145_20130301_033324_outLine +BABEL_OP1_201_34197_20130227_065321_inLine +BABEL_OP1_201_34197_20130227_065321_outLine +BABEL_OP1_201_34336_20130527_071806_inLine +BABEL_OP1_201_34336_20130527_071806_outLine +BABEL_OP1_201_34679_20130529_040931_inLine +BABEL_OP1_201_34679_20130529_040931_outLine +BABEL_OP1_201_34826_20130430_025628_inLine +BABEL_OP1_201_34826_20130430_025628_outLine +BABEL_OP1_201_34903_20130302_052444_inLine +BABEL_OP1_201_34903_20130302_052444_outLine +BABEL_OP1_201_35000_20130702_092721_inLine +BABEL_OP1_201_35000_20130702_092721_outLine +BABEL_OP1_201_35008_20130305_114402_inLine +BABEL_OP1_201_35008_20130305_114402_outLine +BABEL_OP1_201_35467_20130321_032230_inLine +BABEL_OP1_201_35467_20130321_032230_outLine +BABEL_OP1_201_36219_20130528_021139_inLine +BABEL_OP1_201_36219_20130528_021139_outLine +BABEL_OP1_201_36341_20130226_074136_inLine +BABEL_OP1_201_36341_20130226_074136_outLine +BABEL_OP1_201_36894_20130221_070614_inLine +BABEL_OP1_201_36894_20130221_070614_outLine +BABEL_OP1_201_37271_20130430_025526_inLine +BABEL_OP1_201_37271_20130430_025526_outLine +BABEL_OP1_201_37598_20130601_032226_inLine +BABEL_OP1_201_37598_20130601_032226_outLine +BABEL_OP1_201_38076_20130302_132339_inLine +BABEL_OP1_201_38076_20130302_132339_outLine +BABEL_OP1_201_38878_20130228_041057_inLine +BABEL_OP1_201_38878_20130228_041057_outLine +BABEL_OP1_201_39426_20130429_085957_inLine +BABEL_OP1_201_39426_20130429_085957_outLine +BABEL_OP1_201_39638_20130126_082343_inLine +BABEL_OP1_201_39638_20130126_082343_outLine +BABEL_OP1_201_40713_20130530_005109_inLine +BABEL_OP1_201_40713_20130530_005109_outLine +BABEL_OP1_201_41097_20130228_063046_inLine +BABEL_OP1_201_41097_20130228_063046_outLine +BABEL_OP1_201_41334_20130630_085009_inLine +BABEL_OP1_201_41334_20130630_085009_outLine +BABEL_OP1_201_41469_20130303_034949_inLine +BABEL_OP1_201_41469_20130303_034949_outLine +BABEL_OP1_201_41542_20130429_084921_inLine +BABEL_OP1_201_41542_20130429_084921_outLine +BABEL_OP1_201_41618_20130518_035113_inLine +BABEL_OP1_201_41618_20130518_035113_outLine +BABEL_OP1_201_41685_20130214_090836_inLine +BABEL_OP1_201_41685_20130214_090836_outLine +BABEL_OP1_201_41720_20130203_053934_inLine +BABEL_OP1_201_41720_20130203_053934_outLine +BABEL_OP1_201_41890_20130430_020800_inLine +BABEL_OP1_201_41890_20130430_020800_outLine +BABEL_OP1_201_42155_20130521_023245_inLine +BABEL_OP1_201_42155_20130521_023245_outLine +BABEL_OP1_201_42243_20130303_022442_inLine +BABEL_OP1_201_42243_20130303_022442_outLine +BABEL_OP1_201_42497_20130529_040557_inLine +BABEL_OP1_201_42497_20130529_040557_outLine +BABEL_OP1_201_42619_20130228_081700_inLine +BABEL_OP1_201_42619_20130228_081700_outLine +BABEL_OP1_201_42771_20130516_235914_inLine +BABEL_OP1_201_42771_20130516_235914_outLine +BABEL_OP1_201_42834_20130227_094847_inLine +BABEL_OP1_201_42834_20130227_094847_outLine +BABEL_OP1_201_42991_20130301_104105_inLine +BABEL_OP1_201_42991_20130301_104105_outLine +BABEL_OP1_201_43286_20130301_085932_inLine +BABEL_OP1_201_43286_20130301_085932_outLine +BABEL_OP1_201_43323_20130211_115349_inLine +BABEL_OP1_201_43323_20130211_120743_inLine +BABEL_OP1_201_43323_20130211_120743_outLine +BABEL_OP1_201_43588_20130430_054932_inLine +BABEL_OP1_201_43588_20130430_054932_outLine +BABEL_OP1_201_43646_20130130_080323_inLine +BABEL_OP1_201_43646_20130130_080323_outLine 
+BABEL_OP1_201_43784_20130529_104333_inLine +BABEL_OP1_201_43784_20130529_104333_outLine +BABEL_OP1_201_43794_20130603_014105_inLine +BABEL_OP1_201_43794_20130603_014105_outLine +BABEL_OP1_201_44477_20130302_072308_inLine +BABEL_OP1_201_44477_20130302_072308_outLine +BABEL_OP1_201_44477_20130302_073645_inLine +BABEL_OP1_201_44477_20130302_073645_outLine +BABEL_OP1_201_44478_20130502_075027_inLine +BABEL_OP1_201_44478_20130502_075027_outLine +BABEL_OP1_201_44709_20130303_114051_inLine +BABEL_OP1_201_44709_20130303_114051_outLine +BABEL_OP1_201_45559_20130503_033307_inLine +BABEL_OP1_201_45559_20130503_033307_outLine +BABEL_OP1_201_46066_20130429_123746_inLine +BABEL_OP1_201_46066_20130429_123746_outLine +BABEL_OP1_201_46169_20130702_011629_inLine +BABEL_OP1_201_46169_20130702_011629_outLine +BABEL_OP1_201_46310_20130328_024919_inLine +BABEL_OP1_201_46310_20130328_024919_outLine +BABEL_OP1_201_46550_20130528_065103_inLine +BABEL_OP1_201_46550_20130528_065103_outLine +BABEL_OP1_201_46558_20130220_030534_inLine +BABEL_OP1_201_46558_20130220_030534_outLine +BABEL_OP1_201_46589_20130302_082301_inLine +BABEL_OP1_201_46589_20130302_082301_outLine +BABEL_OP1_201_46625_20130308_141424_inLine +BABEL_OP1_201_46625_20130308_141424_outLine +BABEL_OP1_201_46681_20130530_033328_inLine +BABEL_OP1_201_46681_20130530_033328_outLine +BABEL_OP1_201_46770_20130429_011947_inLine +BABEL_OP1_201_46770_20130429_011947_outLine +BABEL_OP1_201_46976_20130517_023139_inLine +BABEL_OP1_201_46976_20130517_023139_outLine +BABEL_OP1_201_47270_20130427_010445_inLine +BABEL_OP1_201_47270_20130427_010445_outLine +BABEL_OP1_201_47270_20130427_011917_inLine +BABEL_OP1_201_47270_20130427_011917_outLine +BABEL_OP1_201_47270_20130427_013155_inLine +BABEL_OP1_201_47270_20130427_013155_outLine +BABEL_OP1_201_47802_20130524_044824_inLine +BABEL_OP1_201_47802_20130524_044824_outLine +BABEL_OP1_201_47878_20130522_021958_inLine +BABEL_OP1_201_47878_20130522_021958_outLine +BABEL_OP1_201_48243_20130602_122113_inLine +BABEL_OP1_201_48243_20130602_122113_outLine +BABEL_OP1_201_48299_20130226_120812_inLine +BABEL_OP1_201_48299_20130226_120812_outLine +BABEL_OP1_201_48299_20130226_122743_inLine +BABEL_OP1_201_48299_20130226_122743_outLine +BABEL_OP1_201_48907_20130429_093546_inLine +BABEL_OP1_201_48907_20130429_093546_outLine +BABEL_OP1_201_49027_20130529_101617_inLine +BABEL_OP1_201_49027_20130529_101617_outLine +BABEL_OP1_201_49118_20130429_023211_inLine +BABEL_OP1_201_49118_20130429_023211_outLine +BABEL_OP1_201_49216_20130314_070036_inLine +BABEL_OP1_201_49216_20130314_070036_outLine +BABEL_OP1_201_49502_20130302_064002_inLine +BABEL_OP1_201_49502_20130302_064002_outLine +BABEL_OP1_201_49630_20130306_105833_inLine +BABEL_OP1_201_49630_20130306_105833_outLine +BABEL_OP1_201_49637_20130426_020402_inLine +BABEL_OP1_201_49637_20130426_020402_outLine +BABEL_OP1_201_49768_20130529_082143_inLine +BABEL_OP1_201_49768_20130529_082143_outLine +BABEL_OP1_201_49902_20130527_063448_inLine +BABEL_OP1_201_49902_20130527_063448_outLine +BABEL_OP1_201_49907_20130529_101707_inLine +BABEL_OP1_201_49907_20130529_101707_outLine +BABEL_OP1_201_49945_20130501_080703_inLine +BABEL_OP1_201_49945_20130501_080703_outLine +BABEL_OP1_201_50549_20130428_053142_inLine +BABEL_OP1_201_50549_20130428_053142_outLine +BABEL_OP1_201_50549_20130428_055313_inLine +BABEL_OP1_201_50549_20130428_055313_outLine +BABEL_OP1_201_50601_20130521_045944_inLine +BABEL_OP1_201_50601_20130521_045944_outLine +BABEL_OP1_201_50681_20130228_015155_inLine 
+BABEL_OP1_201_50681_20130228_015155_outLine +BABEL_OP1_201_50681_20130228_020643_inLine +BABEL_OP1_201_50681_20130228_020643_outLine +BABEL_OP1_201_50726_20130228_033852_inLine +BABEL_OP1_201_50726_20130228_033852_outLine +BABEL_OP1_201_50779_20130522_051719_inLine +BABEL_OP1_201_50779_20130522_051719_outLine +BABEL_OP1_201_50810_20130312_055632_inLine +BABEL_OP1_201_50810_20130312_055632_outLine +BABEL_OP1_201_50940_20130309_041526_inLine +BABEL_OP1_201_50940_20130309_041526_outLine +BABEL_OP1_201_51611_20130530_094039_inLine +BABEL_OP1_201_51611_20130530_094039_outLine +BABEL_OP1_201_52301_20130223_024524_inLine +BABEL_OP1_201_52301_20130223_024524_outLine +BABEL_OP1_201_52404_20130301_233232_inLine +BABEL_OP1_201_52404_20130301_233232_outLine +BABEL_OP1_201_52422_20130428_023051_inLine +BABEL_OP1_201_52422_20130428_023051_outLine +BABEL_OP1_201_52490_20130220_051000_inLine +BABEL_OP1_201_52490_20130220_051000_outLine +BABEL_OP1_201_52804_20130529_032046_inLine +BABEL_OP1_201_52804_20130529_032046_outLine +BABEL_OP1_201_52818_20130301_121852_inLine +BABEL_OP1_201_52818_20130301_121852_outLine +BABEL_OP1_201_53917_20130429_091547_inLine +BABEL_OP1_201_53917_20130429_091547_outLine +BABEL_OP1_201_55259_20130526_073400_inLine +BABEL_OP1_201_55259_20130526_073400_outLine +BABEL_OP1_201_55267_20130228_064943_inLine +BABEL_OP1_201_55267_20130228_064943_outLine +BABEL_OP1_201_55968_20130314_043319_inLine +BABEL_OP1_201_55968_20130314_043319_outLine +BABEL_OP1_201_55968_20130314_044612_inLine +BABEL_OP1_201_55968_20130314_044612_outLine +BABEL_OP1_201_56023_20130501_081011_inLine +BABEL_OP1_201_56023_20130501_081011_outLine +BABEL_OP1_201_56307_20130301_024958_inLine +BABEL_OP1_201_56307_20130301_024958_outLine +BABEL_OP1_201_57065_20130302_033227_inLine +BABEL_OP1_201_57065_20130302_033227_outLine +BABEL_OP1_201_57093_20130510_071214_inLine +BABEL_OP1_201_57093_20130510_071214_outLine +BABEL_OP1_201_57233_20130206_090034_inLine +BABEL_OP1_201_57233_20130206_090034_outLine +BABEL_OP1_201_57464_20130428_051858_inLine +BABEL_OP1_201_57464_20130428_051858_outLine +BABEL_OP1_201_57548_20130518_042831_inLine +BABEL_OP1_201_57548_20130518_042831_outLine +BABEL_OP1_201_57678_20130528_022013_inLine +BABEL_OP1_201_57678_20130528_022013_outLine +BABEL_OP1_201_58107_20130518_004334_inLine +BABEL_OP1_201_58107_20130518_004334_outLine +BABEL_OP1_201_58145_20130602_044301_inLine +BABEL_OP1_201_58145_20130602_044301_outLine +BABEL_OP1_201_58313_20130522_055528_inLine +BABEL_OP1_201_58313_20130522_055528_outLine +BABEL_OP1_201_58585_20130429_003422_inLine +BABEL_OP1_201_58585_20130429_003422_outLine +BABEL_OP1_201_58821_20130306_091219_inLine +BABEL_OP1_201_58821_20130306_091219_outLine +BABEL_OP1_201_59039_20130220_090641_inLine +BABEL_OP1_201_59039_20130220_090641_outLine +BABEL_OP1_201_59509_20130227_090836_inLine +BABEL_OP1_201_59509_20130227_090836_outLine +BABEL_OP1_201_59509_20130227_092230_inLine +BABEL_OP1_201_59509_20130227_092230_outLine +BABEL_OP1_201_60115_20130301_114138_inLine +BABEL_OP1_201_60115_20130301_114138_outLine +BABEL_OP1_201_60418_20130301_073212_inLine +BABEL_OP1_201_60418_20130301_073212_outLine +BABEL_OP1_201_60436_20130503_044737_inLine +BABEL_OP1_201_60436_20130503_044737_outLine +BABEL_OP1_201_60474_20130527_081400_inLine +BABEL_OP1_201_60474_20130527_081400_outLine +BABEL_OP1_201_60661_20130529_023958_inLine +BABEL_OP1_201_60661_20130529_023958_outLine +BABEL_OP1_201_61435_20130430_031742_inLine +BABEL_OP1_201_61435_20130430_031742_outLine 
+BABEL_OP1_201_61873_20130519_030703_inLine +BABEL_OP1_201_61873_20130519_030703_outLine +BABEL_OP1_201_62014_20130228_083820_inLine +BABEL_OP1_201_62014_20130228_083820_outLine +BABEL_OP1_201_63081_20130226_035431_inLine +BABEL_OP1_201_63081_20130226_035431_outLine +BABEL_OP1_201_63084_20130301_114742_inLine +BABEL_OP1_201_63084_20130301_114742_outLine +BABEL_OP1_201_63307_20130521_235343_inLine +BABEL_OP1_201_63307_20130521_235343_outLine +BABEL_OP1_201_63425_20130301_080734_inLine +BABEL_OP1_201_63425_20130301_080734_outLine +BABEL_OP1_201_63604_20130412_021112_inLine +BABEL_OP1_201_63604_20130412_021112_outLine +BABEL_OP1_201_64259_20130202_090605_inLine +BABEL_OP1_201_64259_20130202_090605_outLine +BABEL_OP1_201_64398_20130301_084125_inLine +BABEL_OP1_201_64398_20130301_084125_outLine +BABEL_OP1_201_65064_20130521_061233_inLine +BABEL_OP1_201_65064_20130521_061233_outLine +BABEL_OP1_201_65561_20130305_120931_inLine +BABEL_OP1_201_65561_20130305_120931_outLine +BABEL_OP1_201_65723_20130529_004610_inLine +BABEL_OP1_201_65723_20130529_004610_outLine +BABEL_OP1_201_66045_20130509_044408_inLine +BABEL_OP1_201_66045_20130509_044408_outLine +BABEL_OP1_201_66472_20130517_041032_inLine +BABEL_OP1_201_66472_20130517_041032_outLine +BABEL_OP1_201_67213_20130224_044805_inLine +BABEL_OP1_201_67213_20130224_044805_outLine +BABEL_OP1_201_67283_20130223_012433_inLine +BABEL_OP1_201_67283_20130223_012433_outLine +BABEL_OP1_201_67401_20130522_063044_inLine +BABEL_OP1_201_67401_20130522_063044_outLine +BABEL_OP1_201_67622_20130306_012440_inLine +BABEL_OP1_201_67622_20130306_012440_outLine +BABEL_OP1_201_68040_20130517_004413_inLine +BABEL_OP1_201_68040_20130517_004413_outLine +BABEL_OP1_201_68068_20130302_042557_inLine +BABEL_OP1_201_68068_20130302_042557_outLine +BABEL_OP1_201_68244_20130228_052832_inLine +BABEL_OP1_201_68244_20130228_052832_outLine +BABEL_OP1_201_68306_20130301_132523_inLine +BABEL_OP1_201_68306_20130301_132523_outLine +BABEL_OP1_201_68748_20130301_051957_inLine +BABEL_OP1_201_68748_20130301_051957_outLine +BABEL_OP1_201_68924_20130228_031746_inLine +BABEL_OP1_201_68924_20130228_031746_outLine +BABEL_OP1_201_69107_20130518_053632_inLine +BABEL_OP1_201_69107_20130518_053632_outLine +BABEL_OP1_201_69574_20130313_015419_inLine +BABEL_OP1_201_69574_20130313_015419_outLine +BABEL_OP1_201_69578_20130509_033949_inLine +BABEL_OP1_201_69578_20130509_033949_outLine +BABEL_OP1_201_69636_20130302_024254_inLine +BABEL_OP1_201_69636_20130302_024254_outLine +BABEL_OP1_201_70343_20130302_035639_inLine +BABEL_OP1_201_70343_20130302_035639_outLine +BABEL_OP1_201_70343_20130302_040518_inLine +BABEL_OP1_201_70343_20130302_040518_outLine +BABEL_OP1_201_70386_20130528_033752_inLine +BABEL_OP1_201_70386_20130528_033752_outLine +BABEL_OP1_201_70601_20130528_025629_inLine +BABEL_OP1_201_70601_20130528_025629_outLine +BABEL_OP1_201_70794_20130314_065330_inLine +BABEL_OP1_201_70794_20130314_065330_outLine +BABEL_OP1_201_71121_20130215_075206_inLine +BABEL_OP1_201_71121_20130215_075206_outLine +BABEL_OP1_201_72324_20130227_080108_inLine +BABEL_OP1_201_72324_20130227_080108_outLine +BABEL_OP1_201_72349_20130527_005409_inLine +BABEL_OP1_201_72349_20130527_005409_outLine +BABEL_OP1_201_72587_20130227_092146_inLine +BABEL_OP1_201_72587_20130227_092146_outLine +BABEL_OP1_201_72844_20130320_030750_inLine +BABEL_OP1_201_72844_20130320_030750_outLine +BABEL_OP1_201_73430_20130306_070252_inLine +BABEL_OP1_201_73430_20130306_070252_outLine +BABEL_OP1_201_73485_20130704_012751_inLine 
+BABEL_OP1_201_73485_20130704_012751_outLine +BABEL_OP1_201_73511_20130305_064018_inLine +BABEL_OP1_201_73511_20130305_064018_outLine +BABEL_OP1_201_73518_20130427_020953_inLine +BABEL_OP1_201_73518_20130427_020953_outLine +BABEL_OP1_201_73591_20121205_085430_inLine +BABEL_OP1_201_73591_20121205_085430_outLine +BABEL_OP1_201_73591_20121205_091943_inLine +BABEL_OP1_201_73591_20121205_091943_outLine +BABEL_OP1_201_73964_20130502_060046_inLine +BABEL_OP1_201_73964_20130502_060046_outLine +BABEL_OP1_201_74280_20130307_060529_inLine +BABEL_OP1_201_74280_20130307_060529_outLine +BABEL_OP1_201_74728_20130502_015015_inLine +BABEL_OP1_201_74728_20130502_015015_outLine +BABEL_OP1_201_74799_20130530_004139_inLine +BABEL_OP1_201_74799_20130530_004139_outLine +BABEL_OP1_201_74921_20130302_015536_inLine +BABEL_OP1_201_74921_20130302_015536_outLine +BABEL_OP1_201_74921_20130302_020351_inLine +BABEL_OP1_201_74921_20130302_020351_outLine +BABEL_OP1_201_75064_20130528_032631_inLine +BABEL_OP1_201_75064_20130528_032631_outLine +BABEL_OP1_201_75342_20130305_071206_inLine +BABEL_OP1_201_75342_20130305_071206_outLine +BABEL_OP1_201_75764_20130428_041456_inLine +BABEL_OP1_201_75764_20130428_041456_outLine +BABEL_OP1_201_75993_20130529_053731_inLine +BABEL_OP1_201_75993_20130529_053731_outLine +BABEL_OP1_201_76683_20130524_053916_inLine +BABEL_OP1_201_76683_20130524_053916_outLine +BABEL_OP1_201_77126_20121205_072118_inLine +BABEL_OP1_201_77126_20121205_072118_outLine +BABEL_OP1_201_77427_20130528_003638_inLine +BABEL_OP1_201_77427_20130528_003638_outLine +BABEL_OP1_201_78116_20130304_074916_inLine +BABEL_OP1_201_78116_20130304_074916_outLine +BABEL_OP1_201_78398_20130529_023517_inLine +BABEL_OP1_201_78398_20130529_023517_outLine +BABEL_OP1_201_78943_20130528_034620_inLine +BABEL_OP1_201_78943_20130528_034620_outLine +BABEL_OP1_201_79129_20130524_031851_inLine +BABEL_OP1_201_79129_20130524_031851_outLine +BABEL_OP1_201_79167_20130303_071948_inLine +BABEL_OP1_201_79167_20130303_071948_outLine +BABEL_OP1_201_79167_20130303_093604_inLine +BABEL_OP1_201_79167_20130303_093604_outLine +BABEL_OP1_201_79429_20130216_152022_inLine +BABEL_OP1_201_79429_20130216_152022_outLine +BABEL_OP1_201_80306_20130509_071053_inLine +BABEL_OP1_201_80306_20130509_071053_outLine +BABEL_OP1_201_81287_20130305_141750_inLine +BABEL_OP1_201_81287_20130305_141750_outLine +BABEL_OP1_201_81392_20130304_082518_inLine +BABEL_OP1_201_81392_20130304_082518_outLine +BABEL_OP1_201_81424_20130304_080620_inLine +BABEL_OP1_201_81424_20130304_080620_outLine +BABEL_OP1_201_81433_20130514_063900_inLine +BABEL_OP1_201_81433_20130514_063900_outLine +BABEL_OP1_201_81674_20130224_134642_inLine +BABEL_OP1_201_81674_20130224_134642_outLine +BABEL_OP1_201_81810_20130302_043825_inLine +BABEL_OP1_201_81810_20130302_043825_outLine +BABEL_OP1_201_81971_20130227_030618_inLine +BABEL_OP1_201_81971_20130227_030618_outLine +BABEL_OP1_201_82123_20130505_053636_inLine +BABEL_OP1_201_82123_20130505_053636_outLine +BABEL_OP1_201_82138_20130509_063904_inLine +BABEL_OP1_201_82138_20130509_063904_outLine +BABEL_OP1_201_82140_20130510_013208_inLine +BABEL_OP1_201_82140_20130510_013208_outLine +BABEL_OP1_201_82637_20130227_044340_inLine +BABEL_OP1_201_82637_20130227_044340_outLine +BABEL_OP1_201_82904_20130427_005507_inLine +BABEL_OP1_201_82904_20130427_005507_outLine +BABEL_OP1_201_82979_20130529_063602_inLine +BABEL_OP1_201_82979_20130529_063602_outLine +BABEL_OP1_201_83238_20130514_054056_inLine +BABEL_OP1_201_83238_20130514_054056_outLine 
+BABEL_OP1_201_83430_20130210_094011_inLine +BABEL_OP1_201_83430_20130210_094011_outLine +BABEL_OP1_201_83455_20130511_053045_inLine +BABEL_OP1_201_83455_20130511_053045_outLine +BABEL_OP1_201_83625_20130128_091225_inLine +BABEL_OP1_201_83625_20130128_091225_outLine +BABEL_OP1_201_83651_20130604_075201_inLine +BABEL_OP1_201_83651_20130604_075201_outLine +BABEL_OP1_201_83929_20121205_055436_inLine +BABEL_OP1_201_83929_20121205_055436_outLine +BABEL_OP1_201_83929_20121206_061559_inLine +BABEL_OP1_201_83929_20121206_061559_outLine +BABEL_OP1_201_83935_20130305_104443_inLine +BABEL_OP1_201_83935_20130305_104443_outLine +BABEL_OP1_201_84547_20130227_041326_inLine +BABEL_OP1_201_84547_20130227_041326_outLine +BABEL_OP1_201_84715_20130429_094324_inLine +BABEL_OP1_201_84715_20130429_094324_outLine +BABEL_OP1_201_84936_20130301_073352_inLine +BABEL_OP1_201_84936_20130301_073352_outLine +BABEL_OP1_201_85010_20130206_122216_inLine +BABEL_OP1_201_85010_20130206_122216_outLine +BABEL_OP1_201_85047_20130510_055057_inLine +BABEL_OP1_201_85047_20130510_055057_outLine +BABEL_OP1_201_85048_20130522_072215_inLine +BABEL_OP1_201_85048_20130522_072215_outLine +BABEL_OP1_201_85647_20130511_015627_inLine +BABEL_OP1_201_85647_20130511_015627_outLine +BABEL_OP1_201_86191_20130528_045113_inLine +BABEL_OP1_201_86191_20130528_045113_outLine +BABEL_OP1_201_86191_20130528_051540_inLine +BABEL_OP1_201_86191_20130528_051540_outLine +BABEL_OP1_201_86433_20130303_035210_inLine +BABEL_OP1_201_86433_20130303_035210_outLine +BABEL_OP1_201_86467_20130221_031701_inLine +BABEL_OP1_201_86467_20130221_031701_outLine +BABEL_OP1_201_86557_20130306_054158_inLine +BABEL_OP1_201_86557_20130306_054158_outLine +BABEL_OP1_201_86635_20130227_080743_inLine +BABEL_OP1_201_86635_20130227_080743_outLine +BABEL_OP1_201_86676_20130302_034945_inLine +BABEL_OP1_201_86676_20130302_034945_outLine +BABEL_OP1_201_86888_20130301_011747_inLine +BABEL_OP1_201_86888_20130301_011747_outLine +BABEL_OP1_201_87074_20130529_072238_inLine +BABEL_OP1_201_87074_20130529_072238_outLine +BABEL_OP1_201_87179_20130414_223248_inLine +BABEL_OP1_201_87179_20130414_223248_outLine +BABEL_OP1_201_87298_20130530_035908_inLine +BABEL_OP1_201_87298_20130530_035908_outLine +BABEL_OP1_201_87313_20130228_054816_inLine +BABEL_OP1_201_87313_20130228_054816_outLine +BABEL_OP1_201_87545_20130501_052733_inLine +BABEL_OP1_201_87545_20130501_052733_outLine +BABEL_OP1_201_87731_20130216_084329_inLine +BABEL_OP1_201_87731_20130216_084329_outLine +BABEL_OP1_201_87796_20130531_043218_inLine +BABEL_OP1_201_87796_20130531_043218_outLine +BABEL_OP1_201_88445_20130228_100123_inLine +BABEL_OP1_201_88445_20130228_100123_outLine +BABEL_OP1_201_88661_20130305_103247_inLine +BABEL_OP1_201_88661_20130305_103247_outLine +BABEL_OP1_201_89059_20130429_001658_inLine +BABEL_OP1_201_89059_20130429_001658_outLine +BABEL_OP1_201_89877_20130602_052802_inLine +BABEL_OP1_201_89877_20130602_052802_outLine +BABEL_OP1_201_90347_20130601_020619_inLine +BABEL_OP1_201_90347_20130601_020619_outLine +BABEL_OP1_201_90777_20130530_043440_inLine +BABEL_OP1_201_90777_20130530_043440_outLine +BABEL_OP1_201_91125_20130301_044113_inLine +BABEL_OP1_201_91125_20130301_044113_outLine +BABEL_OP1_201_91336_20130511_010308_inLine +BABEL_OP1_201_91336_20130511_010308_outLine +BABEL_OP1_201_91891_20130306_084037_inLine +BABEL_OP1_201_91891_20130306_084037_outLine +BABEL_OP1_201_91944_20130529_030733_inLine +BABEL_OP1_201_91944_20130529_030733_outLine +BABEL_OP1_201_91977_20130228_225341_inLine 
+BABEL_OP1_201_91977_20130228_225341_outLine +BABEL_OP1_201_92509_20130222_064302_inLine +BABEL_OP1_201_92509_20130222_064302_outLine +BABEL_OP1_201_92557_20130428_115801_inLine +BABEL_OP1_201_92557_20130428_115801_outLine +BABEL_OP1_201_92740_20130301_044629_inLine +BABEL_OP1_201_92740_20130301_044629_outLine +BABEL_OP1_201_92792_20130630_124723_inLine +BABEL_OP1_201_92792_20130630_124723_outLine +BABEL_OP1_201_92942_20130601_011759_inLine +BABEL_OP1_201_92942_20130601_011759_outLine +BABEL_OP1_201_93222_20130127_012443_inLine +BABEL_OP1_201_93222_20130127_012443_outLine +BABEL_OP1_201_93224_20130227_095611_inLine +BABEL_OP1_201_93224_20130227_095611_outLine +BABEL_OP1_201_93604_20130502_071337_inLine +BABEL_OP1_201_93604_20130502_071337_outLine +BABEL_OP1_201_93964_20130511_000644_inLine +BABEL_OP1_201_93964_20130511_000644_outLine +BABEL_OP1_201_94025_20130303_091916_inLine +BABEL_OP1_201_94025_20130303_091916_outLine +BABEL_OP1_201_94316_20130503_072805_inLine +BABEL_OP1_201_94316_20130503_072805_outLine +BABEL_OP1_201_94449_20130704_033336_inLine +BABEL_OP1_201_94449_20130704_033336_outLine +BABEL_OP1_201_94487_20130502_053741_inLine +BABEL_OP1_201_94487_20130502_053741_outLine +BABEL_OP1_201_94666_20130512_052019_inLine +BABEL_OP1_201_94666_20130512_052019_outLine +BABEL_OP1_201_94869_20130313_052715_inLine +BABEL_OP1_201_94869_20130313_052715_outLine +BABEL_OP1_201_94923_20130531_054229_inLine +BABEL_OP1_201_94923_20130531_054229_outLine +BABEL_OP1_201_95446_20130430_051750_inLine +BABEL_OP1_201_95446_20130430_051750_outLine +BABEL_OP1_201_96059_20130430_034442_inLine +BABEL_OP1_201_96059_20130430_034442_outLine +BABEL_OP1_201_96376_20130704_011157_inLine +BABEL_OP1_201_96376_20130704_011157_outLine +BABEL_OP1_201_96820_20130514_032741_inLine +BABEL_OP1_201_96820_20130514_032741_outLine +BABEL_OP1_201_97363_20130528_063449_inLine +BABEL_OP1_201_97363_20130528_063449_outLine +BABEL_OP1_201_97557_20130228_004756_inLine +BABEL_OP1_201_97557_20130228_004756_outLine +BABEL_OP1_201_99202_20130521_003552_inLine +BABEL_OP1_201_99202_20130521_003552_outLine +BABEL_OP1_201_99955_20130429_001807_inLine +BABEL_OP1_201_99955_20130429_001807_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list new file mode 100644 index 00000000000..c6271d71566 --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list @@ -0,0 +1,126 @@ +BABEL_OP1_201_13178_20130301_043649_inLine +BABEL_OP1_201_13178_20130301_043649_outLine +BABEL_OP1_201_14229_20130528_023254_inLine +BABEL_OP1_201_14229_20130528_023254_outLine +BABEL_OP1_201_15216_20130503_005405_inLine +BABEL_OP1_201_15216_20130503_005405_outLine +BABEL_OP1_201_15848_20130130_070404_inLine +BABEL_OP1_201_15848_20130130_070404_outLine +BABEL_OP1_201_16938_20130514_072820_inLine +BABEL_OP1_201_16938_20130514_072820_outLine +BABEL_OP1_201_17881_20130429_230318_inLine +BABEL_OP1_201_17881_20130429_230318_outLine +BABEL_OP1_201_17923_20130529_021211_inLine +BABEL_OP1_201_17923_20130529_021211_outLine +BABEL_OP1_201_18118_20130501_084131_inLine +BABEL_OP1_201_18118_20130501_084131_outLine +BABEL_OP1_201_19722_20130425_005348_inLine +BABEL_OP1_201_19722_20130425_005348_outLine +BABEL_OP1_201_19749_20130429_090621_inLine +BABEL_OP1_201_19749_20130429_090621_outLine +BABEL_OP1_201_20768_20130701_035344_inLine +BABEL_OP1_201_20768_20130701_035344_outLine +BABEL_OP1_201_20800_20130529_035944_inLine +BABEL_OP1_201_20800_20130529_035944_outLine 
+BABEL_OP1_201_21244_20130602_073304_inLine +BABEL_OP1_201_21244_20130602_073304_outLine +BABEL_OP1_201_24290_20130703_074550_inLine +BABEL_OP1_201_24290_20130703_074550_outLine +BABEL_OP1_201_24589_20130529_111014_inLine +BABEL_OP1_201_24589_20130529_111014_outLine +BABEL_OP1_201_26072_20130429_011940_inLine +BABEL_OP1_201_26072_20130429_011940_outLine +BABEL_OP1_201_28606_20130305_101646_inLine +BABEL_OP1_201_28606_20130305_101646_outLine +BABEL_OP1_201_28871_20121207_015933_inLine +BABEL_OP1_201_28871_20121207_015933_outLine +BABEL_OP1_201_31109_20130510_030741_inLine +BABEL_OP1_201_31109_20130510_030741_outLine +BABEL_OP1_201_36219_20130528_021139_inLine +BABEL_OP1_201_36219_20130528_021139_outLine +BABEL_OP1_201_36341_20130226_074136_inLine +BABEL_OP1_201_36341_20130226_074136_outLine +BABEL_OP1_201_37271_20130430_025526_inLine +BABEL_OP1_201_37271_20130430_025526_outLine +BABEL_OP1_201_40713_20130530_005109_inLine +BABEL_OP1_201_40713_20130530_005109_outLine +BABEL_OP1_201_41097_20130228_063046_inLine +BABEL_OP1_201_41097_20130228_063046_outLine +BABEL_OP1_201_41618_20130518_035113_inLine +BABEL_OP1_201_41618_20130518_035113_outLine +BABEL_OP1_201_42243_20130303_022442_inLine +BABEL_OP1_201_42243_20130303_022442_outLine +BABEL_OP1_201_42619_20130228_081700_inLine +BABEL_OP1_201_42619_20130228_081700_outLine +BABEL_OP1_201_43646_20130130_080323_inLine +BABEL_OP1_201_43646_20130130_080323_outLine +BABEL_OP1_201_45559_20130503_033307_inLine +BABEL_OP1_201_45559_20130503_033307_outLine +BABEL_OP1_201_46625_20130308_141424_inLine +BABEL_OP1_201_46625_20130308_141424_outLine +BABEL_OP1_201_47270_20130427_010445_inLine +BABEL_OP1_201_47270_20130427_010445_outLine +BABEL_OP1_201_47270_20130427_011917_inLine +BABEL_OP1_201_47270_20130427_011917_outLine +BABEL_OP1_201_47270_20130427_013155_inLine +BABEL_OP1_201_47270_20130427_013155_outLine +BABEL_OP1_201_48907_20130429_093546_inLine +BABEL_OP1_201_48907_20130429_093546_outLine +BABEL_OP1_201_49118_20130429_023211_inLine +BABEL_OP1_201_49118_20130429_023211_outLine +BABEL_OP1_201_49502_20130302_064002_inLine +BABEL_OP1_201_49502_20130302_064002_outLine +BABEL_OP1_201_49902_20130527_063448_inLine +BABEL_OP1_201_49902_20130527_063448_outLine +BABEL_OP1_201_50601_20130521_045944_inLine +BABEL_OP1_201_50601_20130521_045944_outLine +BABEL_OP1_201_50681_20130228_015155_inLine +BABEL_OP1_201_50681_20130228_015155_outLine +BABEL_OP1_201_50681_20130228_020643_inLine +BABEL_OP1_201_50681_20130228_020643_outLine +BABEL_OP1_201_50726_20130228_033852_inLine +BABEL_OP1_201_50726_20130228_033852_outLine +BABEL_OP1_201_52804_20130529_032046_inLine +BABEL_OP1_201_52804_20130529_032046_outLine +BABEL_OP1_201_53917_20130429_091547_inLine +BABEL_OP1_201_53917_20130429_091547_outLine +BABEL_OP1_201_57093_20130510_071214_inLine +BABEL_OP1_201_57093_20130510_071214_outLine +BABEL_OP1_201_60418_20130301_073212_inLine +BABEL_OP1_201_60418_20130301_073212_outLine +BABEL_OP1_201_63425_20130301_080734_inLine +BABEL_OP1_201_63425_20130301_080734_outLine +BABEL_OP1_201_65723_20130529_004610_inLine +BABEL_OP1_201_65723_20130529_004610_outLine +BABEL_OP1_201_68040_20130517_004413_inLine +BABEL_OP1_201_68040_20130517_004413_outLine +BABEL_OP1_201_70601_20130528_025629_inLine +BABEL_OP1_201_70601_20130528_025629_outLine +BABEL_OP1_201_71121_20130215_075206_inLine +BABEL_OP1_201_71121_20130215_075206_outLine +BABEL_OP1_201_72349_20130527_005409_inLine +BABEL_OP1_201_72349_20130527_005409_outLine +BABEL_OP1_201_74799_20130530_004139_inLine 
+BABEL_OP1_201_74799_20130530_004139_outLine +BABEL_OP1_201_77126_20121205_072118_inLine +BABEL_OP1_201_77126_20121205_072118_outLine +BABEL_OP1_201_81674_20130224_134642_inLine +BABEL_OP1_201_81674_20130224_134642_outLine +BABEL_OP1_201_83935_20130305_104443_inLine +BABEL_OP1_201_83935_20130305_104443_outLine +BABEL_OP1_201_85048_20130522_072215_inLine +BABEL_OP1_201_85048_20130522_072215_outLine +BABEL_OP1_201_87545_20130501_052733_inLine +BABEL_OP1_201_87545_20130501_052733_outLine +BABEL_OP1_201_91336_20130511_010308_inLine +BABEL_OP1_201_91336_20130511_010308_outLine +BABEL_OP1_201_92792_20130630_124723_inLine +BABEL_OP1_201_92792_20130630_124723_outLine +BABEL_OP1_201_92942_20130601_011759_inLine +BABEL_OP1_201_92942_20130601_011759_outLine +BABEL_OP1_201_93224_20130227_095611_inLine +BABEL_OP1_201_93224_20130227_095611_outLine +BABEL_OP1_201_94666_20130512_052019_inLine +BABEL_OP1_201_94666_20130512_052019_outLine +BABEL_OP1_201_94923_20130531_054229_inLine +BABEL_OP1_201_94923_20130531_054229_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..701e74d974b --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list @@ -0,0 +1,634 @@ +BABEL_OP1_201_10002_20130212_152853_inLine +BABEL_OP1_201_10002_20130212_152853_outLine +BABEL_OP1_201_10036_20130528_005502_inLine +BABEL_OP1_201_10036_20130528_005502_outLine +BABEL_OP1_201_10482_20130305_105317_inLine +BABEL_OP1_201_10482_20130305_105317_outLine +BABEL_OP1_201_10647_20130428_045536_inLine +BABEL_OP1_201_10647_20130428_045536_outLine +BABEL_OP1_201_10901_20130529_031421_inLine +BABEL_OP1_201_10901_20130529_031421_outLine +BABEL_OP1_201_11096_20130603_043221_inLine +BABEL_OP1_201_11096_20130603_043221_outLine +BABEL_OP1_201_11663_20130601_002903_inLine +BABEL_OP1_201_11663_20130601_002903_outLine +BABEL_OP1_201_11673_20130226_015822_inLine +BABEL_OP1_201_11673_20130226_015822_outLine +BABEL_OP1_201_11797_20130328_033102_inLine +BABEL_OP1_201_11797_20130328_033102_outLine +BABEL_OP1_201_12220_20130528_051622_inLine +BABEL_OP1_201_12220_20130528_051622_outLine +BABEL_OP1_201_12242_20130603_033446_inLine +BABEL_OP1_201_12242_20130603_033446_outLine +BABEL_OP1_201_12606_20130429_120351_inLine +BABEL_OP1_201_12606_20130429_120351_outLine +BABEL_OP1_201_12606_20130429_121040_inLine +BABEL_OP1_201_12606_20130429_121040_outLine +BABEL_OP1_201_12635_20130429_040127_inLine +BABEL_OP1_201_12635_20130429_040127_outLine +BABEL_OP1_201_12767_20130509_005500_inLine +BABEL_OP1_201_12767_20130509_005500_outLine +BABEL_OP1_201_13324_20130529_035029_inLine +BABEL_OP1_201_13324_20130529_035029_outLine +BABEL_OP1_201_13483_20130306_062423_inLine +BABEL_OP1_201_13483_20130306_062423_outLine +BABEL_OP1_201_13490_20130508_033252_inLine +BABEL_OP1_201_13490_20130508_033252_outLine +BABEL_OP1_201_13664_20130117_073343_inLine +BABEL_OP1_201_13664_20130117_073343_outLine +BABEL_OP1_201_14179_20130303_111502_inLine +BABEL_OP1_201_14179_20130303_111502_outLine +BABEL_OP1_201_14539_20130501_223201_inLine +BABEL_OP1_201_14539_20130501_223201_outLine +BABEL_OP1_201_14560_20130301_065543_inLine +BABEL_OP1_201_14560_20130301_065543_outLine +BABEL_OP1_201_14807_20130522_012156_inLine +BABEL_OP1_201_14807_20130522_012156_outLine +BABEL_OP1_201_14899_20130301_035636_inLine +BABEL_OP1_201_14899_20130301_035636_outLine +BABEL_OP1_201_14972_20130518_025852_inLine 
+BABEL_OP1_201_14972_20130518_025852_outLine +BABEL_OP1_201_15322_20130701_030436_inLine +BABEL_OP1_201_15322_20130701_030436_outLine +BABEL_OP1_201_15382_20130228_050819_inLine +BABEL_OP1_201_15382_20130228_050819_outLine +BABEL_OP1_201_15702_20130301_041117_inLine +BABEL_OP1_201_15702_20130301_041117_outLine +BABEL_OP1_201_15730_20130305_034450_inLine +BABEL_OP1_201_15730_20130305_034450_outLine +BABEL_OP1_201_15902_20130323_005824_inLine +BABEL_OP1_201_15902_20130323_005824_outLine +BABEL_OP1_201_16149_20130322_021647_inLine +BABEL_OP1_201_16149_20130322_021647_outLine +BABEL_OP1_201_16467_20130704_025921_inLine +BABEL_OP1_201_16467_20130704_025921_outLine +BABEL_OP1_201_16800_20130702_085158_inLine +BABEL_OP1_201_16800_20130702_085158_outLine +BABEL_OP1_201_16924_20130301_032937_inLine +BABEL_OP1_201_16924_20130301_032937_outLine +BABEL_OP1_201_17032_20130306_103506_inLine +BABEL_OP1_201_17032_20130306_103506_outLine +BABEL_OP1_201_17113_20130519_093427_inLine +BABEL_OP1_201_17113_20130519_093427_outLine +BABEL_OP1_201_17472_20130311_075957_inLine +BABEL_OP1_201_17472_20130311_075957_outLine +BABEL_OP1_201_17496_20130301_030157_inLine +BABEL_OP1_201_17496_20130301_030157_outLine +BABEL_OP1_201_17520_20130518_012147_inLine +BABEL_OP1_201_17520_20130518_012147_outLine +BABEL_OP1_201_17567_20130512_065938_inLine +BABEL_OP1_201_17567_20130512_065938_outLine +BABEL_OP1_201_18766_20130502_102418_inLine +BABEL_OP1_201_18766_20130502_102418_outLine +BABEL_OP1_201_19134_20130601_040621_inLine +BABEL_OP1_201_19134_20130601_040621_outLine +BABEL_OP1_201_19589_20130502_093932_inLine +BABEL_OP1_201_19589_20130502_093932_outLine +BABEL_OP1_201_19767_20130502_130900_inLine +BABEL_OP1_201_19767_20130502_130900_outLine +BABEL_OP1_201_19877_20130502_085421_inLine +BABEL_OP1_201_19877_20130502_085421_outLine +BABEL_OP1_201_20330_20130429_035418_inLine +BABEL_OP1_201_20330_20130429_035418_outLine +BABEL_OP1_201_20437_20130216_094002_inLine +BABEL_OP1_201_20437_20130216_094002_outLine +BABEL_OP1_201_20972_20130603_035417_inLine +BABEL_OP1_201_20972_20130603_035417_outLine +BABEL_OP1_201_21807_20130522_042858_inLine +BABEL_OP1_201_21807_20130522_042858_outLine +BABEL_OP1_201_21892_20130430_033520_inLine +BABEL_OP1_201_21892_20130430_033520_outLine +BABEL_OP1_201_22466_20121206_070403_inLine +BABEL_OP1_201_22466_20121206_070403_outLine +BABEL_OP1_201_22494_20130305_052405_inLine +BABEL_OP1_201_22494_20130305_052405_outLine +BABEL_OP1_201_22624_20130305_121723_inLine +BABEL_OP1_201_22624_20130305_121723_outLine +BABEL_OP1_201_23046_20130527_110737_inLine +BABEL_OP1_201_23046_20130527_110737_outLine +BABEL_OP1_201_23119_20130321_054320_inLine +BABEL_OP1_201_23119_20130321_054320_outLine +BABEL_OP1_201_23190_20130603_224243_inLine +BABEL_OP1_201_23190_20130603_224243_outLine +BABEL_OP1_201_23195_20130227_050013_inLine +BABEL_OP1_201_23195_20130227_050013_outLine +BABEL_OP1_201_23239_20130305_093734_inLine +BABEL_OP1_201_23239_20130305_093734_outLine +BABEL_OP1_201_23893_20130430_080021_inLine +BABEL_OP1_201_23893_20130430_080021_outLine +BABEL_OP1_201_24231_20130502_123747_inLine +BABEL_OP1_201_24231_20130502_123747_outLine +BABEL_OP1_201_24239_20130703_230221_inLine +BABEL_OP1_201_24239_20130703_230221_outLine +BABEL_OP1_201_24270_20130530_020630_inLine +BABEL_OP1_201_24270_20130530_020630_outLine +BABEL_OP1_201_24470_20130531_024204_inLine +BABEL_OP1_201_24470_20130531_024204_outLine +BABEL_OP1_201_24501_20130429_102945_inLine +BABEL_OP1_201_24501_20130429_102945_outLine 
+BABEL_OP1_201_24532_20130307_060030_inLine +BABEL_OP1_201_24532_20130307_060030_outLine +BABEL_OP1_201_24586_20130430_025349_inLine +BABEL_OP1_201_24586_20130430_032300_inLine +BABEL_OP1_201_24586_20130430_032300_outLine +BABEL_OP1_201_24586_20130430_033306_inLine +BABEL_OP1_201_24586_20130430_033306_outLine +BABEL_OP1_201_24679_20130222_072407_inLine +BABEL_OP1_201_24679_20130222_072407_outLine +BABEL_OP1_201_24982_20130529_044009_inLine +BABEL_OP1_201_24982_20130529_044009_outLine +BABEL_OP1_201_25015_20130501_223825_inLine +BABEL_OP1_201_25015_20130501_223825_outLine +BABEL_OP1_201_25961_20130223_033405_inLine +BABEL_OP1_201_25961_20130223_033405_outLine +BABEL_OP1_201_26388_20130528_030259_inLine +BABEL_OP1_201_26388_20130528_030259_outLine +BABEL_OP1_201_26836_20130528_100100_inLine +BABEL_OP1_201_26836_20130528_100100_outLine +BABEL_OP1_201_26836_20130528_101331_inLine +BABEL_OP1_201_26836_20130528_101331_outLine +BABEL_OP1_201_26999_20130228_090136_inLine +BABEL_OP1_201_26999_20130228_090136_outLine +BABEL_OP1_201_27042_20130701_075011_inLine +BABEL_OP1_201_27042_20130701_075011_outLine +BABEL_OP1_201_27203_20130602_005950_inLine +BABEL_OP1_201_27203_20130602_005950_outLine +BABEL_OP1_201_27590_20130304_072243_inLine +BABEL_OP1_201_27590_20130304_072243_outLine +BABEL_OP1_201_28419_20130528_035005_inLine +BABEL_OP1_201_28419_20130528_035005_outLine +BABEL_OP1_201_28522_20130303_104614_inLine +BABEL_OP1_201_28522_20130303_104614_outLine +BABEL_OP1_201_28600_20130701_051100_inLine +BABEL_OP1_201_28600_20130701_051100_outLine +BABEL_OP1_201_28775_20130529_005204_inLine +BABEL_OP1_201_28775_20130529_005204_outLine +BABEL_OP1_201_28814_20130704_000405_inLine +BABEL_OP1_201_28814_20130704_000405_outLine +BABEL_OP1_201_28945_20130528_094913_inLine +BABEL_OP1_201_28945_20130528_094913_outLine +BABEL_OP1_201_29023_20130530_024701_inLine +BABEL_OP1_201_29023_20130530_024701_outLine +BABEL_OP1_201_29072_20130304_052508_inLine +BABEL_OP1_201_29072_20130304_052508_outLine +BABEL_OP1_201_29168_20130222_015942_inLine +BABEL_OP1_201_29168_20130222_015942_outLine +BABEL_OP1_201_30180_20130528_033242_inLine +BABEL_OP1_201_30180_20130528_033242_outLine +BABEL_OP1_201_30395_20130529_034626_inLine +BABEL_OP1_201_30395_20130529_034626_outLine +BABEL_OP1_201_30432_20130227_084229_inLine +BABEL_OP1_201_30432_20130227_084229_outLine +BABEL_OP1_201_30576_20130527_002801_inLine +BABEL_OP1_201_30576_20130527_002801_outLine +BABEL_OP1_201_31628_20130301_081256_inLine +BABEL_OP1_201_31628_20130301_081256_outLine +BABEL_OP1_201_32097_20130130_021717_inLine +BABEL_OP1_201_32097_20130130_021717_outLine +BABEL_OP1_201_32122_20130529_070011_inLine +BABEL_OP1_201_32122_20130529_070011_outLine +BABEL_OP1_201_32171_20130220_084632_inLine +BABEL_OP1_201_32171_20130220_084632_outLine +BABEL_OP1_201_32708_20130528_093343_inLine +BABEL_OP1_201_32708_20130528_093343_outLine +BABEL_OP1_201_33229_20130429_025144_inLine +BABEL_OP1_201_33229_20130429_025144_outLine +BABEL_OP1_201_33659_20130214_000335_inLine +BABEL_OP1_201_33659_20130214_000335_outLine +BABEL_OP1_201_33806_20130630_224040_inLine +BABEL_OP1_201_33806_20130630_224040_outLine +BABEL_OP1_201_34106_20130305_032650_inLine +BABEL_OP1_201_34106_20130305_032650_outLine +BABEL_OP1_201_34145_20130301_033324_inLine +BABEL_OP1_201_34145_20130301_033324_outLine +BABEL_OP1_201_34197_20130227_065321_inLine +BABEL_OP1_201_34197_20130227_065321_outLine +BABEL_OP1_201_34336_20130527_071806_inLine +BABEL_OP1_201_34336_20130527_071806_outLine 
+BABEL_OP1_201_34679_20130529_040931_inLine +BABEL_OP1_201_34679_20130529_040931_outLine +BABEL_OP1_201_34826_20130430_025628_inLine +BABEL_OP1_201_34826_20130430_025628_outLine +BABEL_OP1_201_34903_20130302_052444_inLine +BABEL_OP1_201_34903_20130302_052444_outLine +BABEL_OP1_201_35000_20130702_092721_inLine +BABEL_OP1_201_35000_20130702_092721_outLine +BABEL_OP1_201_35008_20130305_114402_inLine +BABEL_OP1_201_35008_20130305_114402_outLine +BABEL_OP1_201_35467_20130321_032230_inLine +BABEL_OP1_201_35467_20130321_032230_outLine +BABEL_OP1_201_36894_20130221_070614_inLine +BABEL_OP1_201_36894_20130221_070614_outLine +BABEL_OP1_201_37598_20130601_032226_inLine +BABEL_OP1_201_37598_20130601_032226_outLine +BABEL_OP1_201_38076_20130302_132339_inLine +BABEL_OP1_201_38076_20130302_132339_outLine +BABEL_OP1_201_38878_20130228_041057_inLine +BABEL_OP1_201_38878_20130228_041057_outLine +BABEL_OP1_201_39426_20130429_085957_inLine +BABEL_OP1_201_39426_20130429_085957_outLine +BABEL_OP1_201_39638_20130126_082343_inLine +BABEL_OP1_201_39638_20130126_082343_outLine +BABEL_OP1_201_41334_20130630_085009_inLine +BABEL_OP1_201_41334_20130630_085009_outLine +BABEL_OP1_201_41469_20130303_034949_inLine +BABEL_OP1_201_41469_20130303_034949_outLine +BABEL_OP1_201_41542_20130429_084921_inLine +BABEL_OP1_201_41542_20130429_084921_outLine +BABEL_OP1_201_41685_20130214_090836_inLine +BABEL_OP1_201_41685_20130214_090836_outLine +BABEL_OP1_201_41720_20130203_053934_inLine +BABEL_OP1_201_41720_20130203_053934_outLine +BABEL_OP1_201_41890_20130430_020800_inLine +BABEL_OP1_201_41890_20130430_020800_outLine +BABEL_OP1_201_42155_20130521_023245_inLine +BABEL_OP1_201_42155_20130521_023245_outLine +BABEL_OP1_201_42497_20130529_040557_inLine +BABEL_OP1_201_42497_20130529_040557_outLine +BABEL_OP1_201_42771_20130516_235914_inLine +BABEL_OP1_201_42771_20130516_235914_outLine +BABEL_OP1_201_42834_20130227_094847_inLine +BABEL_OP1_201_42834_20130227_094847_outLine +BABEL_OP1_201_42991_20130301_104105_inLine +BABEL_OP1_201_42991_20130301_104105_outLine +BABEL_OP1_201_43286_20130301_085932_inLine +BABEL_OP1_201_43286_20130301_085932_outLine +BABEL_OP1_201_43323_20130211_115349_inLine +BABEL_OP1_201_43323_20130211_120743_inLine +BABEL_OP1_201_43323_20130211_120743_outLine +BABEL_OP1_201_43588_20130430_054932_inLine +BABEL_OP1_201_43588_20130430_054932_outLine +BABEL_OP1_201_43784_20130529_104333_inLine +BABEL_OP1_201_43784_20130529_104333_outLine +BABEL_OP1_201_43794_20130603_014105_inLine +BABEL_OP1_201_43794_20130603_014105_outLine +BABEL_OP1_201_44477_20130302_072308_inLine +BABEL_OP1_201_44477_20130302_072308_outLine +BABEL_OP1_201_44477_20130302_073645_inLine +BABEL_OP1_201_44477_20130302_073645_outLine +BABEL_OP1_201_44478_20130502_075027_inLine +BABEL_OP1_201_44478_20130502_075027_outLine +BABEL_OP1_201_44709_20130303_114051_inLine +BABEL_OP1_201_44709_20130303_114051_outLine +BABEL_OP1_201_46066_20130429_123746_inLine +BABEL_OP1_201_46066_20130429_123746_outLine +BABEL_OP1_201_46169_20130702_011629_inLine +BABEL_OP1_201_46169_20130702_011629_outLine +BABEL_OP1_201_46310_20130328_024919_inLine +BABEL_OP1_201_46310_20130328_024919_outLine +BABEL_OP1_201_46550_20130528_065103_inLine +BABEL_OP1_201_46550_20130528_065103_outLine +BABEL_OP1_201_46558_20130220_030534_inLine +BABEL_OP1_201_46558_20130220_030534_outLine +BABEL_OP1_201_46589_20130302_082301_inLine +BABEL_OP1_201_46589_20130302_082301_outLine +BABEL_OP1_201_46681_20130530_033328_inLine +BABEL_OP1_201_46681_20130530_033328_outLine 
+BABEL_OP1_201_46770_20130429_011947_inLine +BABEL_OP1_201_46770_20130429_011947_outLine +BABEL_OP1_201_46976_20130517_023139_inLine +BABEL_OP1_201_46976_20130517_023139_outLine +BABEL_OP1_201_47802_20130524_044824_inLine +BABEL_OP1_201_47802_20130524_044824_outLine +BABEL_OP1_201_47878_20130522_021958_inLine +BABEL_OP1_201_47878_20130522_021958_outLine +BABEL_OP1_201_48243_20130602_122113_inLine +BABEL_OP1_201_48243_20130602_122113_outLine +BABEL_OP1_201_48299_20130226_120812_inLine +BABEL_OP1_201_48299_20130226_120812_outLine +BABEL_OP1_201_48299_20130226_122743_inLine +BABEL_OP1_201_48299_20130226_122743_outLine +BABEL_OP1_201_49027_20130529_101617_inLine +BABEL_OP1_201_49027_20130529_101617_outLine +BABEL_OP1_201_49216_20130314_070036_inLine +BABEL_OP1_201_49216_20130314_070036_outLine +BABEL_OP1_201_49630_20130306_105833_inLine +BABEL_OP1_201_49630_20130306_105833_outLine +BABEL_OP1_201_49637_20130426_020402_inLine +BABEL_OP1_201_49637_20130426_020402_outLine +BABEL_OP1_201_49768_20130529_082143_inLine +BABEL_OP1_201_49768_20130529_082143_outLine +BABEL_OP1_201_49907_20130529_101707_inLine +BABEL_OP1_201_49907_20130529_101707_outLine +BABEL_OP1_201_49945_20130501_080703_inLine +BABEL_OP1_201_49945_20130501_080703_outLine +BABEL_OP1_201_50549_20130428_053142_inLine +BABEL_OP1_201_50549_20130428_053142_outLine +BABEL_OP1_201_50549_20130428_055313_inLine +BABEL_OP1_201_50549_20130428_055313_outLine +BABEL_OP1_201_50779_20130522_051719_inLine +BABEL_OP1_201_50779_20130522_051719_outLine +BABEL_OP1_201_50810_20130312_055632_inLine +BABEL_OP1_201_50810_20130312_055632_outLine +BABEL_OP1_201_50940_20130309_041526_inLine +BABEL_OP1_201_50940_20130309_041526_outLine +BABEL_OP1_201_51611_20130530_094039_inLine +BABEL_OP1_201_51611_20130530_094039_outLine +BABEL_OP1_201_52301_20130223_024524_inLine +BABEL_OP1_201_52301_20130223_024524_outLine +BABEL_OP1_201_52404_20130301_233232_inLine +BABEL_OP1_201_52404_20130301_233232_outLine +BABEL_OP1_201_52422_20130428_023051_inLine +BABEL_OP1_201_52422_20130428_023051_outLine +BABEL_OP1_201_52490_20130220_051000_inLine +BABEL_OP1_201_52490_20130220_051000_outLine +BABEL_OP1_201_52818_20130301_121852_inLine +BABEL_OP1_201_52818_20130301_121852_outLine +BABEL_OP1_201_55259_20130526_073400_inLine +BABEL_OP1_201_55259_20130526_073400_outLine +BABEL_OP1_201_55267_20130228_064943_inLine +BABEL_OP1_201_55267_20130228_064943_outLine +BABEL_OP1_201_55968_20130314_043319_inLine +BABEL_OP1_201_55968_20130314_043319_outLine +BABEL_OP1_201_55968_20130314_044612_inLine +BABEL_OP1_201_55968_20130314_044612_outLine +BABEL_OP1_201_56023_20130501_081011_inLine +BABEL_OP1_201_56023_20130501_081011_outLine +BABEL_OP1_201_56307_20130301_024958_inLine +BABEL_OP1_201_56307_20130301_024958_outLine +BABEL_OP1_201_57065_20130302_033227_inLine +BABEL_OP1_201_57065_20130302_033227_outLine +BABEL_OP1_201_57233_20130206_090034_inLine +BABEL_OP1_201_57233_20130206_090034_outLine +BABEL_OP1_201_57464_20130428_051858_inLine +BABEL_OP1_201_57464_20130428_051858_outLine +BABEL_OP1_201_57548_20130518_042831_inLine +BABEL_OP1_201_57548_20130518_042831_outLine +BABEL_OP1_201_57678_20130528_022013_inLine +BABEL_OP1_201_57678_20130528_022013_outLine +BABEL_OP1_201_58107_20130518_004334_inLine +BABEL_OP1_201_58107_20130518_004334_outLine +BABEL_OP1_201_58145_20130602_044301_inLine +BABEL_OP1_201_58145_20130602_044301_outLine +BABEL_OP1_201_58313_20130522_055528_inLine +BABEL_OP1_201_58313_20130522_055528_outLine +BABEL_OP1_201_58585_20130429_003422_inLine 
+BABEL_OP1_201_58585_20130429_003422_outLine +BABEL_OP1_201_58821_20130306_091219_inLine +BABEL_OP1_201_58821_20130306_091219_outLine +BABEL_OP1_201_59039_20130220_090641_inLine +BABEL_OP1_201_59039_20130220_090641_outLine +BABEL_OP1_201_59509_20130227_090836_inLine +BABEL_OP1_201_59509_20130227_090836_outLine +BABEL_OP1_201_59509_20130227_092230_inLine +BABEL_OP1_201_59509_20130227_092230_outLine +BABEL_OP1_201_60115_20130301_114138_inLine +BABEL_OP1_201_60115_20130301_114138_outLine +BABEL_OP1_201_60436_20130503_044737_inLine +BABEL_OP1_201_60436_20130503_044737_outLine +BABEL_OP1_201_60474_20130527_081400_inLine +BABEL_OP1_201_60474_20130527_081400_outLine +BABEL_OP1_201_60661_20130529_023958_inLine +BABEL_OP1_201_60661_20130529_023958_outLine +BABEL_OP1_201_61435_20130430_031742_inLine +BABEL_OP1_201_61435_20130430_031742_outLine +BABEL_OP1_201_61873_20130519_030703_inLine +BABEL_OP1_201_61873_20130519_030703_outLine +BABEL_OP1_201_62014_20130228_083820_inLine +BABEL_OP1_201_62014_20130228_083820_outLine +BABEL_OP1_201_63081_20130226_035431_inLine +BABEL_OP1_201_63081_20130226_035431_outLine +BABEL_OP1_201_63084_20130301_114742_inLine +BABEL_OP1_201_63084_20130301_114742_outLine +BABEL_OP1_201_63307_20130521_235343_inLine +BABEL_OP1_201_63307_20130521_235343_outLine +BABEL_OP1_201_63604_20130412_021112_inLine +BABEL_OP1_201_63604_20130412_021112_outLine +BABEL_OP1_201_64259_20130202_090605_inLine +BABEL_OP1_201_64259_20130202_090605_outLine +BABEL_OP1_201_64398_20130301_084125_inLine +BABEL_OP1_201_64398_20130301_084125_outLine +BABEL_OP1_201_65064_20130521_061233_inLine +BABEL_OP1_201_65064_20130521_061233_outLine +BABEL_OP1_201_65561_20130305_120931_inLine +BABEL_OP1_201_65561_20130305_120931_outLine +BABEL_OP1_201_66045_20130509_044408_inLine +BABEL_OP1_201_66045_20130509_044408_outLine +BABEL_OP1_201_66472_20130517_041032_inLine +BABEL_OP1_201_66472_20130517_041032_outLine +BABEL_OP1_201_67213_20130224_044805_inLine +BABEL_OP1_201_67213_20130224_044805_outLine +BABEL_OP1_201_67283_20130223_012433_inLine +BABEL_OP1_201_67283_20130223_012433_outLine +BABEL_OP1_201_67401_20130522_063044_inLine +BABEL_OP1_201_67401_20130522_063044_outLine +BABEL_OP1_201_67622_20130306_012440_inLine +BABEL_OP1_201_67622_20130306_012440_outLine +BABEL_OP1_201_68068_20130302_042557_inLine +BABEL_OP1_201_68068_20130302_042557_outLine +BABEL_OP1_201_68244_20130228_052832_inLine +BABEL_OP1_201_68244_20130228_052832_outLine +BABEL_OP1_201_68306_20130301_132523_inLine +BABEL_OP1_201_68306_20130301_132523_outLine +BABEL_OP1_201_68748_20130301_051957_inLine +BABEL_OP1_201_68748_20130301_051957_outLine +BABEL_OP1_201_68924_20130228_031746_inLine +BABEL_OP1_201_68924_20130228_031746_outLine +BABEL_OP1_201_69107_20130518_053632_inLine +BABEL_OP1_201_69107_20130518_053632_outLine +BABEL_OP1_201_69574_20130313_015419_inLine +BABEL_OP1_201_69574_20130313_015419_outLine +BABEL_OP1_201_69578_20130509_033949_inLine +BABEL_OP1_201_69578_20130509_033949_outLine +BABEL_OP1_201_69636_20130302_024254_inLine +BABEL_OP1_201_69636_20130302_024254_outLine +BABEL_OP1_201_70343_20130302_035639_inLine +BABEL_OP1_201_70343_20130302_035639_outLine +BABEL_OP1_201_70343_20130302_040518_inLine +BABEL_OP1_201_70343_20130302_040518_outLine +BABEL_OP1_201_70386_20130528_033752_inLine +BABEL_OP1_201_70386_20130528_033752_outLine +BABEL_OP1_201_70794_20130314_065330_inLine +BABEL_OP1_201_70794_20130314_065330_outLine +BABEL_OP1_201_72324_20130227_080108_inLine +BABEL_OP1_201_72324_20130227_080108_outLine 
+BABEL_OP1_201_72587_20130227_092146_inLine +BABEL_OP1_201_72587_20130227_092146_outLine +BABEL_OP1_201_72844_20130320_030750_inLine +BABEL_OP1_201_72844_20130320_030750_outLine +BABEL_OP1_201_73430_20130306_070252_inLine +BABEL_OP1_201_73430_20130306_070252_outLine +BABEL_OP1_201_73485_20130704_012751_inLine +BABEL_OP1_201_73485_20130704_012751_outLine +BABEL_OP1_201_73511_20130305_064018_inLine +BABEL_OP1_201_73511_20130305_064018_outLine +BABEL_OP1_201_73518_20130427_020953_inLine +BABEL_OP1_201_73518_20130427_020953_outLine +BABEL_OP1_201_73591_20121205_085430_inLine +BABEL_OP1_201_73591_20121205_085430_outLine +BABEL_OP1_201_73591_20121205_091943_inLine +BABEL_OP1_201_73591_20121205_091943_outLine +BABEL_OP1_201_73964_20130502_060046_inLine +BABEL_OP1_201_73964_20130502_060046_outLine +BABEL_OP1_201_74280_20130307_060529_inLine +BABEL_OP1_201_74280_20130307_060529_outLine +BABEL_OP1_201_74728_20130502_015015_inLine +BABEL_OP1_201_74728_20130502_015015_outLine +BABEL_OP1_201_74921_20130302_015536_inLine +BABEL_OP1_201_74921_20130302_015536_outLine +BABEL_OP1_201_74921_20130302_020351_inLine +BABEL_OP1_201_74921_20130302_020351_outLine +BABEL_OP1_201_75064_20130528_032631_inLine +BABEL_OP1_201_75064_20130528_032631_outLine +BABEL_OP1_201_75342_20130305_071206_inLine +BABEL_OP1_201_75342_20130305_071206_outLine +BABEL_OP1_201_75764_20130428_041456_inLine +BABEL_OP1_201_75764_20130428_041456_outLine +BABEL_OP1_201_75993_20130529_053731_inLine +BABEL_OP1_201_75993_20130529_053731_outLine +BABEL_OP1_201_76683_20130524_053916_inLine +BABEL_OP1_201_76683_20130524_053916_outLine +BABEL_OP1_201_77427_20130528_003638_inLine +BABEL_OP1_201_77427_20130528_003638_outLine +BABEL_OP1_201_78116_20130304_074916_inLine +BABEL_OP1_201_78116_20130304_074916_outLine +BABEL_OP1_201_78398_20130529_023517_inLine +BABEL_OP1_201_78398_20130529_023517_outLine +BABEL_OP1_201_78943_20130528_034620_inLine +BABEL_OP1_201_78943_20130528_034620_outLine +BABEL_OP1_201_79129_20130524_031851_inLine +BABEL_OP1_201_79129_20130524_031851_outLine +BABEL_OP1_201_79167_20130303_071948_inLine +BABEL_OP1_201_79167_20130303_071948_outLine +BABEL_OP1_201_79167_20130303_093604_inLine +BABEL_OP1_201_79167_20130303_093604_outLine +BABEL_OP1_201_79429_20130216_152022_inLine +BABEL_OP1_201_79429_20130216_152022_outLine +BABEL_OP1_201_80306_20130509_071053_inLine +BABEL_OP1_201_80306_20130509_071053_outLine +BABEL_OP1_201_81287_20130305_141750_inLine +BABEL_OP1_201_81287_20130305_141750_outLine +BABEL_OP1_201_81392_20130304_082518_inLine +BABEL_OP1_201_81392_20130304_082518_outLine +BABEL_OP1_201_81424_20130304_080620_inLine +BABEL_OP1_201_81424_20130304_080620_outLine +BABEL_OP1_201_81433_20130514_063900_inLine +BABEL_OP1_201_81433_20130514_063900_outLine +BABEL_OP1_201_81810_20130302_043825_inLine +BABEL_OP1_201_81810_20130302_043825_outLine +BABEL_OP1_201_81971_20130227_030618_inLine +BABEL_OP1_201_81971_20130227_030618_outLine +BABEL_OP1_201_82123_20130505_053636_inLine +BABEL_OP1_201_82123_20130505_053636_outLine +BABEL_OP1_201_82138_20130509_063904_inLine +BABEL_OP1_201_82138_20130509_063904_outLine +BABEL_OP1_201_82140_20130510_013208_inLine +BABEL_OP1_201_82140_20130510_013208_outLine +BABEL_OP1_201_82637_20130227_044340_inLine +BABEL_OP1_201_82637_20130227_044340_outLine +BABEL_OP1_201_82904_20130427_005507_inLine +BABEL_OP1_201_82904_20130427_005507_outLine +BABEL_OP1_201_82979_20130529_063602_inLine +BABEL_OP1_201_82979_20130529_063602_outLine +BABEL_OP1_201_83238_20130514_054056_inLine 
+BABEL_OP1_201_83238_20130514_054056_outLine +BABEL_OP1_201_83430_20130210_094011_inLine +BABEL_OP1_201_83430_20130210_094011_outLine +BABEL_OP1_201_83455_20130511_053045_inLine +BABEL_OP1_201_83455_20130511_053045_outLine +BABEL_OP1_201_83625_20130128_091225_inLine +BABEL_OP1_201_83625_20130128_091225_outLine +BABEL_OP1_201_83651_20130604_075201_inLine +BABEL_OP1_201_83651_20130604_075201_outLine +BABEL_OP1_201_83929_20121205_055436_inLine +BABEL_OP1_201_83929_20121205_055436_outLine +BABEL_OP1_201_83929_20121206_061559_inLine +BABEL_OP1_201_83929_20121206_061559_outLine +BABEL_OP1_201_84547_20130227_041326_inLine +BABEL_OP1_201_84547_20130227_041326_outLine +BABEL_OP1_201_84715_20130429_094324_inLine +BABEL_OP1_201_84715_20130429_094324_outLine +BABEL_OP1_201_84936_20130301_073352_inLine +BABEL_OP1_201_84936_20130301_073352_outLine +BABEL_OP1_201_85010_20130206_122216_inLine +BABEL_OP1_201_85010_20130206_122216_outLine +BABEL_OP1_201_85047_20130510_055057_inLine +BABEL_OP1_201_85047_20130510_055057_outLine +BABEL_OP1_201_85647_20130511_015627_inLine +BABEL_OP1_201_85647_20130511_015627_outLine +BABEL_OP1_201_86191_20130528_045113_inLine +BABEL_OP1_201_86191_20130528_045113_outLine +BABEL_OP1_201_86191_20130528_051540_inLine +BABEL_OP1_201_86191_20130528_051540_outLine +BABEL_OP1_201_86433_20130303_035210_inLine +BABEL_OP1_201_86433_20130303_035210_outLine +BABEL_OP1_201_86467_20130221_031701_inLine +BABEL_OP1_201_86467_20130221_031701_outLine +BABEL_OP1_201_86557_20130306_054158_inLine +BABEL_OP1_201_86557_20130306_054158_outLine +BABEL_OP1_201_86635_20130227_080743_inLine +BABEL_OP1_201_86635_20130227_080743_outLine +BABEL_OP1_201_86676_20130302_034945_inLine +BABEL_OP1_201_86676_20130302_034945_outLine +BABEL_OP1_201_86888_20130301_011747_inLine +BABEL_OP1_201_86888_20130301_011747_outLine +BABEL_OP1_201_87074_20130529_072238_inLine +BABEL_OP1_201_87074_20130529_072238_outLine +BABEL_OP1_201_87179_20130414_223248_inLine +BABEL_OP1_201_87179_20130414_223248_outLine +BABEL_OP1_201_87298_20130530_035908_inLine +BABEL_OP1_201_87298_20130530_035908_outLine +BABEL_OP1_201_87313_20130228_054816_inLine +BABEL_OP1_201_87313_20130228_054816_outLine +BABEL_OP1_201_87731_20130216_084329_inLine +BABEL_OP1_201_87731_20130216_084329_outLine +BABEL_OP1_201_87796_20130531_043218_inLine +BABEL_OP1_201_87796_20130531_043218_outLine +BABEL_OP1_201_88445_20130228_100123_inLine +BABEL_OP1_201_88445_20130228_100123_outLine +BABEL_OP1_201_88661_20130305_103247_inLine +BABEL_OP1_201_88661_20130305_103247_outLine +BABEL_OP1_201_89059_20130429_001658_inLine +BABEL_OP1_201_89059_20130429_001658_outLine +BABEL_OP1_201_89877_20130602_052802_inLine +BABEL_OP1_201_89877_20130602_052802_outLine +BABEL_OP1_201_90347_20130601_020619_inLine +BABEL_OP1_201_90347_20130601_020619_outLine +BABEL_OP1_201_90777_20130530_043440_inLine +BABEL_OP1_201_90777_20130530_043440_outLine +BABEL_OP1_201_91125_20130301_044113_inLine +BABEL_OP1_201_91125_20130301_044113_outLine +BABEL_OP1_201_91891_20130306_084037_inLine +BABEL_OP1_201_91891_20130306_084037_outLine +BABEL_OP1_201_91944_20130529_030733_inLine +BABEL_OP1_201_91944_20130529_030733_outLine +BABEL_OP1_201_91977_20130228_225341_inLine +BABEL_OP1_201_91977_20130228_225341_outLine +BABEL_OP1_201_92509_20130222_064302_inLine +BABEL_OP1_201_92509_20130222_064302_outLine +BABEL_OP1_201_92557_20130428_115801_inLine +BABEL_OP1_201_92557_20130428_115801_outLine +BABEL_OP1_201_92740_20130301_044629_inLine +BABEL_OP1_201_92740_20130301_044629_outLine 
+BABEL_OP1_201_93222_20130127_012443_inLine +BABEL_OP1_201_93222_20130127_012443_outLine +BABEL_OP1_201_93604_20130502_071337_inLine +BABEL_OP1_201_93604_20130502_071337_outLine +BABEL_OP1_201_93964_20130511_000644_inLine +BABEL_OP1_201_93964_20130511_000644_outLine +BABEL_OP1_201_94025_20130303_091916_inLine +BABEL_OP1_201_94025_20130303_091916_outLine +BABEL_OP1_201_94316_20130503_072805_inLine +BABEL_OP1_201_94316_20130503_072805_outLine +BABEL_OP1_201_94449_20130704_033336_inLine +BABEL_OP1_201_94449_20130704_033336_outLine +BABEL_OP1_201_94487_20130502_053741_inLine +BABEL_OP1_201_94487_20130502_053741_outLine +BABEL_OP1_201_94869_20130313_052715_inLine +BABEL_OP1_201_94869_20130313_052715_outLine +BABEL_OP1_201_95446_20130430_051750_inLine +BABEL_OP1_201_95446_20130430_051750_outLine +BABEL_OP1_201_96059_20130430_034442_inLine +BABEL_OP1_201_96059_20130430_034442_outLine +BABEL_OP1_201_96376_20130704_011157_inLine +BABEL_OP1_201_96376_20130704_011157_outLine +BABEL_OP1_201_96820_20130514_032741_inLine +BABEL_OP1_201_96820_20130514_032741_outLine +BABEL_OP1_201_97363_20130528_063449_inLine +BABEL_OP1_201_97363_20130528_063449_outLine +BABEL_OP1_201_97557_20130228_004756_inLine +BABEL_OP1_201_97557_20130228_004756_outLine +BABEL_OP1_201_99202_20130521_003552_inLine +BABEL_OP1_201_99202_20130521_003552_outLine +BABEL_OP1_201_99955_20130429_001807_inLine +BABEL_OP1_201_99955_20130429_001807_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list new file mode 100644 index 00000000000..33da29dd0f7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list @@ -0,0 +1,270 @@ +BABEL_OP1_201_10974_20130512_073026_inLine +BABEL_OP1_201_10974_20130512_073026_outLine +BABEL_OP1_201_11352_20130501_215210_inLine +BABEL_OP1_201_11352_20130501_215210_outLine +BABEL_OP1_201_13040_20130604_010848_inLine +BABEL_OP1_201_13040_20130604_010848_outLine +BABEL_OP1_201_14158_20130301_041642_inLine +BABEL_OP1_201_14158_20130301_041642_outLine +BABEL_OP1_201_15042_20130502_000845_inLine +BABEL_OP1_201_15042_20130502_000845_outLine +BABEL_OP1_201_17573_20130313_093021_inLine +BABEL_OP1_201_17573_20130313_093021_outLine +BABEL_OP1_201_18078_20130430_095821_inLine +BABEL_OP1_201_18078_20130430_095821_outLine +BABEL_OP1_201_19120_20130405_142951_inLine +BABEL_OP1_201_19120_20130405_142951_outLine +BABEL_OP1_201_21581_20130527_033524_inLine +BABEL_OP1_201_21581_20130527_033524_outLine +BABEL_OP1_201_21581_20130527_034908_inLine +BABEL_OP1_201_21581_20130527_034908_outLine +BABEL_OP1_201_22170_20130403_030729_inLine +BABEL_OP1_201_22170_20130403_030729_outLine +BABEL_OP1_201_27478_20130502_010501_inLine +BABEL_OP1_201_27478_20130502_010501_outLine +BABEL_OP1_201_28012_20130427_041255_inLine +BABEL_OP1_201_28012_20130427_041255_outLine +BABEL_OP1_201_28585_20130426_120901_inLine +BABEL_OP1_201_28585_20130426_120901_outLine +BABEL_OP1_201_29039_20130502_123143_inLine +BABEL_OP1_201_29039_20130502_123143_outLine +BABEL_OP1_201_29404_20130428_094208_inLine +BABEL_OP1_201_29404_20130428_094208_outLine +BABEL_OP1_201_29685_20130603_224641_inLine +BABEL_OP1_201_29685_20130603_224641_outLine +BABEL_OP1_201_29777_20130430_071717_inLine +BABEL_OP1_201_29777_20130430_071717_outLine +BABEL_OP1_201_30653_20130501_222756_inLine +BABEL_OP1_201_30653_20130501_222756_outLine +BABEL_OP1_201_31182_20130415_005506_inLine +BABEL_OP1_201_31182_20130415_005506_outLine +BABEL_OP1_201_32872_20130429_221658_inLine 
+BABEL_OP1_201_32872_20130429_221658_outLine +BABEL_OP1_201_32959_20130323_033657_inLine +BABEL_OP1_201_32959_20130323_033657_outLine +BABEL_OP1_201_35885_20130630_115617_inLine +BABEL_OP1_201_35885_20130630_115617_outLine +BABEL_OP1_201_36059_20130404_104841_inLine +BABEL_OP1_201_36059_20130404_104841_outLine +BABEL_OP1_201_40740_20130429_011150_inLine +BABEL_OP1_201_40740_20130429_011150_outLine +BABEL_OP1_201_41493_20130312_081558_inLine +BABEL_OP1_201_41493_20130312_081558_outLine +BABEL_OP1_201_41920_20130403_050458_inLine +BABEL_OP1_201_41920_20130403_050458_outLine +BABEL_OP1_201_42231_20130306_074634_inLine +BABEL_OP1_201_42231_20130306_074634_outLine +BABEL_OP1_201_42231_20130306_075939_inLine +BABEL_OP1_201_42231_20130306_075939_outLine +BABEL_OP1_201_42600_20130527_055528_inLine +BABEL_OP1_201_42600_20130527_055528_outLine +BABEL_OP1_201_42600_20130527_060503_inLine +BABEL_OP1_201_42600_20130527_060503_outLine +BABEL_OP1_201_42718_20130429_001514_inLine +BABEL_OP1_201_42718_20130429_001514_outLine +BABEL_OP1_201_44420_20130603_050431_inLine +BABEL_OP1_201_44420_20130603_050431_outLine +BABEL_OP1_201_45140_20130429_085359_inLine +BABEL_OP1_201_45140_20130429_085359_outLine +BABEL_OP1_201_45777_20130528_001753_inLine +BABEL_OP1_201_45777_20130528_001753_outLine +BABEL_OP1_201_45908_20130430_062256_inLine +BABEL_OP1_201_45908_20130430_062256_outLine +BABEL_OP1_201_46702_20130308_094852_inLine +BABEL_OP1_201_46702_20130308_094852_outLine +BABEL_OP1_201_48200_20130428_230807_inLine +BABEL_OP1_201_48200_20130428_230807_outLine +BABEL_OP1_201_48399_20130426_031102_inLine +BABEL_OP1_201_48399_20130426_031102_outLine +BABEL_OP1_201_48758_20130415_035720_inLine +BABEL_OP1_201_48758_20130415_035720_outLine +BABEL_OP1_201_49812_20130429_013208_inLine +BABEL_OP1_201_49812_20130429_013208_outLine +BABEL_OP1_201_50745_20130501_232950_inLine +BABEL_OP1_201_50745_20130501_232950_outLine +BABEL_OP1_201_50962_20130529_005739_inLine +BABEL_OP1_201_50962_20130529_005739_outLine +BABEL_OP1_201_50962_20130529_013505_inLine +BABEL_OP1_201_50962_20130529_013505_outLine +BABEL_OP1_201_51417_20130429_013022_inLine +BABEL_OP1_201_51417_20130429_013022_outLine +BABEL_OP1_201_51417_20130429_015210_inLine +BABEL_OP1_201_51417_20130429_015210_outLine +BABEL_OP1_201_52614_20130503_045833_inLine +BABEL_OP1_201_52614_20130503_045833_outLine +BABEL_OP1_201_52614_20130503_051217_inLine +BABEL_OP1_201_52614_20130503_051217_outLine +BABEL_OP1_201_53072_20130430_114228_inLine +BABEL_OP1_201_53072_20130430_114228_outLine +BABEL_OP1_201_53419_20130630_034136_inLine +BABEL_OP1_201_53419_20130630_034136_outLine +BABEL_OP1_201_54040_20130701_030051_inLine +BABEL_OP1_201_54040_20130701_030051_outLine +BABEL_OP1_201_54923_20130512_032825_inLine +BABEL_OP1_201_54923_20130512_032825_outLine +BABEL_OP1_201_55013_20130503_054608_inLine +BABEL_OP1_201_55013_20130503_054608_outLine +BABEL_OP1_201_56198_20130529_062601_inLine +BABEL_OP1_201_56198_20130529_062601_outLine +BABEL_OP1_201_56370_20130406_025411_inLine +BABEL_OP1_201_56370_20130406_025411_outLine +BABEL_OP1_201_56429_20130528_053349_inLine +BABEL_OP1_201_56429_20130528_053349_outLine +BABEL_OP1_201_56684_20130430_033812_inLine +BABEL_OP1_201_56684_20130430_033812_outLine +BABEL_OP1_201_57067_20130428_015420_inLine +BABEL_OP1_201_57067_20130428_015420_outLine +BABEL_OP1_201_57654_20130604_021427_inLine +BABEL_OP1_201_57654_20130604_021427_outLine +BABEL_OP1_201_58815_20130701_072119_inLine +BABEL_OP1_201_58815_20130701_072119_outLine 
+BABEL_OP1_201_58850_20130529_032635_inLine +BABEL_OP1_201_58850_20130529_032635_outLine +BABEL_OP1_201_59993_20130529_074044_inLine +BABEL_OP1_201_59993_20130529_074044_outLine +BABEL_OP1_201_60836_20130603_224729_inLine +BABEL_OP1_201_60836_20130603_224729_outLine +BABEL_OP1_201_62430_20130428_025620_inLine +BABEL_OP1_201_62430_20130428_025620_outLine +BABEL_OP1_201_62852_20130303_042827_inLine +BABEL_OP1_201_62852_20130303_042827_outLine +BABEL_OP1_201_63220_20130227_082602_inLine +BABEL_OP1_201_63220_20130227_082602_outLine +BABEL_OP1_201_63523_20130501_123402_inLine +BABEL_OP1_201_63523_20130501_123402_outLine +BABEL_OP1_201_64796_20130131_073304_inLine +BABEL_OP1_201_64796_20130131_073304_outLine +BABEL_OP1_201_65298_20130427_075419_inLine +BABEL_OP1_201_65298_20130427_075419_outLine +BABEL_OP1_201_66026_20130414_055206_inLine +BABEL_OP1_201_66026_20130414_055206_outLine +BABEL_OP1_201_66837_20130325_095909_inLine +BABEL_OP1_201_66837_20130325_095909_outLine +BABEL_OP1_201_66959_20130326_091943_inLine +BABEL_OP1_201_66959_20130326_091943_outLine +BABEL_OP1_201_67373_20130528_075634_inLine +BABEL_OP1_201_67373_20130528_075634_outLine +BABEL_OP1_201_71038_20130430_020855_inLine +BABEL_OP1_201_71038_20130430_020855_outLine +BABEL_OP1_201_71067_20130228_114156_inLine +BABEL_OP1_201_71067_20130228_114156_outLine +BABEL_OP1_201_71282_20130428_011003_inLine +BABEL_OP1_201_71282_20130428_011003_outLine +BABEL_OP1_201_71333_20130527_094400_inLine +BABEL_OP1_201_71333_20130527_094400_outLine +BABEL_OP1_201_71704_20130604_005411_inLine +BABEL_OP1_201_71704_20130604_005411_outLine +BABEL_OP1_201_71780_20130528_070831_inLine +BABEL_OP1_201_71780_20130528_070831_outLine +BABEL_OP1_201_73119_20130529_084814_inLine +BABEL_OP1_201_73119_20130529_084814_outLine +BABEL_OP1_201_74111_20130415_122650_inLine +BABEL_OP1_201_74111_20130415_122650_outLine +BABEL_OP1_201_74253_20130324_094324_inLine +BABEL_OP1_201_74253_20130324_094324_outLine +BABEL_OP1_201_74455_20130429_223748_inLine +BABEL_OP1_201_74455_20130429_223748_outLine +BABEL_OP1_201_75261_20130428_072427_inLine +BABEL_OP1_201_75261_20130428_072427_outLine +BABEL_OP1_201_76372_20130406_002653_inLine +BABEL_OP1_201_76372_20130406_002653_outLine +BABEL_OP1_201_79107_20130704_020050_inLine +BABEL_OP1_201_79107_20130704_020050_outLine +BABEL_OP1_201_80655_20130429_014151_inLine +BABEL_OP1_201_80655_20130429_014151_outLine +BABEL_OP1_201_80721_20130324_011204_inLine +BABEL_OP1_201_80721_20130324_011204_outLine +BABEL_OP1_201_81213_20130604_060123_inLine +BABEL_OP1_201_81213_20130604_060123_outLine +BABEL_OP1_201_82361_20130429_234744_inLine +BABEL_OP1_201_82361_20130429_234744_outLine +BABEL_OP1_201_82966_20130702_014841_inLine +BABEL_OP1_201_82966_20130702_014841_outLine +BABEL_OP1_201_83062_20130428_080508_inLine +BABEL_OP1_201_83062_20130428_080508_outLine +BABEL_OP1_201_83062_20130428_081244_inLine +BABEL_OP1_201_83062_20130428_081244_outLine +BABEL_OP1_201_83545_20130503_013151_inLine +BABEL_OP1_201_83545_20130503_013151_outLine +BABEL_OP1_201_84061_20130528_013733_inLine +BABEL_OP1_201_84061_20130528_013733_outLine +BABEL_OP1_201_85028_20130413_093438_inLine +BABEL_OP1_201_85028_20130413_093438_outLine +BABEL_OP1_201_85248_20130429_023317_inLine +BABEL_OP1_201_85248_20130429_023317_outLine +BABEL_OP1_201_87693_20130528_083347_inLine +BABEL_OP1_201_87693_20130528_083347_outLine +BABEL_OP1_201_88686_20130306_035740_inLine +BABEL_OP1_201_88686_20130306_035740_outLine +BABEL_OP1_201_88686_20130306_040902_inLine 
+BABEL_OP1_201_88686_20130306_040902_outLine +BABEL_OP1_201_89330_20130630_075430_inLine +BABEL_OP1_201_89330_20130630_075430_outLine +BABEL_OP1_201_89330_20130630_075936_inLine +BABEL_OP1_201_89330_20130630_075936_outLine +BABEL_OP1_201_89372_20130312_074918_inLine +BABEL_OP1_201_89372_20130312_074918_outLine +BABEL_OP1_201_89560_20130415_124517_inLine +BABEL_OP1_201_89560_20130415_124517_outLine +BABEL_OP1_201_89665_20130603_230819_inLine +BABEL_OP1_201_89665_20130603_230819_outLine +BABEL_OP1_201_89794_20130301_115507_inLine +BABEL_OP1_201_89794_20130301_115507_outLine +BABEL_OP1_201_89794_20130303_105823_inLine +BABEL_OP1_201_89794_20130303_105823_outLine +BABEL_OP1_201_90417_20130520_032334_inLine +BABEL_OP1_201_90417_20130520_032334_outLine +BABEL_OP1_201_90935_20130604_012414_inLine +BABEL_OP1_201_90935_20130604_012414_outLine +BABEL_OP1_201_91372_20130704_010321_inLine +BABEL_OP1_201_91372_20130704_010321_outLine +BABEL_OP1_201_91581_20130313_100349_inLine +BABEL_OP1_201_91581_20130313_100349_outLine +BABEL_OP1_201_91825_20130226_051913_inLine +BABEL_OP1_201_91825_20130226_051913_outLine +BABEL_OP1_201_92096_20130406_072054_inLine +BABEL_OP1_201_92096_20130406_072054_outLine +BABEL_OP1_201_92356_20130428_015350_inLine +BABEL_OP1_201_92356_20130428_015350_outLine +BABEL_OP1_201_92757_20130604_084623_inLine +BABEL_OP1_201_92757_20130604_084623_outLine +BABEL_OP1_201_92886_20130528_023229_inLine +BABEL_OP1_201_92886_20130528_023229_outLine +BABEL_OP1_201_92941_20130527_095346_inLine +BABEL_OP1_201_92941_20130527_095346_outLine +BABEL_OP1_201_93320_20130630_082741_inLine +BABEL_OP1_201_93320_20130630_082741_outLine +BABEL_OP1_201_93475_20130530_101306_inLine +BABEL_OP1_201_93475_20130530_101306_outLine +BABEL_OP1_201_93946_20130406_073121_inLine +BABEL_OP1_201_93946_20130406_073121_outLine +BABEL_OP1_201_94044_20130429_080249_inLine +BABEL_OP1_201_94044_20130429_080249_outLine +BABEL_OP1_201_95467_20130630_224512_inLine +BABEL_OP1_201_95467_20130630_224512_outLine +BABEL_OP1_201_96088_20130429_045832_inLine +BABEL_OP1_201_96088_20130429_045832_outLine +BABEL_OP1_201_96446_20130426_023651_inLine +BABEL_OP1_201_96446_20130426_023651_outLine +BABEL_OP1_201_97097_20130502_025744_inLine +BABEL_OP1_201_97097_20130502_025744_outLine +BABEL_OP1_201_97264_20130429_083940_inLine +BABEL_OP1_201_97264_20130429_083940_outLine +BABEL_OP1_201_97988_20130320_082635_inLine +BABEL_OP1_201_97988_20130320_082635_outLine +BABEL_OP1_201_98506_20130430_082503_inLine +BABEL_OP1_201_98506_20130430_082503_outLine +BABEL_OP1_201_98678_20130403_061826_inLine +BABEL_OP1_201_98678_20130403_061826_outLine +BABEL_OP1_201_98909_20130529_002845_inLine +BABEL_OP1_201_98909_20130529_002845_outLine +BABEL_OP1_201_98909_20130529_003625_inLine +BABEL_OP1_201_98909_20130529_003625_outLine +BABEL_OP1_201_98909_20130529_004310_inLine +BABEL_OP1_201_98909_20130529_004310_outLine +BABEL_OP1_201_98909_20130529_004845_inLine +BABEL_OP1_201_98909_20130529_004845_outLine +BABEL_OP1_201_99516_20130319_061728_inLine +BABEL_OP1_201_99516_20130319_061728_outLine +BABEL_OP1_201_99516_20130320_023645_inLine +BABEL_OP1_201_99516_20130320_023645_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/dev.list b/egs/babel/s5d/conf/lists/202-swahili/dev.list new file mode 100644 index 00000000000..21ae20c66d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/dev.list @@ -0,0 +1,142 @@ +BABEL_OP2_202_10524_20131009_200043_inLine +BABEL_OP2_202_10524_20131009_200043_outLine +BABEL_OP2_202_12635_20131101_212012_inLine 
+BABEL_OP2_202_12635_20131101_212012_outLine +BABEL_OP2_202_12635_20131101_213218_inLine +BABEL_OP2_202_12635_20131101_213218_outLine +BABEL_OP2_202_14814_20140205_210842_inLine +BABEL_OP2_202_14814_20140205_210842_outLine +BABEL_OP2_202_15420_20140210_010333_inLine +BABEL_OP2_202_15420_20140210_010333_outLine +BABEL_OP2_202_16249_20131202_232723_inLine +BABEL_OP2_202_16249_20131202_232723_outLine +BABEL_OP2_202_17115_20140218_210921_inLine +BABEL_OP2_202_17115_20140218_210921_outLine +BABEL_OP2_202_18766_20140218_222017_inLine +BABEL_OP2_202_18766_20140218_222017_outLine +BABEL_OP2_202_24239_20140206_191516_inLine +BABEL_OP2_202_24239_20140206_191516_outLine +BABEL_OP2_202_24290_20140219_000423_inLine +BABEL_OP2_202_24290_20140219_000423_outLine +BABEL_OP2_202_25085_20140219_185114_inLine +BABEL_OP2_202_25085_20140219_185114_outLine +BABEL_OP2_202_25242_20131203_015232_inLine +BABEL_OP2_202_25242_20131203_015232_outLine +BABEL_OP2_202_27478_20140209_224101_inLine +BABEL_OP2_202_27478_20140209_224101_outLine +BABEL_OP2_202_29633_20131009_175514_inLine +BABEL_OP2_202_29633_20131009_175514_outLine +BABEL_OP2_202_29663_20131208_035816_inLine +BABEL_OP2_202_29663_20131208_035816_outLine +BABEL_OP2_202_32287_20131207_203757_inLine +BABEL_OP2_202_32287_20131207_203757_outLine +BABEL_OP2_202_33273_20130219_205419_inLine +BABEL_OP2_202_33273_20130219_205419_outLine +BABEL_OP2_202_33273_20130219_224915_inLine +BABEL_OP2_202_33273_20130219_224915_outLine +BABEL_OP2_202_34197_20121228_201800_inLine +BABEL_OP2_202_34197_20121228_201800_outLine +BABEL_OP2_202_38588_20130228_211322_inLine +BABEL_OP2_202_38588_20130228_211322_outLine +BABEL_OP2_202_39893_20140115_023429_inLine +BABEL_OP2_202_39893_20140115_023429_outLine +BABEL_OP2_202_44309_20140220_184116_inLine +BABEL_OP2_202_44309_20140220_184116_outLine +BABEL_OP2_202_44478_20131011_041636_inLine +BABEL_OP2_202_44478_20131011_041636_outLine +BABEL_OP2_202_45459_20131012_022245_inLine +BABEL_OP2_202_45459_20131012_022245_outLine +BABEL_OP2_202_46169_20131128_183232_inLine +BABEL_OP2_202_46169_20131128_183232_outLine +BABEL_OP2_202_46169_20131128_184600_inLine +BABEL_OP2_202_46169_20131128_184600_outLine +BABEL_OP2_202_46681_20130109_191412_inLine +BABEL_OP2_202_46681_20130109_191412_outLine +BABEL_OP2_202_47405_20131215_233528_inLine +BABEL_OP2_202_47405_20131215_233528_outLine +BABEL_OP2_202_48844_20130108_190416_inLine +BABEL_OP2_202_48844_20130108_190416_outLine +BABEL_OP2_202_52265_20140123_235252_inLine +BABEL_OP2_202_52265_20140123_235252_outLine +BABEL_OP2_202_53957_20131031_012125_inLine +BABEL_OP2_202_53957_20131031_012125_outLine +BABEL_OP2_202_54046_20140121_184347_inLine +BABEL_OP2_202_54046_20140121_184347_outLine +BABEL_OP2_202_55042_20131217_033729_inLine +BABEL_OP2_202_55042_20131217_033729_outLine +BABEL_OP2_202_55106_20131215_030617_inLine +BABEL_OP2_202_55106_20131215_030617_outLine +BABEL_OP2_202_55902_20140121_230205_inLine +BABEL_OP2_202_55902_20140121_230205_outLine +BABEL_OP2_202_59091_20140130_225624_inLine +BABEL_OP2_202_59091_20140130_225624_outLine +BABEL_OP2_202_59549_20131003_203701_inLine +BABEL_OP2_202_59549_20131003_203701_outLine +BABEL_OP2_202_59549_20131003_204655_inLine +BABEL_OP2_202_59549_20131003_204655_outLine +BABEL_OP2_202_60650_20131126_234235_inLine +BABEL_OP2_202_60650_20131126_234235_outLine +BABEL_OP2_202_61440_20140128_015556_inLine +BABEL_OP2_202_61440_20140128_015556_outLine +BABEL_OP2_202_63084_20130801_014407_inLine +BABEL_OP2_202_63084_20130801_014407_outLine 
+BABEL_OP2_202_63084_20130801_015957_inLine +BABEL_OP2_202_63084_20130801_015957_outLine +BABEL_OP2_202_63336_20140129_004138_inLine +BABEL_OP2_202_63336_20140129_004138_outLine +BABEL_OP2_202_63484_20140128_234153_inLine +BABEL_OP2_202_63484_20140128_234153_outLine +BABEL_OP2_202_63604_20121231_193706_inLine +BABEL_OP2_202_63604_20121231_193706_outLine +BABEL_OP2_202_63787_20130108_202518_inLine +BABEL_OP2_202_63787_20130108_202518_outLine +BABEL_OP2_202_63787_20130108_203416_inLine +BABEL_OP2_202_63787_20130108_203416_outLine +BABEL_OP2_202_66177_20140201_213827_inLine +BABEL_OP2_202_66177_20140201_213827_outLine +BABEL_OP2_202_66822_20130219_222318_inLine +BABEL_OP2_202_66822_20130219_222318_outLine +BABEL_OP2_202_66822_20130219_225918_inLine +BABEL_OP2_202_66822_20130219_225918_outLine +BABEL_OP2_202_68384_20131031_003533_inLine +BABEL_OP2_202_68384_20131031_003533_outLine +BABEL_OP2_202_68924_20130924_231821_inLine +BABEL_OP2_202_68924_20130924_231821_outLine +BABEL_OP2_202_69964_20131012_170534_inLine +BABEL_OP2_202_69964_20131012_170534_outLine +BABEL_OP2_202_72040_20131002_213605_inLine +BABEL_OP2_202_72040_20131002_213605_outLine +BABEL_OP2_202_73258_20130215_190454_inLine +BABEL_OP2_202_73258_20130215_190454_outLine +BABEL_OP2_202_73301_20140226_185528_inLine +BABEL_OP2_202_73301_20140226_185528_outLine +BABEL_OP2_202_73819_20130911_163458_inLine +BABEL_OP2_202_73819_20130911_163458_outLine +BABEL_OP2_202_73819_20130927_003321_inLine +BABEL_OP2_202_73819_20130927_003321_outLine +BABEL_OP2_202_75993_20140115_210258_inLine +BABEL_OP2_202_75993_20140115_210258_outLine +BABEL_OP2_202_76756_20130417_204823_inLine +BABEL_OP2_202_76756_20130417_204823_outLine +BABEL_OP2_202_76756_20130417_210400_inLine +BABEL_OP2_202_76756_20130417_210400_outLine +BABEL_OP2_202_77990_20131007_063102_inLine +BABEL_OP2_202_77990_20131007_063102_outLine +BABEL_OP2_202_82637_20121227_193227_inLine +BABEL_OP2_202_82637_20121227_193227_outLine +BABEL_OP2_202_82637_20121227_205425_inLine +BABEL_OP2_202_82637_20121227_205425_outLine +BABEL_OP2_202_84177_20131208_021104_inLine +BABEL_OP2_202_84177_20131208_021104_outLine +BABEL_OP2_202_88260_20130227_194941_inLine +BABEL_OP2_202_88260_20130227_194941_outLine +BABEL_OP2_202_88661_20130801_192922_inLine +BABEL_OP2_202_88661_20130801_192922_outLine +BABEL_OP2_202_90080_20140319_222809_inLine +BABEL_OP2_202_90080_20140319_222809_outLine +BABEL_OP2_202_92740_20130923_235638_inLine +BABEL_OP2_202_92740_20130923_235638_outLine +BABEL_OP2_202_98311_20130109_191639_inLine +BABEL_OP2_202_98311_20130109_191639_outLine +BABEL_OP2_202_98311_20130109_195922_inLine +BABEL_OP2_202_98311_20130109_195922_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/eval.list b/egs/babel/s5d/conf/lists/202-swahili/eval.list new file mode 100644 index 00000000000..8fb4fe490bf --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/eval.list @@ -0,0 +1,963 @@ +BABEL_OP2_202_10019_20130928_235503_inLine +BABEL_OP2_202_10019_20130928_235503_outLine +BABEL_OP2_202_10416_20130215_183832_inLine +BABEL_OP2_202_10416_20130215_183832_outLine +BABEL_OP2_202_11681_20131005_155822_inLine +BABEL_OP2_202_11681_20131005_155822_outLine +BABEL_OP2_202_11723_20131130_201430_inLine +BABEL_OP2_202_11723_20131130_201430_outLine +BABEL_OP2_202_11797_20130104_222532_inLine +BABEL_OP2_202_11797_20130104_222532_outLine +BABEL_OP2_202_12220_20130312_022037_inLine +BABEL_OP2_202_12220_20130312_022037_outLine +BABEL_OP2_202_12321_20140210_015215_inLine +BABEL_OP2_202_12321_20140210_015215_outLine 
+BABEL_OP2_202_12606_20131010_030508_inLine +BABEL_OP2_202_12606_20131010_030508_outLine +BABEL_OP2_202_13040_20131005_180024_inLine +BABEL_OP2_202_13040_20131005_180024_outLine +BABEL_OP2_202_13909_20140207_075853_inLine +BABEL_OP2_202_13909_20140207_075853_outLine +BABEL_OP2_202_13929_20140205_042603_inLine +BABEL_OP2_202_13929_20140205_042603_outLine +BABEL_OP2_202_14137_20131219_015746_inLine +BABEL_OP2_202_14137_20131219_015746_outLine +BABEL_OP2_202_14141_20131009_061849_inLine +BABEL_OP2_202_14141_20131009_061849_outLine +BABEL_OP2_202_14179_20130926_175610_inLine +BABEL_OP2_202_14179_20130926_175610_outLine +BABEL_OP2_202_14228_20131017_195830_inLine +BABEL_OP2_202_14228_20131017_195830_outLine +BABEL_OP2_202_14229_20140208_071149_inLine +BABEL_OP2_202_14229_20140208_071149_outLine +BABEL_OP2_202_14440_20130503_203601_inLine +BABEL_OP2_202_14440_20130503_203601_outLine +BABEL_OP2_202_14440_20130503_204507_inLine +BABEL_OP2_202_14440_20130503_204507_outLine +BABEL_OP2_202_14537_20131016_202630_inLine +BABEL_OP2_202_14537_20131016_202630_outLine +BABEL_OP2_202_14725_20130104_004026_inLine +BABEL_OP2_202_14725_20130104_004026_outLine +BABEL_OP2_202_14807_20140207_040450_inLine +BABEL_OP2_202_14807_20140207_040450_outLine +BABEL_OP2_202_15902_20130108_191503_inLine +BABEL_OP2_202_15902_20130108_191503_outLine +BABEL_OP2_202_16056_20130105_232626_inLine +BABEL_OP2_202_16056_20130105_232626_outLine +BABEL_OP2_202_16056_20130105_235157_inLine +BABEL_OP2_202_16056_20130105_235157_outLine +BABEL_OP2_202_16407_20131203_231519_inLine +BABEL_OP2_202_16407_20131203_231519_outLine +BABEL_OP2_202_16467_20131101_192502_inLine +BABEL_OP2_202_16467_20131101_192502_outLine +BABEL_OP2_202_16475_20130222_200416_inLine +BABEL_OP2_202_16475_20130222_200416_outLine +BABEL_OP2_202_16787_20130220_000429_inLine +BABEL_OP2_202_16787_20130220_000429_outLine +BABEL_OP2_202_17280_20130312_211445_inLine +BABEL_OP2_202_17280_20130312_211445_outLine +BABEL_OP2_202_17440_20131018_012538_inLine +BABEL_OP2_202_17440_20131018_012538_outLine +BABEL_OP2_202_17511_20140205_051449_inLine +BABEL_OP2_202_17511_20140205_051449_outLine +BABEL_OP2_202_17751_20140207_220944_inLine +BABEL_OP2_202_17751_20140207_220944_outLine +BABEL_OP2_202_17881_20131010_011054_inLine +BABEL_OP2_202_17881_20131010_011054_outLine +BABEL_OP2_202_17923_20131004_055753_inLine +BABEL_OP2_202_17923_20131004_055753_outLine +BABEL_OP2_202_18291_20140207_215404_inLine +BABEL_OP2_202_18291_20140207_215404_outLine +BABEL_OP2_202_18380_20130213_000457_inLine +BABEL_OP2_202_18380_20130213_000457_outLine +BABEL_OP2_202_18731_20131128_043434_inLine +BABEL_OP2_202_18731_20131128_043434_outLine +BABEL_OP2_202_19440_20131129_002711_inLine +BABEL_OP2_202_19440_20131129_002711_outLine +BABEL_OP2_202_19444_20131128_200206_inLine +BABEL_OP2_202_19444_20131128_200206_outLine +BABEL_OP2_202_19461_20131129_203023_inLine +BABEL_OP2_202_19461_20131129_203023_outLine +BABEL_OP2_202_19545_20130927_190707_inLine +BABEL_OP2_202_19545_20130927_190707_outLine +BABEL_OP2_202_19621_20130930_034444_inLine +BABEL_OP2_202_19621_20130930_034444_outLine +BABEL_OP2_202_19663_20130220_221050_inLine +BABEL_OP2_202_19663_20130220_221050_outLine +BABEL_OP2_202_19699_20131127_214845_inLine +BABEL_OP2_202_19699_20131127_214845_outLine +BABEL_OP2_202_20738_20131029_183614_inLine +BABEL_OP2_202_20738_20131029_183614_outLine +BABEL_OP2_202_20896_20131220_001523_inLine +BABEL_OP2_202_20896_20131220_001523_outLine +BABEL_OP2_202_20985_20130920_011520_inLine 
+BABEL_OP2_202_20985_20130920_011520_outLine +BABEL_OP2_202_21029_20131004_003216_inLine +BABEL_OP2_202_21029_20131004_003216_outLine +BABEL_OP2_202_21029_20131004_003949_inLine +BABEL_OP2_202_21029_20131004_003949_outLine +BABEL_OP2_202_21393_20140209_001300_inLine +BABEL_OP2_202_21393_20140209_001300_outLine +BABEL_OP2_202_21435_20131010_024821_inLine +BABEL_OP2_202_21435_20131010_024821_outLine +BABEL_OP2_202_21794_20130219_010105_inLine +BABEL_OP2_202_21794_20130219_010105_outLine +BABEL_OP2_202_22021_20131203_222315_inLine +BABEL_OP2_202_22021_20131203_222315_outLine +BABEL_OP2_202_22021_20131203_223002_inLine +BABEL_OP2_202_22021_20131203_223002_outLine +BABEL_OP2_202_22321_20130104_190713_inLine +BABEL_OP2_202_22321_20130104_190713_outLine +BABEL_OP2_202_22591_20131212_031002_inLine +BABEL_OP2_202_22591_20131212_031002_outLine +BABEL_OP2_202_22641_20131011_032157_inLine +BABEL_OP2_202_22641_20131011_032157_outLine +BABEL_OP2_202_23260_20131011_024723_inLine +BABEL_OP2_202_23260_20131011_024723_outLine +BABEL_OP2_202_23355_20131128_005023_inLine +BABEL_OP2_202_23355_20131128_005023_outLine +BABEL_OP2_202_23505_20130107_235621_inLine +BABEL_OP2_202_23505_20130107_235621_outLine +BABEL_OP2_202_23700_20131202_222611_inLine +BABEL_OP2_202_23700_20131202_222611_outLine +BABEL_OP2_202_23731_20130930_020336_inLine +BABEL_OP2_202_23731_20130930_020336_outLine +BABEL_OP2_202_23893_20140207_234800_inLine +BABEL_OP2_202_23893_20140207_234800_outLine +BABEL_OP2_202_23983_20131012_000311_inLine +BABEL_OP2_202_23983_20131012_000311_outLine +BABEL_OP2_202_23995_20131101_185025_inLine +BABEL_OP2_202_23995_20131101_185025_outLine +BABEL_OP2_202_24044_20140108_215521_inLine +BABEL_OP2_202_24044_20140108_215521_outLine +BABEL_OP2_202_24044_20140108_220416_inLine +BABEL_OP2_202_24044_20140108_220416_outLine +BABEL_OP2_202_24221_20131120_203727_inLine +BABEL_OP2_202_24221_20131120_203727_outLine +BABEL_OP2_202_24231_20131012_020132_inLine +BABEL_OP2_202_24231_20131012_020132_outLine +BABEL_OP2_202_24270_20130111_201422_inLine +BABEL_OP2_202_24270_20130111_201422_outLine +BABEL_OP2_202_24323_20130221_203951_inLine +BABEL_OP2_202_24323_20130221_203951_outLine +BABEL_OP2_202_24924_20140207_235730_inLine +BABEL_OP2_202_24924_20140207_235730_outLine +BABEL_OP2_202_25012_20131201_002441_inLine +BABEL_OP2_202_25012_20131201_002441_outLine +BABEL_OP2_202_25961_20130103_204145_inLine +BABEL_OP2_202_25961_20130103_204145_outLine +BABEL_OP2_202_26869_20131216_035718_inLine +BABEL_OP2_202_26869_20131216_035718_outLine +BABEL_OP2_202_28190_20131101_213802_inLine +BABEL_OP2_202_28190_20131101_213802_outLine +BABEL_OP2_202_28422_20130924_010422_inLine +BABEL_OP2_202_28422_20130924_010422_outLine +BABEL_OP2_202_28775_20131004_012212_inLine +BABEL_OP2_202_28775_20131004_012212_outLine +BABEL_OP2_202_28945_20131218_230914_inLine +BABEL_OP2_202_28945_20131218_230914_outLine +BABEL_OP2_202_28945_20131218_232558_inLine +BABEL_OP2_202_28945_20131218_232558_outLine +BABEL_OP2_202_29072_20130930_024744_inLine +BABEL_OP2_202_29072_20130930_024744_outLine +BABEL_OP2_202_29135_20121228_185350_inLine +BABEL_OP2_202_29135_20121228_185350_outLine +BABEL_OP2_202_29208_20130220_203235_inLine +BABEL_OP2_202_29208_20130220_203235_outLine +BABEL_OP2_202_29352_20140209_224923_inLine +BABEL_OP2_202_29352_20140209_224923_outLine +BABEL_OP2_202_29416_20131031_232830_inLine +BABEL_OP2_202_29416_20131031_232830_outLine +BABEL_OP2_202_29643_20131016_010339_inLine +BABEL_OP2_202_29643_20131016_010339_outLine 
+BABEL_OP2_202_30013_20130930_190028_inLine +BABEL_OP2_202_30013_20130930_190028_outLine +BABEL_OP2_202_30058_20131009_163633_inLine +BABEL_OP2_202_30058_20131009_163633_outLine +BABEL_OP2_202_30180_20130311_225750_inLine +BABEL_OP2_202_30180_20130311_225750_outLine +BABEL_OP2_202_30250_20121228_195004_inLine +BABEL_OP2_202_30250_20121228_195004_outLine +BABEL_OP2_202_30250_20121228_195937_inLine +BABEL_OP2_202_30250_20121228_195937_outLine +BABEL_OP2_202_30497_20131010_013817_inLine +BABEL_OP2_202_30497_20131010_013817_outLine +BABEL_OP2_202_30720_20140204_222643_inLine +BABEL_OP2_202_30720_20140204_222643_outLine +BABEL_OP2_202_31074_20131211_235228_inLine +BABEL_OP2_202_31074_20131211_235228_outLine +BABEL_OP2_202_31484_20130912_200823_inLine +BABEL_OP2_202_31484_20130912_200823_outLine +BABEL_OP2_202_31668_20131128_224714_inLine +BABEL_OP2_202_31668_20131128_224714_outLine +BABEL_OP2_202_31992_20130108_181649_inLine +BABEL_OP2_202_31992_20130108_181649_outLine +BABEL_OP2_202_32244_20131012_215830_inLine +BABEL_OP2_202_32244_20131012_215830_outLine +BABEL_OP2_202_32708_20131002_201520_inLine +BABEL_OP2_202_32708_20131002_201520_outLine +BABEL_OP2_202_32727_20131218_221722_inLine +BABEL_OP2_202_32727_20131218_221722_outLine +BABEL_OP2_202_32832_20140108_220009_inLine +BABEL_OP2_202_32832_20140108_220009_outLine +BABEL_OP2_202_32861_20140109_212532_inLine +BABEL_OP2_202_32861_20140109_212532_outLine +BABEL_OP2_202_33111_20131009_060839_inLine +BABEL_OP2_202_33111_20131009_060839_outLine +BABEL_OP2_202_33355_20130107_235044_inLine +BABEL_OP2_202_33355_20130107_235044_outLine +BABEL_OP2_202_33672_20131005_004220_inLine +BABEL_OP2_202_33672_20131005_004220_outLine +BABEL_OP2_202_33933_20131129_215148_inLine +BABEL_OP2_202_33933_20131129_215148_outLine +BABEL_OP2_202_34208_20131203_234204_inLine +BABEL_OP2_202_34208_20131203_234204_outLine +BABEL_OP2_202_34328_20130212_200451_inLine +BABEL_OP2_202_34328_20130212_200451_outLine +BABEL_OP2_202_34336_20131001_231731_inLine +BABEL_OP2_202_34336_20131001_231731_outLine +BABEL_OP2_202_34477_20131001_021709_inLine +BABEL_OP2_202_34477_20131001_021709_outLine +BABEL_OP2_202_34679_20131004_003331_inLine +BABEL_OP2_202_34679_20131004_003331_outLine +BABEL_OP2_202_34899_20140222_024004_inLine +BABEL_OP2_202_34899_20140222_024004_outLine +BABEL_OP2_202_35420_20131129_013142_inLine +BABEL_OP2_202_35420_20131129_013142_outLine +BABEL_OP2_202_35583_20131220_024235_inLine +BABEL_OP2_202_35583_20131220_024235_outLine +BABEL_OP2_202_36039_20131009_024333_inLine +BABEL_OP2_202_36039_20131009_024333_outLine +BABEL_OP2_202_36059_20131009_055135_inLine +BABEL_OP2_202_36059_20131009_055135_outLine +BABEL_OP2_202_36341_20121228_171758_inLine +BABEL_OP2_202_36341_20121228_171758_outLine +BABEL_OP2_202_36505_20131029_215901_inLine +BABEL_OP2_202_36505_20131029_215901_outLine +BABEL_OP2_202_36632_20131216_000901_inLine +BABEL_OP2_202_36632_20131216_000901_outLine +BABEL_OP2_202_36900_20140114_232052_inLine +BABEL_OP2_202_36900_20140114_232052_outLine +BABEL_OP2_202_37007_20140115_001317_inLine +BABEL_OP2_202_37007_20140115_001317_outLine +BABEL_OP2_202_37228_20140114_192926_inLine +BABEL_OP2_202_37228_20140114_192926_outLine +BABEL_OP2_202_37594_20131130_000553_inLine +BABEL_OP2_202_37594_20131130_000553_outLine +BABEL_OP2_202_38125_20131012_003034_inLine +BABEL_OP2_202_38125_20131012_003034_outLine +BABEL_OP2_202_38340_20131002_224306_inLine +BABEL_OP2_202_38340_20131002_224306_outLine +BABEL_OP2_202_38431_20140115_013439_inLine 
+BABEL_OP2_202_38431_20140115_013439_outLine +BABEL_OP2_202_38664_20130315_010258_inLine +BABEL_OP2_202_38664_20130315_010258_outLine +BABEL_OP2_202_38741_20131003_014930_inLine +BABEL_OP2_202_38741_20131003_014930_outLine +BABEL_OP2_202_38878_20130422_220726_inLine +BABEL_OP2_202_38878_20130422_220726_outLine +BABEL_OP2_202_38979_20131129_213001_inLine +BABEL_OP2_202_38979_20131129_213001_outLine +BABEL_OP2_202_39059_20140114_231114_inLine +BABEL_OP2_202_39059_20140114_231114_outLine +BABEL_OP2_202_39099_20131012_024904_inLine +BABEL_OP2_202_39099_20131012_024904_outLine +BABEL_OP2_202_39277_20131127_183847_inLine +BABEL_OP2_202_39277_20131127_183847_outLine +BABEL_OP2_202_39848_20130218_212103_inLine +BABEL_OP2_202_39848_20130218_212103_outLine +BABEL_OP2_202_39927_20131220_231538_inLine +BABEL_OP2_202_39927_20131220_231538_outLine +BABEL_OP2_202_40092_20131216_000009_inLine +BABEL_OP2_202_40092_20131216_000009_outLine +BABEL_OP2_202_40196_20140115_000031_inLine +BABEL_OP2_202_40196_20140115_000031_outLine +BABEL_OP2_202_40713_20131002_202108_inLine +BABEL_OP2_202_40713_20131002_202108_outLine +BABEL_OP2_202_41097_20130425_211559_inLine +BABEL_OP2_202_41097_20130425_211559_outLine +BABEL_OP2_202_41100_20130109_000954_inLine +BABEL_OP2_202_41100_20130109_000954_outLine +BABEL_OP2_202_41109_20140114_212001_inLine +BABEL_OP2_202_41109_20140114_212001_outLine +BABEL_OP2_202_41272_20131011_014240_inLine +BABEL_OP2_202_41272_20131011_014240_outLine +BABEL_OP2_202_41400_20140221_225008_inLine +BABEL_OP2_202_41400_20140221_225008_outLine +BABEL_OP2_202_41682_20131128_174746_inLine +BABEL_OP2_202_41682_20131128_174746_outLine +BABEL_OP2_202_41685_20131128_221337_inLine +BABEL_OP2_202_41685_20131128_221337_outLine +BABEL_OP2_202_41692_20131013_044515_inLine +BABEL_OP2_202_41692_20131013_044515_outLine +BABEL_OP2_202_41741_20130108_182526_inLine +BABEL_OP2_202_41741_20130108_182526_outLine +BABEL_OP2_202_41920_20130103_233550_inLine +BABEL_OP2_202_41920_20130103_233550_outLine +BABEL_OP2_202_42497_20131003_225108_inLine +BABEL_OP2_202_42497_20131003_225108_outLine +BABEL_OP2_202_42848_20131015_223830_inLine +BABEL_OP2_202_42848_20131015_223830_outLine +BABEL_OP2_202_42883_20131016_211736_inLine +BABEL_OP2_202_42883_20131016_211736_outLine +BABEL_OP2_202_43074_20140227_192107_inLine +BABEL_OP2_202_43074_20140227_192107_outLine +BABEL_OP2_202_43368_20130930_024429_inLine +BABEL_OP2_202_43368_20130930_024429_outLine +BABEL_OP2_202_43388_20130215_192049_inLine +BABEL_OP2_202_43388_20130215_192049_outLine +BABEL_OP2_202_43588_20131011_193321_inLine +BABEL_OP2_202_43588_20131011_193321_outLine +BABEL_OP2_202_43789_20130213_194416_inLine +BABEL_OP2_202_43789_20130213_194416_outLine +BABEL_OP2_202_44114_20140221_202130_inLine +BABEL_OP2_202_44114_20140221_202130_outLine +BABEL_OP2_202_44619_20131003_023727_inLine +BABEL_OP2_202_44619_20131003_023727_outLine +BABEL_OP2_202_44678_20131128_010554_inLine +BABEL_OP2_202_44678_20131128_010554_outLine +BABEL_OP2_202_44681_20131218_213752_inLine +BABEL_OP2_202_44681_20131218_213752_outLine +BABEL_OP2_202_44681_20131218_214913_inLine +BABEL_OP2_202_44681_20131218_214913_outLine +BABEL_OP2_202_45121_20131014_040623_inLine +BABEL_OP2_202_45121_20131014_040623_outLine +BABEL_OP2_202_45374_20131217_005647_inLine +BABEL_OP2_202_45374_20131217_005647_outLine +BABEL_OP2_202_45560_20130105_195053_inLine +BABEL_OP2_202_45560_20130105_195053_outLine +BABEL_OP2_202_45642_20130109_010614_inLine +BABEL_OP2_202_45642_20130109_010614_outLine 
+BABEL_OP2_202_45697_20131029_204657_inLine +BABEL_OP2_202_45697_20131029_204657_outLine +BABEL_OP2_202_45699_20131202_203725_inLine +BABEL_OP2_202_45699_20131202_203725_outLine +BABEL_OP2_202_45770_20130105_212856_inLine +BABEL_OP2_202_45770_20130105_212856_outLine +BABEL_OP2_202_45777_20130930_215344_inLine +BABEL_OP2_202_45777_20130930_215344_outLine +BABEL_OP2_202_45777_20130930_220539_inLine +BABEL_OP2_202_45777_20130930_220539_outLine +BABEL_OP2_202_45851_20131011_210832_inLine +BABEL_OP2_202_45851_20131011_210832_outLine +BABEL_OP2_202_46008_20131017_180431_inLine +BABEL_OP2_202_46008_20131017_180431_outLine +BABEL_OP2_202_46333_20140225_002629_inLine +BABEL_OP2_202_46333_20140225_002629_outLine +BABEL_OP2_202_46535_20131202_194843_inLine +BABEL_OP2_202_46535_20131202_194843_outLine +BABEL_OP2_202_46712_20140130_195615_inLine +BABEL_OP2_202_46712_20140130_195615_outLine +BABEL_OP2_202_46905_20131130_194813_inLine +BABEL_OP2_202_46905_20131130_194813_outLine +BABEL_OP2_202_46974_20130729_181547_inLine +BABEL_OP2_202_46974_20130729_181547_outLine +BABEL_OP2_202_47215_20131005_012123_inLine +BABEL_OP2_202_47215_20131005_012123_outLine +BABEL_OP2_202_47283_20131003_211344_inLine +BABEL_OP2_202_47283_20131003_211344_outLine +BABEL_OP2_202_47959_20131004_035713_inLine +BABEL_OP2_202_47959_20131004_035713_outLine +BABEL_OP2_202_48016_20140220_204253_inLine +BABEL_OP2_202_48016_20140220_204253_outLine +BABEL_OP2_202_48024_20131202_232637_inLine +BABEL_OP2_202_48024_20131202_232637_outLine +BABEL_OP2_202_48610_20130107_203818_inLine +BABEL_OP2_202_48610_20130107_203818_outLine +BABEL_OP2_202_48663_20140222_012910_inLine +BABEL_OP2_202_48663_20140222_012910_outLine +BABEL_OP2_202_48758_20131009_051338_inLine +BABEL_OP2_202_48758_20131009_051338_outLine +BABEL_OP2_202_48789_20130212_210506_inLine +BABEL_OP2_202_48789_20130212_210506_outLine +BABEL_OP2_202_49197_20130222_010455_inLine +BABEL_OP2_202_49197_20130222_010455_outLine +BABEL_OP2_202_49637_20130103_203801_inLine +BABEL_OP2_202_49637_20130103_203801_outLine +BABEL_OP2_202_49767_20140221_192835_inLine +BABEL_OP2_202_49767_20140221_192835_outLine +BABEL_OP2_202_50090_20130425_175113_inLine +BABEL_OP2_202_50090_20130425_175113_outLine +BABEL_OP2_202_50175_20131010_020435_inLine +BABEL_OP2_202_50175_20131010_020435_outLine +BABEL_OP2_202_50565_20121228_195128_inLine +BABEL_OP2_202_50565_20121228_195128_outLine +BABEL_OP2_202_50601_20130911_001026_inLine +BABEL_OP2_202_50601_20130911_001026_outLine +BABEL_OP2_202_50630_20130926_021713_inLine +BABEL_OP2_202_50630_20130926_021713_outLine +BABEL_OP2_202_50745_20131010_001443_inLine +BABEL_OP2_202_50745_20131010_001443_outLine +BABEL_OP2_202_50779_20130911_004921_inLine +BABEL_OP2_202_50779_20130911_004921_outLine +BABEL_OP2_202_50958_20130219_215809_inLine +BABEL_OP2_202_50958_20130219_215809_outLine +BABEL_OP2_202_50962_20131002_203346_inLine +BABEL_OP2_202_50962_20131002_203346_outLine +BABEL_OP2_202_51414_20131012_225839_inLine +BABEL_OP2_202_51414_20131012_225839_outLine +BABEL_OP2_202_51530_20131012_011011_inLine +BABEL_OP2_202_51530_20131012_011011_outLine +BABEL_OP2_202_51701_20140123_205529_inLine +BABEL_OP2_202_51701_20140123_205529_outLine +BABEL_OP2_202_52058_20131128_233329_inLine +BABEL_OP2_202_52058_20131128_233329_outLine +BABEL_OP2_202_52070_20140124_231122_inLine +BABEL_OP2_202_52070_20140124_231122_outLine +BABEL_OP2_202_52222_20131126_183055_inLine +BABEL_OP2_202_52222_20131126_183055_outLine +BABEL_OP2_202_52301_20140122_233501_inLine 
+BABEL_OP2_202_52301_20140122_233501_outLine +BABEL_OP2_202_52447_20131014_001157_inLine +BABEL_OP2_202_52447_20131014_001157_outLine +BABEL_OP2_202_52483_20140123_011106_inLine +BABEL_OP2_202_52483_20140123_011106_outLine +BABEL_OP2_202_52614_20131011_162942_inLine +BABEL_OP2_202_52614_20131011_162942_outLine +BABEL_OP2_202_52717_20130107_190619_inLine +BABEL_OP2_202_52717_20130107_190619_outLine +BABEL_OP2_202_52725_20131009_155625_inLine +BABEL_OP2_202_52725_20131009_155625_outLine +BABEL_OP2_202_52804_20131006_224625_inLine +BABEL_OP2_202_52804_20131006_224625_outLine +BABEL_OP2_202_53068_20131130_195134_inLine +BABEL_OP2_202_53068_20131130_195134_outLine +BABEL_OP2_202_53206_20131129_004718_inLine +BABEL_OP2_202_53206_20131129_004718_outLine +BABEL_OP2_202_53419_20140123_230101_inLine +BABEL_OP2_202_53419_20140123_230101_outLine +BABEL_OP2_202_53441_20131207_225909_inLine +BABEL_OP2_202_53441_20131207_225909_outLine +BABEL_OP2_202_53492_20131010_185348_inLine +BABEL_OP2_202_53492_20131010_185348_outLine +BABEL_OP2_202_53665_20131010_234640_inLine +BABEL_OP2_202_53665_20131010_234640_outLine +BABEL_OP2_202_54160_20130102_205447_inLine +BABEL_OP2_202_54160_20130102_205447_outLine +BABEL_OP2_202_54162_20130318_213750_inLine +BABEL_OP2_202_54162_20130318_213750_outLine +BABEL_OP2_202_54594_20131129_200245_inLine +BABEL_OP2_202_54594_20131129_200245_outLine +BABEL_OP2_202_54735_20140125_031824_inLine +BABEL_OP2_202_54735_20140125_031824_outLine +BABEL_OP2_202_54923_20140125_010451_inLine +BABEL_OP2_202_54923_20140125_010451_outLine +BABEL_OP2_202_55013_20131011_155605_inLine +BABEL_OP2_202_55013_20131011_155605_outLine +BABEL_OP2_202_55742_20140115_203307_inLine +BABEL_OP2_202_55742_20140115_203307_outLine +BABEL_OP2_202_55818_20130108_192939_inLine +BABEL_OP2_202_55818_20130108_192939_outLine +BABEL_OP2_202_56198_20131003_013538_inLine +BABEL_OP2_202_56198_20131003_013538_outLine +BABEL_OP2_202_56213_20140122_225210_inLine +BABEL_OP2_202_56213_20140122_225210_outLine +BABEL_OP2_202_56370_20130104_200151_inLine +BABEL_OP2_202_56370_20130104_200151_outLine +BABEL_OP2_202_56523_20130222_213416_inLine +BABEL_OP2_202_56523_20130222_213416_outLine +BABEL_OP2_202_56677_20140121_210326_inLine +BABEL_OP2_202_56677_20140121_210326_outLine +BABEL_OP2_202_56743_20130225_194854_inLine +BABEL_OP2_202_56743_20130225_194854_outLine +BABEL_OP2_202_56826_20131126_175456_inLine +BABEL_OP2_202_56826_20131126_175456_outLine +BABEL_OP2_202_57067_20140125_030302_inLine +BABEL_OP2_202_57067_20140125_030302_outLine +BABEL_OP2_202_57093_20131001_005041_inLine +BABEL_OP2_202_57093_20131001_005041_outLine +BABEL_OP2_202_57548_20130928_000636_inLine +BABEL_OP2_202_57548_20130928_000636_outLine +BABEL_OP2_202_57609_20130110_214448_inLine +BABEL_OP2_202_57609_20130110_214448_outLine +BABEL_OP2_202_57650_20131031_222920_inLine +BABEL_OP2_202_57650_20131031_222920_outLine +BABEL_OP2_202_57650_20131031_224035_inLine +BABEL_OP2_202_57650_20131031_224035_outLine +BABEL_OP2_202_57678_20140130_211104_inLine +BABEL_OP2_202_57678_20140130_211104_outLine +BABEL_OP2_202_57919_20131204_003234_inLine +BABEL_OP2_202_57919_20131204_003234_outLine +BABEL_OP2_202_58061_20131128_202231_inLine +BABEL_OP2_202_58061_20131128_202231_outLine +BABEL_OP2_202_58815_20131029_230825_inLine +BABEL_OP2_202_58815_20131029_230825_outLine +BABEL_OP2_202_58850_20130222_005155_inLine +BABEL_OP2_202_58850_20130222_005155_outLine +BABEL_OP2_202_58926_20131005_000157_inLine +BABEL_OP2_202_58926_20131005_000157_outLine 
+BABEL_OP2_202_59039_20131130_232650_inLine +BABEL_OP2_202_59039_20131130_232650_outLine +BABEL_OP2_202_59307_20131009_070225_inLine +BABEL_OP2_202_59307_20131009_070225_outLine +BABEL_OP2_202_59898_20130103_222102_inLine +BABEL_OP2_202_59898_20130103_222102_outLine +BABEL_OP2_202_60026_20130105_231529_inLine +BABEL_OP2_202_60026_20130105_231529_outLine +BABEL_OP2_202_60026_20130105_232525_inLine +BABEL_OP2_202_60026_20130105_232525_outLine +BABEL_OP2_202_60115_20130924_002929_inLine +BABEL_OP2_202_60115_20130924_002929_outLine +BABEL_OP2_202_60310_20131030_231919_inLine +BABEL_OP2_202_60310_20131030_231919_outLine +BABEL_OP2_202_60498_20131012_205044_inLine +BABEL_OP2_202_60498_20131012_205044_outLine +BABEL_OP2_202_60538_20130107_185811_inLine +BABEL_OP2_202_60538_20130107_185811_outLine +BABEL_OP2_202_60626_20131003_025140_inLine +BABEL_OP2_202_60626_20131003_025140_outLine +BABEL_OP2_202_60661_20131004_193207_inLine +BABEL_OP2_202_60661_20131004_193207_outLine +BABEL_OP2_202_60836_20131006_231246_inLine +BABEL_OP2_202_60836_20131006_231246_outLine +BABEL_OP2_202_61348_20130423_213656_inLine +BABEL_OP2_202_61348_20130423_213656_outLine +BABEL_OP2_202_61831_20140129_223655_inLine +BABEL_OP2_202_61831_20140129_223655_outLine +BABEL_OP2_202_61963_20140130_004249_inLine +BABEL_OP2_202_61963_20140130_004249_outLine +BABEL_OP2_202_62014_20130422_215514_inLine +BABEL_OP2_202_62014_20130422_215514_outLine +BABEL_OP2_202_62155_20131010_030043_inLine +BABEL_OP2_202_62155_20131010_030043_outLine +BABEL_OP2_202_62177_20131101_224431_inLine +BABEL_OP2_202_62177_20131101_224431_outLine +BABEL_OP2_202_62200_20130221_201143_inLine +BABEL_OP2_202_62200_20130221_201143_outLine +BABEL_OP2_202_62289_20131012_021114_inLine +BABEL_OP2_202_62289_20131012_021114_outLine +BABEL_OP2_202_62434_20130104_004333_inLine +BABEL_OP2_202_62434_20130104_004333_outLine +BABEL_OP2_202_62434_20130104_005350_inLine +BABEL_OP2_202_62434_20130104_005350_outLine +BABEL_OP2_202_62545_20131127_195440_inLine +BABEL_OP2_202_62545_20131127_195440_outLine +BABEL_OP2_202_62734_20130930_165147_inLine +BABEL_OP2_202_62734_20130930_165147_outLine +BABEL_OP2_202_62835_20130212_190421_inLine +BABEL_OP2_202_62835_20130212_190421_outLine +BABEL_OP2_202_63481_20121229_212430_inLine +BABEL_OP2_202_63481_20121229_212430_outLine +BABEL_OP2_202_63511_20140202_013550_inLine +BABEL_OP2_202_63511_20140202_013550_outLine +BABEL_OP2_202_63730_20140128_213539_inLine +BABEL_OP2_202_63730_20140128_213539_outLine +BABEL_OP2_202_63906_20140127_191132_inLine +BABEL_OP2_202_63906_20140127_191132_outLine +BABEL_OP2_202_63938_20140129_203743_inLine +BABEL_OP2_202_63938_20140129_203743_outLine +BABEL_OP2_202_64350_20130109_214951_inLine +BABEL_OP2_202_64350_20130109_214951_outLine +BABEL_OP2_202_64350_20130109_234646_inLine +BABEL_OP2_202_64350_20130109_234646_outLine +BABEL_OP2_202_64350_20130110_000149_inLine +BABEL_OP2_202_64350_20130110_000149_outLine +BABEL_OP2_202_64638_20130923_221504_inLine +BABEL_OP2_202_64638_20130923_221504_outLine +BABEL_OP2_202_64768_20130930_231452_inLine +BABEL_OP2_202_64768_20130930_231452_outLine +BABEL_OP2_202_64902_20131010_043148_inLine +BABEL_OP2_202_64902_20131010_043148_outLine +BABEL_OP2_202_65298_20131031_213621_inLine +BABEL_OP2_202_65298_20131031_213621_outLine +BABEL_OP2_202_65477_20130219_211638_inLine +BABEL_OP2_202_65477_20130219_211638_outLine +BABEL_OP2_202_65639_20131128_205641_inLine +BABEL_OP2_202_65639_20131128_205641_outLine +BABEL_OP2_202_65640_20131010_034809_inLine 
+BABEL_OP2_202_65640_20131010_034809_outLine +BABEL_OP2_202_65882_20131004_204102_inLine +BABEL_OP2_202_65882_20131004_204102_outLine +BABEL_OP2_202_65882_20131004_205447_inLine +BABEL_OP2_202_65882_20131004_205447_outLine +BABEL_OP2_202_66026_20140226_195114_inLine +BABEL_OP2_202_66026_20140226_195114_outLine +BABEL_OP2_202_66472_20130214_204424_inLine +BABEL_OP2_202_66472_20130214_204424_outLine +BABEL_OP2_202_66519_20130930_020855_inLine +BABEL_OP2_202_66519_20130930_020855_outLine +BABEL_OP2_202_66837_20131030_220432_inLine +BABEL_OP2_202_66837_20131030_220432_outLine +BABEL_OP2_202_66959_20131018_194733_inLine +BABEL_OP2_202_66959_20131018_194733_outLine +BABEL_OP2_202_66967_20130103_220521_inLine +BABEL_OP2_202_66967_20130103_220521_outLine +BABEL_OP2_202_67085_20131016_193800_inLine +BABEL_OP2_202_67085_20131016_193800_outLine +BABEL_OP2_202_67152_20131129_224301_inLine +BABEL_OP2_202_67152_20131129_224301_outLine +BABEL_OP2_202_67373_20131004_205550_inLine +BABEL_OP2_202_67373_20131004_205550_outLine +BABEL_OP2_202_67389_20140131_211249_inLine +BABEL_OP2_202_67389_20140131_211249_outLine +BABEL_OP2_202_67794_20131003_192439_inLine +BABEL_OP2_202_67794_20131003_192439_outLine +BABEL_OP2_202_67842_20131003_222534_inLine +BABEL_OP2_202_67842_20131003_222534_outLine +BABEL_OP2_202_67999_20140201_200014_inLine +BABEL_OP2_202_67999_20140201_200014_outLine +BABEL_OP2_202_68059_20140125_192613_inLine +BABEL_OP2_202_68059_20140125_192613_outLine +BABEL_OP2_202_68182_20131031_193507_inLine +BABEL_OP2_202_68182_20131031_193507_outLine +BABEL_OP2_202_68306_20130729_224017_inLine +BABEL_OP2_202_68306_20130729_224017_outLine +BABEL_OP2_202_68627_20130219_230718_inLine +BABEL_OP2_202_68627_20130219_230718_outLine +BABEL_OP2_202_68908_20131130_010731_inLine +BABEL_OP2_202_68908_20131130_010731_outLine +BABEL_OP2_202_69090_20131127_230541_inLine +BABEL_OP2_202_69090_20131127_230541_outLine +BABEL_OP2_202_69107_20130927_174817_inLine +BABEL_OP2_202_69107_20130927_174817_outLine +BABEL_OP2_202_69574_20131005_172205_inLine +BABEL_OP2_202_69574_20131005_172205_outLine +BABEL_OP2_202_69633_20130801_191800_inLine +BABEL_OP2_202_69633_20130801_191800_outLine +BABEL_OP2_202_69885_20131011_031936_inLine +BABEL_OP2_202_69885_20131011_031936_outLine +BABEL_OP2_202_69972_20140129_230607_inLine +BABEL_OP2_202_69972_20140129_230607_outLine +BABEL_OP2_202_69992_20130108_193548_inLine +BABEL_OP2_202_69992_20130108_193548_outLine +BABEL_OP2_202_70257_20131130_202722_inLine +BABEL_OP2_202_70257_20131130_202722_outLine +BABEL_OP2_202_70526_20131012_045553_inLine +BABEL_OP2_202_70526_20131012_045553_outLine +BABEL_OP2_202_70716_20131012_032544_inLine +BABEL_OP2_202_70716_20131012_032544_outLine +BABEL_OP2_202_71038_20140306_165543_inLine +BABEL_OP2_202_71038_20140306_165543_outLine +BABEL_OP2_202_71047_20140303_233000_inLine +BABEL_OP2_202_71047_20140303_233000_outLine +BABEL_OP2_202_71189_20131010_061651_inLine +BABEL_OP2_202_71189_20131010_061651_outLine +BABEL_OP2_202_71282_20131030_163454_inLine +BABEL_OP2_202_71282_20131030_163454_outLine +BABEL_OP2_202_71419_20131130_200448_inLine +BABEL_OP2_202_71419_20131130_200448_outLine +BABEL_OP2_202_71460_20131218_192638_inLine +BABEL_OP2_202_71460_20131218_192638_outLine +BABEL_OP2_202_71559_20140311_230424_inLine +BABEL_OP2_202_71559_20140311_230424_outLine +BABEL_OP2_202_71704_20130109_185345_inLine +BABEL_OP2_202_71704_20130109_185345_outLine +BABEL_OP2_202_71780_20131003_034729_inLine +BABEL_OP2_202_71780_20131003_034729_outLine 
+BABEL_OP2_202_72654_20130929_175728_inLine +BABEL_OP2_202_72654_20130929_175728_outLine +BABEL_OP2_202_72733_20131018_230438_inLine +BABEL_OP2_202_72733_20131018_230438_outLine +BABEL_OP2_202_73042_20130109_205002_inLine +BABEL_OP2_202_73042_20130109_205002_outLine +BABEL_OP2_202_73072_20130105_235040_inLine +BABEL_OP2_202_73072_20130105_235040_outLine +BABEL_OP2_202_73485_20131011_183811_inLine +BABEL_OP2_202_73485_20131011_183811_outLine +BABEL_OP2_202_73485_20131011_184857_inLine +BABEL_OP2_202_73485_20131011_184857_outLine +BABEL_OP2_202_73757_20130319_022121_inLine +BABEL_OP2_202_73757_20130319_022121_outLine +BABEL_OP2_202_73964_20131011_010642_inLine +BABEL_OP2_202_73964_20131011_010642_outLine +BABEL_OP2_202_74111_20131018_223020_inLine +BABEL_OP2_202_74111_20131018_223020_outLine +BABEL_OP2_202_74455_20131201_010424_inLine +BABEL_OP2_202_74455_20131201_010424_outLine +BABEL_OP2_202_74641_20130927_171309_inLine +BABEL_OP2_202_74641_20130927_171309_outLine +BABEL_OP2_202_74728_20131011_175203_inLine +BABEL_OP2_202_74728_20131011_175203_outLine +BABEL_OP2_202_74886_20130104_222216_inLine +BABEL_OP2_202_74886_20130104_222216_outLine +BABEL_OP2_202_75365_20131017_020033_inLine +BABEL_OP2_202_75365_20131017_020033_outLine +BABEL_OP2_202_75465_20140227_020909_inLine +BABEL_OP2_202_75465_20140227_020909_outLine +BABEL_OP2_202_75869_20131010_054546_inLine +BABEL_OP2_202_75869_20131010_054546_outLine +BABEL_OP2_202_75981_20131017_182656_inLine +BABEL_OP2_202_75981_20131017_182656_outLine +BABEL_OP2_202_76155_20130214_225045_inLine +BABEL_OP2_202_76155_20130214_225045_outLine +BABEL_OP2_202_76155_20130214_231141_inLine +BABEL_OP2_202_76155_20130214_231141_outLine +BABEL_OP2_202_76155_20130214_233751_inLine +BABEL_OP2_202_76155_20130214_233751_outLine +BABEL_OP2_202_76218_20130215_211824_inLine +BABEL_OP2_202_76218_20130215_211824_outLine +BABEL_OP2_202_76372_20131010_032300_inLine +BABEL_OP2_202_76372_20131010_032300_outLine +BABEL_OP2_202_76773_20131004_211703_inLine +BABEL_OP2_202_76773_20131004_211703_outLine +BABEL_OP2_202_77139_20121228_190704_inLine +BABEL_OP2_202_77139_20121228_190704_outLine +BABEL_OP2_202_77730_20130108_005804_inLine +BABEL_OP2_202_77730_20130108_005804_outLine +BABEL_OP2_202_78116_20130730_032152_inLine +BABEL_OP2_202_78116_20130730_032152_outLine +BABEL_OP2_202_78161_20131128_013256_inLine +BABEL_OP2_202_78161_20131128_013256_outLine +BABEL_OP2_202_78254_20140315_200641_inLine +BABEL_OP2_202_79131_20131011_031533_inLine +BABEL_OP2_202_79131_20131011_031533_outLine +BABEL_OP2_202_79167_20130801_173136_inLine +BABEL_OP2_202_79167_20130801_173136_outLine +BABEL_OP2_202_79505_20140304_011515_inLine +BABEL_OP2_202_79505_20140304_011515_outLine +BABEL_OP2_202_79590_20130214_233631_inLine +BABEL_OP2_202_79590_20130214_233631_outLine +BABEL_OP2_202_79820_20131002_224612_inLine +BABEL_OP2_202_79820_20131002_224612_outLine +BABEL_OP2_202_79858_20131007_202121_inLine +BABEL_OP2_202_79858_20131007_202121_outLine +BABEL_OP2_202_80241_20131208_061751_inLine +BABEL_OP2_202_80241_20131208_061751_outLine +BABEL_OP2_202_80577_20131101_002029_inLine +BABEL_OP2_202_80577_20131101_002029_outLine +BABEL_OP2_202_80721_20131018_215413_inLine +BABEL_OP2_202_80721_20131018_215413_outLine +BABEL_OP2_202_81424_20130731_174939_inLine +BABEL_OP2_202_81424_20130731_174939_outLine +BABEL_OP2_202_81427_20130930_033601_inLine +BABEL_OP2_202_81427_20130930_033601_outLine +BABEL_OP2_202_81427_20130930_034540_inLine +BABEL_OP2_202_81427_20130930_034540_outLine 
+BABEL_OP2_202_81581_20131130_234413_inLine +BABEL_OP2_202_81581_20131130_234413_outLine +BABEL_OP2_202_81674_20131129_201042_inLine +BABEL_OP2_202_81674_20131129_201042_outLine +BABEL_OP2_202_81810_20130731_202723_inLine +BABEL_OP2_202_81810_20130731_202723_outLine +BABEL_OP2_202_81854_20131016_235937_inLine +BABEL_OP2_202_81854_20131016_235937_outLine +BABEL_OP2_202_82089_20130213_201744_inLine +BABEL_OP2_202_82089_20130213_201744_outLine +BABEL_OP2_202_82140_20130411_203406_inLine +BABEL_OP2_202_82140_20130411_203406_outLine +BABEL_OP2_202_82145_20131009_152735_inLine +BABEL_OP2_202_82145_20131009_152735_outLine +BABEL_OP2_202_82145_20131010_055122_inLine +BABEL_OP2_202_82145_20131010_055122_outLine +BABEL_OP2_202_82863_20130213_003624_inLine +BABEL_OP2_202_82863_20130213_003624_outLine +BABEL_OP2_202_82979_20131002_205506_inLine +BABEL_OP2_202_82979_20131002_205506_outLine +BABEL_OP2_202_83062_20131129_191922_inLine +BABEL_OP2_202_83062_20131129_191922_outLine +BABEL_OP2_202_83935_20130801_192224_inLine +BABEL_OP2_202_83935_20130801_192224_outLine +BABEL_OP2_202_83935_20130801_194402_inLine +BABEL_OP2_202_83935_20130801_194402_outLine +BABEL_OP2_202_84061_20130929_235409_inLine +BABEL_OP2_202_84061_20130929_235409_outLine +BABEL_OP2_202_84079_20131208_050702_inLine +BABEL_OP2_202_84079_20131208_050702_outLine +BABEL_OP2_202_84125_20121222_184258_inLine +BABEL_OP2_202_84125_20121222_184258_outLine +BABEL_OP2_202_84125_20121222_185218_inLine +BABEL_OP2_202_84125_20121222_185218_outLine +BABEL_OP2_202_84327_20130730_193322_inLine +BABEL_OP2_202_84327_20130730_193322_outLine +BABEL_OP2_202_84605_20131003_053508_inLine +BABEL_OP2_202_84605_20131003_053508_outLine +BABEL_OP2_202_84737_20131031_211648_inLine +BABEL_OP2_202_84737_20131031_211648_outLine +BABEL_OP2_202_84815_20131018_211832_inLine +BABEL_OP2_202_84815_20131018_211832_outLine +BABEL_OP2_202_84823_20131031_020506_inLine +BABEL_OP2_202_84823_20131031_020506_outLine +BABEL_OP2_202_85048_20130911_014859_inLine +BABEL_OP2_202_85048_20130911_014859_outLine +BABEL_OP2_202_85179_20131101_192951_inLine +BABEL_OP2_202_85179_20131101_192951_outLine +BABEL_OP2_202_85248_20131030_022406_inLine +BABEL_OP2_202_85248_20131030_022406_outLine +BABEL_OP2_202_85322_20130108_190627_inLine +BABEL_OP2_202_85322_20130108_190627_outLine +BABEL_OP2_202_85322_20130108_191905_inLine +BABEL_OP2_202_85322_20130108_191905_outLine +BABEL_OP2_202_85325_20131011_181734_inLine +BABEL_OP2_202_85325_20131011_181734_outLine +BABEL_OP2_202_85439_20131012_024821_inLine +BABEL_OP2_202_85439_20131012_024821_outLine +BABEL_OP2_202_86467_20121231_205911_inLine +BABEL_OP2_202_86467_20121231_205911_outLine +BABEL_OP2_202_86472_20130803_213443_inLine +BABEL_OP2_202_86472_20130803_213443_outLine +BABEL_OP2_202_86826_20131015_204931_inLine +BABEL_OP2_202_86826_20131015_204931_outLine +BABEL_OP2_202_86830_20131031_221935_inLine +BABEL_OP2_202_86830_20131031_221935_outLine +BABEL_OP2_202_87074_20140114_001320_inLine +BABEL_OP2_202_87074_20140114_001320_outLine +BABEL_OP2_202_87470_20130225_202639_inLine +BABEL_OP2_202_87470_20130225_202639_outLine +BABEL_OP2_202_87545_20131012_025318_inLine +BABEL_OP2_202_87545_20131012_025318_outLine +BABEL_OP2_202_87866_20131215_203616_inLine +BABEL_OP2_202_87866_20131215_203616_outLine +BABEL_OP2_202_87871_20131031_222231_inLine +BABEL_OP2_202_87871_20131031_222231_outLine +BABEL_OP2_202_87921_20131017_204018_inLine +BABEL_OP2_202_87921_20131017_204018_outLine +BABEL_OP2_202_88372_20131012_023925_inLine 
+BABEL_OP2_202_88372_20131012_023925_outLine +BABEL_OP2_202_88550_20131017_004344_inLine +BABEL_OP2_202_88550_20131017_004344_outLine +BABEL_OP2_202_88550_20131017_005456_inLine +BABEL_OP2_202_88550_20131017_005456_outLine +BABEL_OP2_202_88601_20130212_205048_inLine +BABEL_OP2_202_88601_20130212_205048_outLine +BABEL_OP2_202_88873_20131004_003616_inLine +BABEL_OP2_202_88873_20131004_003616_outLine +BABEL_OP2_202_89226_20131203_030320_inLine +BABEL_OP2_202_89226_20131203_030320_outLine +BABEL_OP2_202_89560_20131018_222518_inLine +BABEL_OP2_202_89560_20131018_222518_outLine +BABEL_OP2_202_89650_20131202_204623_inLine +BABEL_OP2_202_89650_20131202_204623_outLine +BABEL_OP2_202_89718_20131203_002623_inLine +BABEL_OP2_202_89718_20131203_002623_outLine +BABEL_OP2_202_89888_20130109_184456_inLine +BABEL_OP2_202_89888_20130109_184456_outLine +BABEL_OP2_202_90935_20130226_232117_inLine +BABEL_OP2_202_90935_20130226_232117_outLine +BABEL_OP2_202_91189_20131017_013603_inLine +BABEL_OP2_202_91189_20131017_013603_outLine +BABEL_OP2_202_91336_20130318_212106_inLine +BABEL_OP2_202_91336_20130318_212106_outLine +BABEL_OP2_202_91411_20131130_013112_inLine +BABEL_OP2_202_91411_20131130_013112_outLine +BABEL_OP2_202_91581_20131018_012025_inLine +BABEL_OP2_202_91581_20131018_012025_outLine +BABEL_OP2_202_91808_20131204_000439_inLine +BABEL_OP2_202_91808_20131204_000439_outLine +BABEL_OP2_202_91930_20131009_204054_inLine +BABEL_OP2_202_91930_20131009_204054_outLine +BABEL_OP2_202_91971_20131203_013031_inLine +BABEL_OP2_202_91971_20131203_013031_outLine +BABEL_OP2_202_91977_20130803_020205_inLine +BABEL_OP2_202_91977_20130803_020205_outLine +BABEL_OP2_202_92096_20131010_010207_inLine +BABEL_OP2_202_92096_20131010_010207_outLine +BABEL_OP2_202_92356_20140319_233703_inLine +BABEL_OP2_202_92356_20140319_233703_outLine +BABEL_OP2_202_92459_20131001_210517_inLine +BABEL_OP2_202_92459_20131001_210517_outLine +BABEL_OP2_202_92509_20121228_220632_inLine +BABEL_OP2_202_92509_20121228_220632_outLine +BABEL_OP2_202_92698_20130930_170131_inLine +BABEL_OP2_202_92698_20130930_170131_outLine +BABEL_OP2_202_92698_20130930_171329_inLine +BABEL_OP2_202_92698_20130930_171329_outLine +BABEL_OP2_202_92757_20131012_012455_inLine +BABEL_OP2_202_92757_20131012_012455_outLine +BABEL_OP2_202_92809_20131010_013656_inLine +BABEL_OP2_202_92809_20131010_013656_outLine +BABEL_OP2_202_92941_20131001_030226_inLine +BABEL_OP2_202_92941_20131001_030226_outLine +BABEL_OP2_202_93411_20130411_203410_inLine +BABEL_OP2_202_93411_20130411_203410_outLine +BABEL_OP2_202_93475_20140115_204518_inLine +BABEL_OP2_202_93475_20140115_204518_outLine +BABEL_OP2_202_93515_20131012_015923_inLine +BABEL_OP2_202_93515_20131012_015923_outLine +BABEL_OP2_202_93861_20130417_181331_inLine +BABEL_OP2_202_93861_20130417_181331_outLine +BABEL_OP2_202_93861_20130417_184517_inLine +BABEL_OP2_202_93861_20130417_184517_outLine +BABEL_OP2_202_93946_20131018_213959_inLine +BABEL_OP2_202_93946_20131018_213959_outLine +BABEL_OP2_202_93964_20130411_173113_inLine +BABEL_OP2_202_93964_20130411_173113_outLine +BABEL_OP2_202_93964_20130411_174717_inLine +BABEL_OP2_202_93964_20130411_174717_outLine +BABEL_OP2_202_94044_20131127_234911_inLine +BABEL_OP2_202_94044_20131127_234911_outLine +BABEL_OP2_202_94166_20131101_013342_inLine +BABEL_OP2_202_94166_20131101_013342_outLine +BABEL_OP2_202_94212_20131129_213734_inLine +BABEL_OP2_202_94212_20131129_213734_outLine +BABEL_OP2_202_94442_20131014_165222_inLine +BABEL_OP2_202_94442_20131014_165222_outLine 
+BABEL_OP2_202_94465_20131018_014837_inLine +BABEL_OP2_202_94465_20131018_014837_outLine +BABEL_OP2_202_94487_20131011_165627_inLine +BABEL_OP2_202_94487_20131011_165627_outLine +BABEL_OP2_202_94713_20131130_020453_inLine +BABEL_OP2_202_94713_20131130_020453_outLine +BABEL_OP2_202_94745_20130807_024052_inLine +BABEL_OP2_202_94745_20130807_024052_outLine +BABEL_OP2_202_95269_20130228_201037_inLine +BABEL_OP2_202_95269_20130228_201037_outLine +BABEL_OP2_202_95583_20130104_184957_inLine +BABEL_OP2_202_95583_20130104_184957_outLine +BABEL_OP2_202_95598_20130207_212051_inLine +BABEL_OP2_202_95598_20130207_212051_outLine +BABEL_OP2_202_95677_20131216_002743_inLine +BABEL_OP2_202_95677_20131216_002743_outLine +BABEL_OP2_202_95942_20131009_231612_inLine +BABEL_OP2_202_95942_20131009_231612_outLine +BABEL_OP2_202_95966_20130216_005201_inLine +BABEL_OP2_202_95966_20130216_005201_outLine +BABEL_OP2_202_95966_20130216_010600_inLine +BABEL_OP2_202_95966_20130216_010600_outLine +BABEL_OP2_202_96041_20140317_233707_inLine +BABEL_OP2_202_96041_20140317_233707_outLine +BABEL_OP2_202_96059_20131012_001057_inLine +BABEL_OP2_202_96059_20131012_001057_outLine +BABEL_OP2_202_96077_20131215_014408_inLine +BABEL_OP2_202_96077_20131215_014408_outLine +BABEL_OP2_202_96158_20131127_202846_inLine +BABEL_OP2_202_96158_20131127_202846_outLine +BABEL_OP2_202_96190_20140114_004611_inLine +BABEL_OP2_202_96190_20140114_004611_outLine +BABEL_OP2_202_96205_20130213_183412_inLine +BABEL_OP2_202_96205_20130213_183412_outLine +BABEL_OP2_202_96405_20131002_203007_inLine +BABEL_OP2_202_96405_20131002_203007_outLine +BABEL_OP2_202_96446_20130103_231919_inLine +BABEL_OP2_202_96446_20130103_231919_outLine +BABEL_OP2_202_96446_20130103_232611_inLine +BABEL_OP2_202_96446_20130103_232611_outLine +BABEL_OP2_202_96934_20131001_205011_inLine +BABEL_OP2_202_96934_20131001_205011_outLine +BABEL_OP2_202_97097_20131010_022340_inLine +BABEL_OP2_202_97097_20131010_022340_outLine +BABEL_OP2_202_97448_20131202_225423_inLine +BABEL_OP2_202_97448_20131202_225423_outLine +BABEL_OP2_202_97896_20130222_200148_inLine +BABEL_OP2_202_97896_20130222_200148_outLine +BABEL_OP2_202_97896_20130222_201339_inLine +BABEL_OP2_202_97896_20130222_201339_outLine +BABEL_OP2_202_97988_20131017_202448_inLine +BABEL_OP2_202_97988_20131017_202448_outLine +BABEL_OP2_202_98165_20130928_235834_inLine +BABEL_OP2_202_98165_20130928_235834_outLine +BABEL_OP2_202_98165_20130929_001916_inLine +BABEL_OP2_202_98165_20130929_001916_outLine +BABEL_OP2_202_98255_20131130_002114_inLine +BABEL_OP2_202_98255_20131130_002114_outLine +BABEL_OP2_202_98365_20130912_012649_inLine +BABEL_OP2_202_98365_20130912_012649_outLine +BABEL_OP2_202_98365_20130912_013735_inLine +BABEL_OP2_202_98365_20130912_013735_outLine +BABEL_OP2_202_98489_20140113_195524_inLine +BABEL_OP2_202_98489_20140113_195524_outLine +BABEL_OP2_202_98506_20131009_055751_inLine +BABEL_OP2_202_98506_20131009_055751_outLine +BABEL_OP2_202_98565_20131204_010715_inLine +BABEL_OP2_202_98565_20131204_010715_outLine +BABEL_OP2_202_98888_20130214_225058_inLine +BABEL_OP2_202_98888_20130214_225058_outLine +BABEL_OP2_202_99202_20130111_190008_inLine +BABEL_OP2_202_99202_20130111_190008_outLine +BABEL_OP2_202_99487_20130109_013911_inLine +BABEL_OP2_202_99487_20130109_013911_outLine +BABEL_OP2_202_99920_20130109_211943_inLine +BABEL_OP2_202_99920_20130109_211943_outLine +BABEL_OP2_202_99952_20131016_024323_inLine +BABEL_OP2_202_99952_20131016_024323_outLine +BABEL_OP2_202_99975_20131127_204148_inLine 
+BABEL_OP2_202_99975_20131127_204148_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list new file mode 100644 index 00000000000..c01647b6d12 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list @@ -0,0 +1,196 @@ +BABEL_OP2_202_10019_20130928_235503_inLine +BABEL_OP2_202_10019_20130928_235503_outLine +BABEL_OP2_202_10416_20130215_183832_inLine +BABEL_OP2_202_10416_20130215_183832_outLine +BABEL_OP2_202_12321_20140210_015215_inLine +BABEL_OP2_202_12321_20140210_015215_outLine +BABEL_OP2_202_13040_20131005_180024_inLine +BABEL_OP2_202_13040_20131005_180024_outLine +BABEL_OP2_202_13929_20140205_042603_inLine +BABEL_OP2_202_13929_20140205_042603_outLine +BABEL_OP2_202_14537_20131016_202630_inLine +BABEL_OP2_202_14537_20131016_202630_outLine +BABEL_OP2_202_16407_20131203_231519_inLine +BABEL_OP2_202_16407_20131203_231519_outLine +BABEL_OP2_202_16787_20130220_000429_inLine +BABEL_OP2_202_16787_20130220_000429_outLine +BABEL_OP2_202_17511_20140205_051449_inLine +BABEL_OP2_202_17511_20140205_051449_outLine +BABEL_OP2_202_19545_20130927_190707_inLine +BABEL_OP2_202_19545_20130927_190707_outLine +BABEL_OP2_202_20738_20131029_183614_inLine +BABEL_OP2_202_20738_20131029_183614_outLine +BABEL_OP2_202_20896_20131220_001523_inLine +BABEL_OP2_202_20896_20131220_001523_outLine +BABEL_OP2_202_21794_20130219_010105_inLine +BABEL_OP2_202_21794_20130219_010105_outLine +BABEL_OP2_202_22641_20131011_032157_inLine +BABEL_OP2_202_22641_20131011_032157_outLine +BABEL_OP2_202_23355_20131128_005023_inLine +BABEL_OP2_202_23355_20131128_005023_outLine +BABEL_OP2_202_23731_20130930_020336_inLine +BABEL_OP2_202_23731_20130930_020336_outLine +BABEL_OP2_202_24924_20140207_235730_inLine +BABEL_OP2_202_24924_20140207_235730_outLine +BABEL_OP2_202_26869_20131216_035718_inLine +BABEL_OP2_202_26869_20131216_035718_outLine +BABEL_OP2_202_28422_20130924_010422_inLine +BABEL_OP2_202_28422_20130924_010422_outLine +BABEL_OP2_202_30250_20121228_195004_inLine +BABEL_OP2_202_30250_20121228_195004_outLine +BABEL_OP2_202_30250_20121228_195937_inLine +BABEL_OP2_202_30250_20121228_195937_outLine +BABEL_OP2_202_30497_20131010_013817_inLine +BABEL_OP2_202_30497_20131010_013817_outLine +BABEL_OP2_202_31484_20130912_200823_inLine +BABEL_OP2_202_31484_20130912_200823_outLine +BABEL_OP2_202_32832_20140108_220009_inLine +BABEL_OP2_202_32832_20140108_220009_outLine +BABEL_OP2_202_36505_20131029_215901_inLine +BABEL_OP2_202_36505_20131029_215901_outLine +BABEL_OP2_202_38664_20130315_010258_inLine +BABEL_OP2_202_38664_20130315_010258_outLine +BABEL_OP2_202_38741_20131003_014930_inLine +BABEL_OP2_202_38741_20131003_014930_outLine +BABEL_OP2_202_39277_20131127_183847_inLine +BABEL_OP2_202_39277_20131127_183847_outLine +BABEL_OP2_202_41109_20140114_212001_inLine +BABEL_OP2_202_41109_20140114_212001_outLine +BABEL_OP2_202_44678_20131128_010554_inLine +BABEL_OP2_202_44678_20131128_010554_outLine +BABEL_OP2_202_44681_20131218_213752_inLine +BABEL_OP2_202_44681_20131218_213752_outLine +BABEL_OP2_202_44681_20131218_214913_inLine +BABEL_OP2_202_44681_20131218_214913_outLine +BABEL_OP2_202_45777_20130930_215344_inLine +BABEL_OP2_202_45777_20130930_215344_outLine +BABEL_OP2_202_45777_20130930_220539_inLine +BABEL_OP2_202_45777_20130930_220539_outLine +BABEL_OP2_202_46333_20140225_002629_inLine +BABEL_OP2_202_46333_20140225_002629_outLine +BABEL_OP2_202_46974_20130729_181547_inLine +BABEL_OP2_202_46974_20130729_181547_outLine +BABEL_OP2_202_47959_20131004_035713_inLine 
+BABEL_OP2_202_47959_20131004_035713_outLine +BABEL_OP2_202_48016_20140220_204253_inLine +BABEL_OP2_202_48016_20140220_204253_outLine +BABEL_OP2_202_48758_20131009_051338_inLine +BABEL_OP2_202_48758_20131009_051338_outLine +BABEL_OP2_202_49637_20130103_203801_inLine +BABEL_OP2_202_49637_20130103_203801_outLine +BABEL_OP2_202_50630_20130926_021713_inLine +BABEL_OP2_202_50630_20130926_021713_outLine +BABEL_OP2_202_50958_20130219_215809_inLine +BABEL_OP2_202_50958_20130219_215809_outLine +BABEL_OP2_202_50962_20131002_203346_inLine +BABEL_OP2_202_50962_20131002_203346_outLine +BABEL_OP2_202_51414_20131012_225839_inLine +BABEL_OP2_202_51414_20131012_225839_outLine +BABEL_OP2_202_52070_20140124_231122_inLine +BABEL_OP2_202_52070_20140124_231122_outLine +BABEL_OP2_202_52222_20131126_183055_inLine +BABEL_OP2_202_52222_20131126_183055_outLine +BABEL_OP2_202_52447_20131014_001157_inLine +BABEL_OP2_202_52447_20131014_001157_outLine +BABEL_OP2_202_52614_20131011_162942_inLine +BABEL_OP2_202_52614_20131011_162942_outLine +BABEL_OP2_202_53206_20131129_004718_inLine +BABEL_OP2_202_53206_20131129_004718_outLine +BABEL_OP2_202_55742_20140115_203307_inLine +BABEL_OP2_202_55742_20140115_203307_outLine +BABEL_OP2_202_56523_20130222_213416_inLine +BABEL_OP2_202_56523_20130222_213416_outLine +BABEL_OP2_202_57650_20131031_222920_inLine +BABEL_OP2_202_57650_20131031_222920_outLine +BABEL_OP2_202_57650_20131031_224035_inLine +BABEL_OP2_202_57650_20131031_224035_outLine +BABEL_OP2_202_60626_20131003_025140_inLine +BABEL_OP2_202_60626_20131003_025140_outLine +BABEL_OP2_202_62155_20131010_030043_inLine +BABEL_OP2_202_62155_20131010_030043_outLine +BABEL_OP2_202_62434_20130104_004333_inLine +BABEL_OP2_202_62434_20130104_004333_outLine +BABEL_OP2_202_62434_20130104_005350_inLine +BABEL_OP2_202_62434_20130104_005350_outLine +BABEL_OP2_202_62835_20130212_190421_inLine +BABEL_OP2_202_62835_20130212_190421_outLine +BABEL_OP2_202_63481_20121229_212430_inLine +BABEL_OP2_202_63481_20121229_212430_outLine +BABEL_OP2_202_63511_20140202_013550_inLine +BABEL_OP2_202_63511_20140202_013550_outLine +BABEL_OP2_202_64638_20130923_221504_inLine +BABEL_OP2_202_64638_20130923_221504_outLine +BABEL_OP2_202_66959_20131018_194733_inLine +BABEL_OP2_202_66959_20131018_194733_outLine +BABEL_OP2_202_66967_20130103_220521_inLine +BABEL_OP2_202_66967_20130103_220521_outLine +BABEL_OP2_202_67373_20131004_205550_inLine +BABEL_OP2_202_67373_20131004_205550_outLine +BABEL_OP2_202_67794_20131003_192439_inLine +BABEL_OP2_202_67794_20131003_192439_outLine +BABEL_OP2_202_69090_20131127_230541_inLine +BABEL_OP2_202_69090_20131127_230541_outLine +BABEL_OP2_202_69972_20140129_230607_inLine +BABEL_OP2_202_69972_20140129_230607_outLine +BABEL_OP2_202_71282_20131030_163454_inLine +BABEL_OP2_202_71282_20131030_163454_outLine +BABEL_OP2_202_71704_20130109_185345_inLine +BABEL_OP2_202_71704_20130109_185345_outLine +BABEL_OP2_202_73072_20130105_235040_inLine +BABEL_OP2_202_73072_20130105_235040_outLine +BABEL_OP2_202_74111_20131018_223020_inLine +BABEL_OP2_202_74111_20131018_223020_outLine +BABEL_OP2_202_74641_20130927_171309_inLine +BABEL_OP2_202_74641_20130927_171309_outLine +BABEL_OP2_202_76773_20131004_211703_inLine +BABEL_OP2_202_76773_20131004_211703_outLine +BABEL_OP2_202_83062_20131129_191922_inLine +BABEL_OP2_202_83062_20131129_191922_outLine +BABEL_OP2_202_84327_20130730_193322_inLine +BABEL_OP2_202_84327_20130730_193322_outLine +BABEL_OP2_202_87545_20131012_025318_inLine +BABEL_OP2_202_87545_20131012_025318_outLine 
+BABEL_OP2_202_89718_20131203_002623_inLine +BABEL_OP2_202_89718_20131203_002623_outLine +BABEL_OP2_202_90935_20130226_232117_inLine +BABEL_OP2_202_90935_20130226_232117_outLine +BABEL_OP2_202_91930_20131009_204054_inLine +BABEL_OP2_202_91930_20131009_204054_outLine +BABEL_OP2_202_91971_20131203_013031_inLine +BABEL_OP2_202_91971_20131203_013031_outLine +BABEL_OP2_202_92698_20130930_170131_inLine +BABEL_OP2_202_92698_20130930_170131_outLine +BABEL_OP2_202_92698_20130930_171329_inLine +BABEL_OP2_202_92698_20130930_171329_outLine +BABEL_OP2_202_93861_20130417_181331_inLine +BABEL_OP2_202_93861_20130417_181331_outLine +BABEL_OP2_202_93861_20130417_184517_inLine +BABEL_OP2_202_93861_20130417_184517_outLine +BABEL_OP2_202_93946_20131018_213959_inLine +BABEL_OP2_202_93946_20131018_213959_outLine +BABEL_OP2_202_94166_20131101_013342_inLine +BABEL_OP2_202_94166_20131101_013342_outLine +BABEL_OP2_202_94212_20131129_213734_inLine +BABEL_OP2_202_94212_20131129_213734_outLine +BABEL_OP2_202_95966_20130216_005201_inLine +BABEL_OP2_202_95966_20130216_005201_outLine +BABEL_OP2_202_95966_20130216_010600_inLine +BABEL_OP2_202_95966_20130216_010600_outLine +BABEL_OP2_202_96041_20140317_233707_inLine +BABEL_OP2_202_96041_20140317_233707_outLine +BABEL_OP2_202_96059_20131012_001057_inLine +BABEL_OP2_202_96059_20131012_001057_outLine +BABEL_OP2_202_96205_20130213_183412_inLine +BABEL_OP2_202_96205_20130213_183412_outLine +BABEL_OP2_202_96934_20131001_205011_inLine +BABEL_OP2_202_96934_20131001_205011_outLine +BABEL_OP2_202_97097_20131010_022340_inLine +BABEL_OP2_202_97097_20131010_022340_outLine +BABEL_OP2_202_97448_20131202_225423_inLine +BABEL_OP2_202_97448_20131202_225423_outLine +BABEL_OP2_202_98255_20131130_002114_inLine +BABEL_OP2_202_98255_20131130_002114_outLine +BABEL_OP2_202_98888_20130214_225058_inLine +BABEL_OP2_202_98888_20130214_225058_outLine +BABEL_OP2_202_99487_20130109_013911_inLine +BABEL_OP2_202_99487_20130109_013911_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list new file mode 100644 index 00000000000..ec4d25cd88a --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list @@ -0,0 +1,128 @@ +BABEL_OP2_202_11859_20140206_193130_inLine +BABEL_OP2_202_11859_20140206_193130_outLine +BABEL_OP2_202_14719_20131126_223914_inLine +BABEL_OP2_202_14719_20131126_223914_outLine +BABEL_OP2_202_16838_20140204_225359_inLine +BABEL_OP2_202_16838_20140204_225359_outLine +BABEL_OP2_202_21206_20140207_213800_inLine +BABEL_OP2_202_21206_20140207_213800_outLine +BABEL_OP2_202_24501_20140205_231355_inLine +BABEL_OP2_202_24501_20140205_231355_outLine +BABEL_OP2_202_27189_20131216_001758_inLine +BABEL_OP2_202_27189_20131216_001758_outLine +BABEL_OP2_202_28522_20130925_000938_inLine +BABEL_OP2_202_28522_20130925_000938_outLine +BABEL_OP2_202_28644_20140205_001525_inLine +BABEL_OP2_202_28644_20140205_001525_outLine +BABEL_OP2_202_30280_20140220_001618_inLine +BABEL_OP2_202_30280_20140220_001618_outLine +BABEL_OP2_202_30432_20130502_210534_inLine +BABEL_OP2_202_30432_20130502_210534_outLine +BABEL_OP2_202_30432_20130503_175016_inLine +BABEL_OP2_202_30432_20130503_175016_outLine +BABEL_OP2_202_30645_20130108_200114_inLine +BABEL_OP2_202_30645_20130108_200114_outLine +BABEL_OP2_202_32837_20131101_203319_inLine +BABEL_OP2_202_32837_20131101_203319_outLine +BABEL_OP2_202_35609_20140220_193923_inLine +BABEL_OP2_202_35609_20140220_193923_outLine +BABEL_OP2_202_38963_20131215_232437_inLine +BABEL_OP2_202_38963_20131215_232437_outLine 
+BABEL_OP2_202_43395_20140220_223151_inLine +BABEL_OP2_202_43395_20140220_223151_outLine +BABEL_OP2_202_46770_20140223_234733_inLine +BABEL_OP2_202_46770_20140223_234733_outLine +BABEL_OP2_202_48243_20131009_224543_inLine +BABEL_OP2_202_48243_20131009_224543_outLine +BABEL_OP2_202_48422_20140225_220708_inLine +BABEL_OP2_202_48422_20140225_220708_outLine +BABEL_OP2_202_51156_20131216_015429_inLine +BABEL_OP2_202_51156_20131216_015429_outLine +BABEL_OP2_202_51484_20140123_220444_inLine +BABEL_OP2_202_51484_20140123_220444_outLine +BABEL_OP2_202_51611_20130109_194912_inLine +BABEL_OP2_202_51611_20130109_194912_outLine +BABEL_OP2_202_53063_20140124_000041_inLine +BABEL_OP2_202_53063_20140124_000041_outLine +BABEL_OP2_202_54074_20140123_205035_inLine +BABEL_OP2_202_54074_20140123_205035_outLine +BABEL_OP2_202_54841_20140122_195114_inLine +BABEL_OP2_202_54841_20140122_195114_outLine +BABEL_OP2_202_54841_20140122_200157_inLine +BABEL_OP2_202_54841_20140122_200157_outLine +BABEL_OP2_202_55259_20130930_023554_inLine +BABEL_OP2_202_55259_20130930_023554_outLine +BABEL_OP2_202_55349_20131010_002325_inLine +BABEL_OP2_202_55349_20131010_002325_outLine +BABEL_OP2_202_56306_20140122_204419_inLine +BABEL_OP2_202_56306_20140122_204419_outLine +BABEL_OP2_202_56465_20140122_194039_inLine +BABEL_OP2_202_56465_20140122_194039_outLine +BABEL_OP2_202_57782_20140129_231340_inLine +BABEL_OP2_202_57782_20140129_231340_outLine +BABEL_OP2_202_59720_20130930_032445_inLine +BABEL_OP2_202_59720_20130930_032445_outLine +BABEL_OP2_202_60477_20140201_200420_inLine +BABEL_OP2_202_60477_20140201_200420_outLine +BABEL_OP2_202_60778_20131201_233949_inLine +BABEL_OP2_202_60778_20131201_233949_outLine +BABEL_OP2_202_61040_20140227_003457_inLine +BABEL_OP2_202_61040_20140227_003457_outLine +BABEL_OP2_202_63670_20140130_231139_inLine +BABEL_OP2_202_63670_20140130_231139_outLine +BABEL_OP2_202_65466_20131010_013521_inLine +BABEL_OP2_202_65466_20131010_013521_outLine +BABEL_OP2_202_66001_20130107_194345_inLine +BABEL_OP2_202_66001_20130107_194345_outLine +BABEL_OP2_202_66045_20130410_204151_inLine +BABEL_OP2_202_66045_20130410_204151_outLine +BABEL_OP2_202_66045_20130410_211501_inLine +BABEL_OP2_202_66045_20130410_211501_outLine +BABEL_OP2_202_67401_20130912_043928_inLine +BABEL_OP2_202_67401_20130912_043928_outLine +BABEL_OP2_202_67964_20140125_232737_inLine +BABEL_OP2_202_67964_20140125_232737_outLine +BABEL_OP2_202_68748_20130803_201133_inLine +BABEL_OP2_202_68748_20130803_201133_outLine +BABEL_OP2_202_71976_20131128_193641_inLine +BABEL_OP2_202_71976_20131128_193641_outLine +BABEL_OP2_202_74121_20130220_195721_inLine +BABEL_OP2_202_74121_20130220_195721_outLine +BABEL_OP2_202_74121_20130220_201735_inLine +BABEL_OP2_202_74121_20130220_201735_outLine +BABEL_OP2_202_75064_20140226_232411_inLine +BABEL_OP2_202_75064_20140226_232411_outLine +BABEL_OP2_202_75261_20140311_002541_inLine +BABEL_OP2_202_75261_20140311_002541_outLine +BABEL_OP2_202_75812_20131127_193133_inLine +BABEL_OP2_202_75812_20131127_193133_outLine +BABEL_OP2_202_76499_20130412_201900_inLine +BABEL_OP2_202_76499_20130412_201900_outLine +BABEL_OP2_202_77033_20140312_034901_inLine +BABEL_OP2_202_77033_20140312_034901_outLine +BABEL_OP2_202_79045_20140310_212332_inLine +BABEL_OP2_202_79045_20140310_212332_outLine +BABEL_OP2_202_80306_20130928_232209_inLine +BABEL_OP2_202_80306_20130928_232209_outLine +BABEL_OP2_202_80989_20131016_213255_inLine +BABEL_OP2_202_80989_20131016_213255_outLine +BABEL_OP2_202_81622_20130218_232606_inLine 
+BABEL_OP2_202_81622_20130218_232606_outLine +BABEL_OP2_202_83625_20131130_222251_inLine +BABEL_OP2_202_83625_20131130_222251_outLine +BABEL_OP2_202_84194_20131130_024921_inLine +BABEL_OP2_202_84194_20131130_024921_outLine +BABEL_OP2_202_84408_20130306_184336_inLine +BABEL_OP2_202_84408_20130306_184336_outLine +BABEL_OP2_202_84768_20130107_194303_inLine +BABEL_OP2_202_84768_20130107_194303_outLine +BABEL_OP2_202_87305_20131016_225546_inLine +BABEL_OP2_202_87305_20131016_225546_outLine +BABEL_OP2_202_89695_20130215_224831_inLine +BABEL_OP2_202_89695_20130215_224831_outLine +BABEL_OP2_202_90740_20131120_195825_inLine +BABEL_OP2_202_90740_20131120_195825_outLine +BABEL_OP2_202_91478_20131127_031740_inLine +BABEL_OP2_202_91478_20131127_031740_outLine +BABEL_OP2_202_95231_20131128_211454_inLine +BABEL_OP2_202_95231_20131128_211454_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list new file mode 100644 index 00000000000..6f18d1b31d9 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list @@ -0,0 +1,397 @@ +BABEL_OP2_202_10002_20131130_011225_inLine +BABEL_OP2_202_10002_20131130_011225_outLine +BABEL_OP2_202_10184_20130214_193710_inLine +BABEL_OP2_202_10184_20130214_193710_outLine +BABEL_OP2_202_10464_20131203_215404_inLine +BABEL_OP2_202_10464_20131203_215404_outLine +BABEL_OP2_202_10647_20131009_183755_inLine +BABEL_OP2_202_10647_20131009_183755_outLine +BABEL_OP2_202_10966_20131219_004736_inLine +BABEL_OP2_202_10966_20131219_004736_outLine +BABEL_OP2_202_11310_20131220_011737_inLine +BABEL_OP2_202_11310_20131220_011737_outLine +BABEL_OP2_202_11352_20131120_175331_inLine +BABEL_OP2_202_11352_20131120_175331_outLine +BABEL_OP2_202_11528_20131126_194053_inLine +BABEL_OP2_202_11528_20131126_194053_outLine +BABEL_OP2_202_12846_20140207_070059_inLine +BABEL_OP2_202_12846_20140207_070059_outLine +BABEL_OP2_202_12846_20140207_072228_inLine +BABEL_OP2_202_12846_20140207_072228_outLine +BABEL_OP2_202_13126_20131010_154341_inLine +BABEL_OP2_202_13126_20131010_154341_outLine +BABEL_OP2_202_13189_20131218_191846_inLine +BABEL_OP2_202_13189_20131218_191846_outLine +BABEL_OP2_202_13490_20130410_232045_inLine +BABEL_OP2_202_13490_20130410_232045_outLine +BABEL_OP2_202_13561_20130927_174413_inLine +BABEL_OP2_202_13561_20130927_174413_outLine +BABEL_OP2_202_14929_20130215_230011_inLine +BABEL_OP2_202_14929_20130215_230011_outLine +BABEL_OP2_202_15024_20130211_211646_inLine +BABEL_OP2_202_15024_20130211_211646_outLine +BABEL_OP2_202_15281_20131017_173858_inLine +BABEL_OP2_202_15281_20131017_173858_outLine +BABEL_OP2_202_16149_20130108_192505_inLine +BABEL_OP2_202_16149_20130108_192505_outLine +BABEL_OP2_202_16839_20131218_202752_inLine +BABEL_OP2_202_16839_20131218_202752_outLine +BABEL_OP2_202_16886_20130219_213720_inLine +BABEL_OP2_202_16886_20130219_213720_outLine +BABEL_OP2_202_17472_20131128_215323_inLine +BABEL_OP2_202_17472_20131128_215323_outLine +BABEL_OP2_202_18242_20131203_010326_inLine +BABEL_OP2_202_18242_20131203_010326_outLine +BABEL_OP2_202_18490_20140109_200346_inLine +BABEL_OP2_202_18490_20140109_200346_outLine +BABEL_OP2_202_18566_20140209_233124_inLine +BABEL_OP2_202_18566_20140209_233124_outLine +BABEL_OP2_202_19589_20131016_205832_inLine +BABEL_OP2_202_19589_20131016_205832_outLine +BABEL_OP2_202_19877_20131011_005357_inLine +BABEL_OP2_202_19877_20131011_005357_outLine +BABEL_OP2_202_21624_20131009_200818_inLine +BABEL_OP2_202_21624_20131009_200818_outLine 
+BABEL_OP2_202_21807_20130926_194526_inLine +BABEL_OP2_202_21807_20130926_194526_outLine +BABEL_OP2_202_22643_20131126_221057_inLine +BABEL_OP2_202_22643_20131126_221057_outLine +BABEL_OP2_202_22918_20131031_201038_inLine +BABEL_OP2_202_22918_20131031_201038_outLine +BABEL_OP2_202_23092_20131018_200124_inLine +BABEL_OP2_202_23092_20131018_200124_outLine +BABEL_OP2_202_23153_20130220_213017_inLine +BABEL_OP2_202_23153_20130220_213017_outLine +BABEL_OP2_202_23190_20130308_215320_inLine +BABEL_OP2_202_23190_20130308_215320_outLine +BABEL_OP2_202_23195_20140205_001534_inLine +BABEL_OP2_202_23195_20140205_001534_outLine +BABEL_OP2_202_24010_20140204_221739_inLine +BABEL_OP2_202_24010_20140204_221739_outLine +BABEL_OP2_202_24241_20140218_231626_inLine +BABEL_OP2_202_24241_20140218_231626_outLine +BABEL_OP2_202_24779_20140205_002210_inLine +BABEL_OP2_202_24779_20140205_002210_outLine +BABEL_OP2_202_24982_20131219_225432_inLine +BABEL_OP2_202_24982_20131219_225432_outLine +BABEL_OP2_202_25698_20140208_030726_inLine +BABEL_OP2_202_25698_20140208_030726_outLine +BABEL_OP2_202_25719_20140217_232330_inLine +BABEL_OP2_202_25719_20140217_232330_outLine +BABEL_OP2_202_26507_20131030_200210_inLine +BABEL_OP2_202_26507_20131030_200210_outLine +BABEL_OP2_202_27042_20140209_012004_inLine +BABEL_OP2_202_27042_20140209_012004_outLine +BABEL_OP2_202_27367_20131127_225822_inLine +BABEL_OP2_202_27367_20131127_225822_outLine +BABEL_OP2_202_28303_20130930_225539_inLine +BABEL_OP2_202_28303_20130930_225539_outLine +BABEL_OP2_202_28595_20140219_174344_inLine +BABEL_OP2_202_28595_20140219_174344_outLine +BABEL_OP2_202_29439_20131009_210851_inLine +BABEL_OP2_202_29439_20131009_210851_outLine +BABEL_OP2_202_29482_20140204_232809_inLine +BABEL_OP2_202_29482_20140204_232809_outLine +BABEL_OP2_202_29482_20140204_234658_inLine +BABEL_OP2_202_29482_20140204_234658_outLine +BABEL_OP2_202_30098_20140210_002512_inLine +BABEL_OP2_202_30098_20140210_002512_outLine +BABEL_OP2_202_30461_20140219_222004_inLine +BABEL_OP2_202_30461_20140219_222004_outLine +BABEL_OP2_202_31184_20130213_182811_inLine +BABEL_OP2_202_31184_20130213_182811_outLine +BABEL_OP2_202_31184_20130213_183600_inLine +BABEL_OP2_202_31184_20130213_183600_outLine +BABEL_OP2_202_31919_20131010_181805_inLine +BABEL_OP2_202_31919_20131010_181805_outLine +BABEL_OP2_202_32998_20131221_004354_inLine +BABEL_OP2_202_32998_20131221_004354_outLine +BABEL_OP2_202_33424_20131012_231429_inLine +BABEL_OP2_202_33424_20131012_231429_outLine +BABEL_OP2_202_33497_20130429_202650_inLine +BABEL_OP2_202_33497_20130429_202650_outLine +BABEL_OP2_202_33497_20130429_204336_inLine +BABEL_OP2_202_33497_20130429_204336_outLine +BABEL_OP2_202_33913_20131218_215213_inLine +BABEL_OP2_202_33913_20131218_215213_outLine +BABEL_OP2_202_34064_20131220_013348_inLine +BABEL_OP2_202_34064_20131220_013348_outLine +BABEL_OP2_202_34410_20131119_191059_inLine +BABEL_OP2_202_34410_20131119_191059_outLine +BABEL_OP2_202_34486_20131016_193212_inLine +BABEL_OP2_202_34486_20131016_193212_outLine +BABEL_OP2_202_34586_20131219_235945_inLine +BABEL_OP2_202_34586_20131219_235945_outLine +BABEL_OP2_202_34826_20131220_013036_inLine +BABEL_OP2_202_34826_20131220_013036_outLine +BABEL_OP2_202_34860_20131202_205952_inLine +BABEL_OP2_202_34860_20131202_205952_outLine +BABEL_OP2_202_35139_20131003_221114_inLine +BABEL_OP2_202_35139_20131003_221114_outLine +BABEL_OP2_202_36642_20140114_203343_inLine +BABEL_OP2_202_36642_20140114_203343_outLine +BABEL_OP2_202_36894_20121228_180620_inLine 
+BABEL_OP2_202_36894_20121228_180620_outLine +BABEL_OP2_202_37285_20130730_214031_inLine +BABEL_OP2_202_37285_20130730_214031_outLine +BABEL_OP2_202_39006_20140115_012801_inLine +BABEL_OP2_202_39006_20140115_012801_outLine +BABEL_OP2_202_40557_20131018_015314_inLine +BABEL_OP2_202_40557_20131018_015314_outLine +BABEL_OP2_202_40565_20130725_183219_inLine +BABEL_OP2_202_40565_20130725_183219_outLine +BABEL_OP2_202_41542_20131029_200308_inLine +BABEL_OP2_202_41542_20131029_200308_outLine +BABEL_OP2_202_41598_20140225_031321_inLine +BABEL_OP2_202_41598_20140225_031321_outLine +BABEL_OP2_202_41720_20131129_192607_inLine +BABEL_OP2_202_41720_20131129_192607_outLine +BABEL_OP2_202_41720_20131129_194102_inLine +BABEL_OP2_202_41720_20131129_194102_outLine +BABEL_OP2_202_42309_20140221_210458_inLine +BABEL_OP2_202_42309_20140221_210458_outLine +BABEL_OP2_202_42434_20130930_235132_inLine +BABEL_OP2_202_42434_20130930_235132_outLine +BABEL_OP2_202_42434_20131001_001757_inLine +BABEL_OP2_202_42434_20131001_001757_outLine +BABEL_OP2_202_42991_20130801_010705_inLine +BABEL_OP2_202_42991_20130801_010705_outLine +BABEL_OP2_202_43794_20131015_230636_inLine +BABEL_OP2_202_43794_20131015_230636_outLine +BABEL_OP2_202_46041_20131018_224852_inLine +BABEL_OP2_202_46041_20131018_224852_outLine +BABEL_OP2_202_46261_20130213_203255_inLine +BABEL_OP2_202_46261_20130213_203255_outLine +BABEL_OP2_202_46550_20131003_205134_inLine +BABEL_OP2_202_46550_20131003_205134_outLine +BABEL_OP2_202_46688_20130108_003601_inLine +BABEL_OP2_202_46688_20130108_003601_outLine +BABEL_OP2_202_46757_20130726_172556_inLine +BABEL_OP2_202_46757_20130726_172556_outLine +BABEL_OP2_202_46976_20130214_203921_inLine +BABEL_OP2_202_46976_20130214_203921_outLine +BABEL_OP2_202_47186_20131101_211007_inLine +BABEL_OP2_202_47186_20131101_211007_outLine +BABEL_OP2_202_47823_20131017_214917_inLine +BABEL_OP2_202_47823_20131017_214917_outLine +BABEL_OP2_202_47866_20131010_061153_inLine +BABEL_OP2_202_47866_20131010_061153_outLine +BABEL_OP2_202_48299_20131130_233044_inLine +BABEL_OP2_202_48399_20131005_030007_outLine +BABEL_OP2_202_49437_20131031_193108_inLine +BABEL_OP2_202_49437_20131031_193108_outLine +BABEL_OP2_202_49630_20130731_234235_inLine +BABEL_OP2_202_49630_20130731_234235_outLine +BABEL_OP2_202_49739_20131127_171846_inLine +BABEL_OP2_202_49739_20131127_171846_outLine +BABEL_OP2_202_49768_20131001_222725_inLine +BABEL_OP2_202_49768_20131001_222725_outLine +BABEL_OP2_202_49907_20131003_213256_inLine +BABEL_OP2_202_49907_20131003_213256_outLine +BABEL_OP2_202_50186_20131216_004336_inLine +BABEL_OP2_202_50186_20131216_004336_outLine +BABEL_OP2_202_52246_20140125_011930_inLine +BABEL_OP2_202_52246_20140125_011930_outLine +BABEL_OP2_202_52272_20130103_193203_inLine +BABEL_OP2_202_52272_20130103_193203_outLine +BABEL_OP2_202_52422_20140123_223352_inLine +BABEL_OP2_202_52422_20140123_223352_outLine +BABEL_OP2_202_53758_20131203_003849_inLine +BABEL_OP2_202_53758_20131203_003849_outLine +BABEL_OP2_202_54066_20140121_223255_inLine +BABEL_OP2_202_54066_20140121_223255_outLine +BABEL_OP2_202_54530_20140125_000633_inLine +BABEL_OP2_202_54530_20140125_000633_outLine +BABEL_OP2_202_54634_20140121_201449_inLine +BABEL_OP2_202_54634_20140121_201449_outLine +BABEL_OP2_202_55381_20140123_030341_inLine +BABEL_OP2_202_55381_20140123_030341_outLine +BABEL_OP2_202_56023_20140124_213010_inLine +BABEL_OP2_202_56023_20140124_213010_outLine +BABEL_OP2_202_56331_20140124_212336_inLine +BABEL_OP2_202_56331_20140124_212336_outLine 
+BABEL_OP2_202_56606_20140123_202633_inLine +BABEL_OP2_202_56606_20140123_202633_outLine +BABEL_OP2_202_56951_20131130_192609_inLine +BABEL_OP2_202_56951_20131130_192609_outLine +BABEL_OP2_202_57233_20131120_235941_inLine +BABEL_OP2_202_57233_20131120_235941_outLine +BABEL_OP2_202_58103_20130930_045229_inLine +BABEL_OP2_202_58103_20130930_045229_outLine +BABEL_OP2_202_58107_20130927_165258_inLine +BABEL_OP2_202_58107_20130927_165258_outLine +BABEL_OP2_202_58489_20140131_214025_inLine +BABEL_OP2_202_58489_20140131_214025_outLine +BABEL_OP2_202_58821_20130730_183731_inLine +BABEL_OP2_202_58821_20130730_183731_outLine +BABEL_OP2_202_59028_20140131_212747_inLine +BABEL_OP2_202_59028_20140131_212747_outLine +BABEL_OP2_202_59402_20140201_222141_inLine +BABEL_OP2_202_59402_20140201_222141_outLine +BABEL_OP2_202_59402_20140201_222847_inLine +BABEL_OP2_202_59402_20140201_222847_outLine +BABEL_OP2_202_60474_20140120_222223_inLine +BABEL_OP2_202_60474_20140120_222223_outLine +BABEL_OP2_202_61438_20131129_231819_inLine +BABEL_OP2_202_61438_20131129_231819_outLine +BABEL_OP2_202_61438_20131129_233030_inLine +BABEL_OP2_202_61438_20131129_233030_outLine +BABEL_OP2_202_61873_20130111_181915_inLine +BABEL_OP2_202_61873_20130111_181915_outLine +BABEL_OP2_202_62047_20140129_020943_inLine +BABEL_OP2_202_62047_20140129_020943_outLine +BABEL_OP2_202_62360_20131014_211636_inLine +BABEL_OP2_202_62360_20131014_211636_outLine +BABEL_OP2_202_62714_20131101_225706_inLine +BABEL_OP2_202_62714_20131101_225706_outLine +BABEL_OP2_202_63490_20131203_234940_inLine +BABEL_OP2_202_63490_20131203_234940_outLine +BABEL_OP2_202_63920_20131215_021712_inLine +BABEL_OP2_202_63920_20131215_021712_outLine +BABEL_OP2_202_64688_20140126_004157_inLine +BABEL_OP2_202_64688_20140126_004157_outLine +BABEL_OP2_202_65048_20140128_174534_inLine +BABEL_OP2_202_65048_20140128_174534_outLine +BABEL_OP2_202_65336_20140131_001312_inLine +BABEL_OP2_202_65336_20140131_001312_outLine +BABEL_OP2_202_65913_20140127_181419_inLine +BABEL_OP2_202_65913_20140127_181419_outLine +BABEL_OP2_202_66305_20140126_020747_inLine +BABEL_OP2_202_66305_20140126_020747_outLine +BABEL_OP2_202_66641_20131127_183344_inLine +BABEL_OP2_202_66641_20131127_183344_outLine +BABEL_OP2_202_66916_20121229_203810_inLine +BABEL_OP2_202_66916_20121229_203810_outLine +BABEL_OP2_202_66916_20121229_211053_inLine +BABEL_OP2_202_66916_20121229_211053_outLine +BABEL_OP2_202_68289_20131128_012756_inLine +BABEL_OP2_202_68289_20131128_012756_outLine +BABEL_OP2_202_68854_20131012_000134_inLine +BABEL_OP2_202_68854_20131012_000134_outLine +BABEL_OP2_202_69937_20140131_034019_inLine +BABEL_OP2_202_69937_20140131_034019_outLine +BABEL_OP2_202_71566_20140311_213752_inLine +BABEL_OP2_202_71566_20140311_213752_outLine +BABEL_OP2_202_72324_20130423_161716_inLine +BABEL_OP2_202_72324_20130423_161716_outLine +BABEL_OP2_202_73005_20131012_011254_inLine +BABEL_OP2_202_73005_20131012_011254_outLine +BABEL_OP2_202_73022_20140226_210050_inLine +BABEL_OP2_202_73022_20140226_210050_outLine +BABEL_OP2_202_73518_20140304_001655_inLine +BABEL_OP2_202_73518_20140304_001655_outLine +BABEL_OP2_202_74667_20130227_180657_inLine +BABEL_OP2_202_74667_20130227_180657_outLine +BABEL_OP2_202_75930_20131202_213433_inLine +BABEL_OP2_202_75930_20131202_213433_outLine +BABEL_OP2_202_76126_20131031_183234_inLine +BABEL_OP2_202_76126_20131031_183234_outLine +BABEL_OP2_202_76444_20131018_000013_inLine +BABEL_OP2_202_76444_20131018_000013_outLine +BABEL_OP2_202_77146_20121229_203404_inLine 
+BABEL_OP2_202_77146_20121229_203404_outLine +BABEL_OP2_202_78482_20140311_014827_inLine +BABEL_OP2_202_78482_20140311_014827_outLine +BABEL_OP2_202_79367_20130107_224252_inLine +BABEL_OP2_202_79367_20130107_224252_outLine +BABEL_OP2_202_79973_20131130_184708_inLine +BABEL_OP2_202_79973_20131130_184708_outLine +BABEL_OP2_202_79995_20140227_030446_inLine +BABEL_OP2_202_79995_20140227_030446_outLine +BABEL_OP2_202_80134_20131202_174756_inLine +BABEL_OP2_202_80134_20131202_174756_outLine +BABEL_OP2_202_80383_20131207_013517_inLine +BABEL_OP2_202_80383_20131207_013517_outLine +BABEL_OP2_202_81149_20131010_010411_inLine +BABEL_OP2_202_81149_20131010_010411_outLine +BABEL_OP2_202_82123_20131130_004859_inLine +BABEL_OP2_202_82123_20131130_004859_outLine +BABEL_OP2_202_82138_20130415_225929_inLine +BABEL_OP2_202_82138_20130415_225929_outLine +BABEL_OP2_202_82425_20130108_181846_inLine +BABEL_OP2_202_82425_20130108_181846_outLine +BABEL_OP2_202_82473_20131004_202625_inLine +BABEL_OP2_202_82473_20131004_202625_outLine +BABEL_OP2_202_82496_20130105_232830_inLine +BABEL_OP2_202_82496_20130105_232830_outLine +BABEL_OP2_202_82622_20131007_171417_inLine +BABEL_OP2_202_82622_20131007_171417_outLine +BABEL_OP2_202_83609_20131128_022206_inLine +BABEL_OP2_202_83609_20131128_022206_outLine +BABEL_OP2_202_83651_20131003_212624_inLine +BABEL_OP2_202_83651_20131003_212624_outLine +BABEL_OP2_202_84077_20131130_195755_inLine +BABEL_OP2_202_84077_20131130_195755_outLine +BABEL_OP2_202_84466_20131010_040505_inLine +BABEL_OP2_202_84466_20131010_040505_outLine +BABEL_OP2_202_84469_20131018_212735_inLine +BABEL_OP2_202_84469_20131018_212735_outLine +BABEL_OP2_202_86156_20131030_001706_inLine +BABEL_OP2_202_86156_20131030_001706_outLine +BABEL_OP2_202_87179_20140320_165556_inLine +BABEL_OP2_202_87179_20140320_165556_outLine +BABEL_OP2_202_88776_20130107_192204_inLine +BABEL_OP2_202_88776_20130107_192204_outLine +BABEL_OP2_202_88776_20130107_195623_inLine +BABEL_OP2_202_88776_20130107_195623_outLine +BABEL_OP2_202_88783_20131018_191706_inLine +BABEL_OP2_202_88783_20131018_191706_outLine +BABEL_OP2_202_88865_20140319_212413_inLine +BABEL_OP2_202_88865_20140319_212413_outLine +BABEL_OP2_202_89665_20140320_004314_inLine +BABEL_OP2_202_89665_20140320_004314_outLine +BABEL_OP2_202_90347_20130912_005052_inLine +BABEL_OP2_202_90347_20130912_005052_outLine +BABEL_OP2_202_90572_20131009_190400_inLine +BABEL_OP2_202_90572_20131009_190400_outLine +BABEL_OP2_202_90737_20130213_201303_inLine +BABEL_OP2_202_90737_20130213_201303_outLine +BABEL_OP2_202_90739_20130222_223815_inLine +BABEL_OP2_202_90739_20130222_223815_outLine +BABEL_OP2_202_91080_20130429_213558_inLine +BABEL_OP2_202_91080_20130429_213558_outLine +BABEL_OP2_202_91891_20130803_000104_inLine +BABEL_OP2_202_91891_20130803_000104_outLine +BABEL_OP2_202_92065_20140109_204802_inLine +BABEL_OP2_202_92065_20140109_204802_outLine +BABEL_OP2_202_92440_20131203_195407_inLine +BABEL_OP2_202_92440_20131203_195407_outLine +BABEL_OP2_202_92440_20131203_200046_inLine +BABEL_OP2_202_92440_20131203_200046_outLine +BABEL_OP2_202_92527_20130225_184732_inLine +BABEL_OP2_202_92527_20130225_184732_outLine +BABEL_OP2_202_93153_20131003_212947_inLine +BABEL_OP2_202_93153_20131003_212947_outLine +BABEL_OP2_202_93153_20131003_213722_inLine +BABEL_OP2_202_93153_20131003_213722_outLine +BABEL_OP2_202_93222_20131126_211540_inLine +BABEL_OP2_202_93222_20131126_211540_outLine +BABEL_OP2_202_94333_20130105_202651_inLine +BABEL_OP2_202_94333_20130105_202651_outLine 
+BABEL_OP2_202_94449_20131011_205657_inLine +BABEL_OP2_202_94449_20131011_205657_outLine +BABEL_OP2_202_94869_20121219_204921_inLine +BABEL_OP2_202_94869_20121219_204921_outLine +BABEL_OP2_202_95077_20140320_014923_inLine +BABEL_OP2_202_95077_20140320_014923_outLine +BABEL_OP2_202_96376_20131011_024111_inLine +BABEL_OP2_202_96376_20131011_024111_outLine +BABEL_OP2_202_96680_20131130_202936_inLine +BABEL_OP2_202_96680_20131130_202936_outLine +BABEL_OP2_202_96690_20130220_210217_inLine +BABEL_OP2_202_96690_20130220_210217_outLine +BABEL_OP2_202_96808_20131012_212254_inLine +BABEL_OP2_202_96808_20131012_212254_outLine +BABEL_OP2_202_97220_20140319_193818_inLine +BABEL_OP2_202_97220_20140319_193818_outLine +BABEL_OP2_202_97363_20131002_203133_inLine +BABEL_OP2_202_97363_20131002_203133_outLine +BABEL_OP2_202_97363_20131003_002739_inLine +BABEL_OP2_202_97363_20131003_002739_outLine +BABEL_OP2_202_97373_20130730_151855_inLine +BABEL_OP2_202_99401_20130108_001107_inLine +BABEL_OP2_202_99401_20130108_001107_outLine +BABEL_OP2_202_99594_20130220_222308_inLine +BABEL_OP2_202_99594_20130220_222308_outLine +BABEL_OP2_202_99883_20131120_212150_inLine +BABEL_OP2_202_99883_20131120_212150_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/training.list b/egs/babel/s5d/conf/lists/202-swahili/training.list new file mode 100644 index 00000000000..1f0477cdd00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/training.list @@ -0,0 +1,525 @@ +BABEL_OP2_202_10002_20131130_011225_inLine +BABEL_OP2_202_10002_20131130_011225_outLine +BABEL_OP2_202_10184_20130214_193710_inLine +BABEL_OP2_202_10184_20130214_193710_outLine +BABEL_OP2_202_10464_20131203_215404_inLine +BABEL_OP2_202_10464_20131203_215404_outLine +BABEL_OP2_202_10647_20131009_183755_inLine +BABEL_OP2_202_10647_20131009_183755_outLine +BABEL_OP2_202_10966_20131219_004736_inLine +BABEL_OP2_202_10966_20131219_004736_outLine +BABEL_OP2_202_11310_20131220_011737_inLine +BABEL_OP2_202_11310_20131220_011737_outLine +BABEL_OP2_202_11352_20131120_175331_inLine +BABEL_OP2_202_11352_20131120_175331_outLine +BABEL_OP2_202_11528_20131126_194053_inLine +BABEL_OP2_202_11528_20131126_194053_outLine +BABEL_OP2_202_11859_20140206_193130_inLine +BABEL_OP2_202_11859_20140206_193130_outLine +BABEL_OP2_202_12846_20140207_070059_inLine +BABEL_OP2_202_12846_20140207_070059_outLine +BABEL_OP2_202_12846_20140207_072228_inLine +BABEL_OP2_202_12846_20140207_072228_outLine +BABEL_OP2_202_13126_20131010_154341_inLine +BABEL_OP2_202_13126_20131010_154341_outLine +BABEL_OP2_202_13189_20131218_191846_inLine +BABEL_OP2_202_13189_20131218_191846_outLine +BABEL_OP2_202_13490_20130410_232045_inLine +BABEL_OP2_202_13490_20130410_232045_outLine +BABEL_OP2_202_13561_20130927_174413_inLine +BABEL_OP2_202_13561_20130927_174413_outLine +BABEL_OP2_202_14719_20131126_223914_inLine +BABEL_OP2_202_14719_20131126_223914_outLine +BABEL_OP2_202_14929_20130215_230011_inLine +BABEL_OP2_202_14929_20130215_230011_outLine +BABEL_OP2_202_15024_20130211_211646_inLine +BABEL_OP2_202_15024_20130211_211646_outLine +BABEL_OP2_202_15281_20131017_173858_inLine +BABEL_OP2_202_15281_20131017_173858_outLine +BABEL_OP2_202_16149_20130108_192505_inLine +BABEL_OP2_202_16149_20130108_192505_outLine +BABEL_OP2_202_16838_20140204_225359_inLine +BABEL_OP2_202_16838_20140204_225359_outLine +BABEL_OP2_202_16839_20131218_202752_inLine +BABEL_OP2_202_16839_20131218_202752_outLine +BABEL_OP2_202_16886_20130219_213720_inLine +BABEL_OP2_202_16886_20130219_213720_outLine +BABEL_OP2_202_17472_20131128_215323_inLine 
+BABEL_OP2_202_17472_20131128_215323_outLine +BABEL_OP2_202_18242_20131203_010326_inLine +BABEL_OP2_202_18242_20131203_010326_outLine +BABEL_OP2_202_18490_20140109_200346_inLine +BABEL_OP2_202_18490_20140109_200346_outLine +BABEL_OP2_202_18566_20140209_233124_inLine +BABEL_OP2_202_18566_20140209_233124_outLine +BABEL_OP2_202_19589_20131016_205832_inLine +BABEL_OP2_202_19589_20131016_205832_outLine +BABEL_OP2_202_19877_20131011_005357_inLine +BABEL_OP2_202_19877_20131011_005357_outLine +BABEL_OP2_202_21206_20140207_213800_inLine +BABEL_OP2_202_21206_20140207_213800_outLine +BABEL_OP2_202_21624_20131009_200818_inLine +BABEL_OP2_202_21624_20131009_200818_outLine +BABEL_OP2_202_21807_20130926_194526_inLine +BABEL_OP2_202_21807_20130926_194526_outLine +BABEL_OP2_202_22643_20131126_221057_inLine +BABEL_OP2_202_22643_20131126_221057_outLine +BABEL_OP2_202_22918_20131031_201038_inLine +BABEL_OP2_202_22918_20131031_201038_outLine +BABEL_OP2_202_23092_20131018_200124_inLine +BABEL_OP2_202_23092_20131018_200124_outLine +BABEL_OP2_202_23153_20130220_213017_inLine +BABEL_OP2_202_23153_20130220_213017_outLine +BABEL_OP2_202_23190_20130308_215320_inLine +BABEL_OP2_202_23190_20130308_215320_outLine +BABEL_OP2_202_23195_20140205_001534_inLine +BABEL_OP2_202_23195_20140205_001534_outLine +BABEL_OP2_202_24010_20140204_221739_inLine +BABEL_OP2_202_24010_20140204_221739_outLine +BABEL_OP2_202_24241_20140218_231626_inLine +BABEL_OP2_202_24241_20140218_231626_outLine +BABEL_OP2_202_24501_20140205_231355_inLine +BABEL_OP2_202_24501_20140205_231355_outLine +BABEL_OP2_202_24779_20140205_002210_inLine +BABEL_OP2_202_24779_20140205_002210_outLine +BABEL_OP2_202_24982_20131219_225432_inLine +BABEL_OP2_202_24982_20131219_225432_outLine +BABEL_OP2_202_25698_20140208_030726_inLine +BABEL_OP2_202_25698_20140208_030726_outLine +BABEL_OP2_202_25719_20140217_232330_inLine +BABEL_OP2_202_25719_20140217_232330_outLine +BABEL_OP2_202_26507_20131030_200210_inLine +BABEL_OP2_202_26507_20131030_200210_outLine +BABEL_OP2_202_27042_20140209_012004_inLine +BABEL_OP2_202_27042_20140209_012004_outLine +BABEL_OP2_202_27189_20131216_001758_inLine +BABEL_OP2_202_27189_20131216_001758_outLine +BABEL_OP2_202_27367_20131127_225822_inLine +BABEL_OP2_202_27367_20131127_225822_outLine +BABEL_OP2_202_28303_20130930_225539_inLine +BABEL_OP2_202_28303_20130930_225539_outLine +BABEL_OP2_202_28522_20130925_000938_inLine +BABEL_OP2_202_28522_20130925_000938_outLine +BABEL_OP2_202_28595_20140219_174344_inLine +BABEL_OP2_202_28595_20140219_174344_outLine +BABEL_OP2_202_28644_20140205_001525_inLine +BABEL_OP2_202_28644_20140205_001525_outLine +BABEL_OP2_202_29439_20131009_210851_inLine +BABEL_OP2_202_29439_20131009_210851_outLine +BABEL_OP2_202_29482_20140204_232809_inLine +BABEL_OP2_202_29482_20140204_232809_outLine +BABEL_OP2_202_29482_20140204_234658_inLine +BABEL_OP2_202_29482_20140204_234658_outLine +BABEL_OP2_202_30098_20140210_002512_inLine +BABEL_OP2_202_30098_20140210_002512_outLine +BABEL_OP2_202_30280_20140220_001618_inLine +BABEL_OP2_202_30280_20140220_001618_outLine +BABEL_OP2_202_30432_20130502_210534_inLine +BABEL_OP2_202_30432_20130502_210534_outLine +BABEL_OP2_202_30432_20130503_175016_inLine +BABEL_OP2_202_30432_20130503_175016_outLine +BABEL_OP2_202_30461_20140219_222004_inLine +BABEL_OP2_202_30461_20140219_222004_outLine +BABEL_OP2_202_30645_20130108_200114_inLine +BABEL_OP2_202_30645_20130108_200114_outLine +BABEL_OP2_202_31184_20130213_182811_inLine +BABEL_OP2_202_31184_20130213_182811_outLine 
+BABEL_OP2_202_31184_20130213_183600_inLine +BABEL_OP2_202_31184_20130213_183600_outLine +BABEL_OP2_202_31919_20131010_181805_inLine +BABEL_OP2_202_31919_20131010_181805_outLine +BABEL_OP2_202_32837_20131101_203319_inLine +BABEL_OP2_202_32837_20131101_203319_outLine +BABEL_OP2_202_32998_20131221_004354_inLine +BABEL_OP2_202_32998_20131221_004354_outLine +BABEL_OP2_202_33424_20131012_231429_inLine +BABEL_OP2_202_33424_20131012_231429_outLine +BABEL_OP2_202_33497_20130429_202650_inLine +BABEL_OP2_202_33497_20130429_202650_outLine +BABEL_OP2_202_33497_20130429_204336_inLine +BABEL_OP2_202_33497_20130429_204336_outLine +BABEL_OP2_202_33913_20131218_215213_inLine +BABEL_OP2_202_33913_20131218_215213_outLine +BABEL_OP2_202_34064_20131220_013348_inLine +BABEL_OP2_202_34064_20131220_013348_outLine +BABEL_OP2_202_34410_20131119_191059_inLine +BABEL_OP2_202_34410_20131119_191059_outLine +BABEL_OP2_202_34486_20131016_193212_inLine +BABEL_OP2_202_34486_20131016_193212_outLine +BABEL_OP2_202_34586_20131219_235945_inLine +BABEL_OP2_202_34586_20131219_235945_outLine +BABEL_OP2_202_34826_20131220_013036_inLine +BABEL_OP2_202_34826_20131220_013036_outLine +BABEL_OP2_202_34860_20131202_205952_inLine +BABEL_OP2_202_34860_20131202_205952_outLine +BABEL_OP2_202_35139_20131003_221114_inLine +BABEL_OP2_202_35139_20131003_221114_outLine +BABEL_OP2_202_35609_20140220_193923_inLine +BABEL_OP2_202_35609_20140220_193923_outLine +BABEL_OP2_202_36642_20140114_203343_inLine +BABEL_OP2_202_36642_20140114_203343_outLine +BABEL_OP2_202_36894_20121228_180620_inLine +BABEL_OP2_202_36894_20121228_180620_outLine +BABEL_OP2_202_37285_20130730_214031_inLine +BABEL_OP2_202_37285_20130730_214031_outLine +BABEL_OP2_202_38963_20131215_232437_inLine +BABEL_OP2_202_38963_20131215_232437_outLine +BABEL_OP2_202_39006_20140115_012801_inLine +BABEL_OP2_202_39006_20140115_012801_outLine +BABEL_OP2_202_40557_20131018_015314_inLine +BABEL_OP2_202_40557_20131018_015314_outLine +BABEL_OP2_202_40565_20130725_183219_inLine +BABEL_OP2_202_40565_20130725_183219_outLine +BABEL_OP2_202_41542_20131029_200308_inLine +BABEL_OP2_202_41542_20131029_200308_outLine +BABEL_OP2_202_41598_20140225_031321_inLine +BABEL_OP2_202_41598_20140225_031321_outLine +BABEL_OP2_202_41720_20131129_192607_inLine +BABEL_OP2_202_41720_20131129_192607_outLine +BABEL_OP2_202_41720_20131129_194102_inLine +BABEL_OP2_202_41720_20131129_194102_outLine +BABEL_OP2_202_42309_20140221_210458_inLine +BABEL_OP2_202_42309_20140221_210458_outLine +BABEL_OP2_202_42434_20130930_235132_inLine +BABEL_OP2_202_42434_20130930_235132_outLine +BABEL_OP2_202_42434_20131001_001757_inLine +BABEL_OP2_202_42434_20131001_001757_outLine +BABEL_OP2_202_42991_20130801_010705_inLine +BABEL_OP2_202_42991_20130801_010705_outLine +BABEL_OP2_202_43395_20140220_223151_inLine +BABEL_OP2_202_43395_20140220_223151_outLine +BABEL_OP2_202_43794_20131015_230636_inLine +BABEL_OP2_202_43794_20131015_230636_outLine +BABEL_OP2_202_46041_20131018_224852_inLine +BABEL_OP2_202_46041_20131018_224852_outLine +BABEL_OP2_202_46261_20130213_203255_inLine +BABEL_OP2_202_46261_20130213_203255_outLine +BABEL_OP2_202_46550_20131003_205134_inLine +BABEL_OP2_202_46550_20131003_205134_outLine +BABEL_OP2_202_46688_20130108_003601_inLine +BABEL_OP2_202_46688_20130108_003601_outLine +BABEL_OP2_202_46757_20130726_172556_inLine +BABEL_OP2_202_46757_20130726_172556_outLine +BABEL_OP2_202_46770_20140223_234733_inLine +BABEL_OP2_202_46770_20140223_234733_outLine +BABEL_OP2_202_46976_20130214_203921_inLine 
+BABEL_OP2_202_46976_20130214_203921_outLine +BABEL_OP2_202_47186_20131101_211007_inLine +BABEL_OP2_202_47186_20131101_211007_outLine +BABEL_OP2_202_47823_20131017_214917_inLine +BABEL_OP2_202_47823_20131017_214917_outLine +BABEL_OP2_202_47866_20131010_061153_inLine +BABEL_OP2_202_47866_20131010_061153_outLine +BABEL_OP2_202_48243_20131009_224543_inLine +BABEL_OP2_202_48243_20131009_224543_outLine +BABEL_OP2_202_48299_20131130_233044_inLine +BABEL_OP2_202_48399_20131005_030007_outLine +BABEL_OP2_202_48422_20140225_220708_inLine +BABEL_OP2_202_48422_20140225_220708_outLine +BABEL_OP2_202_49437_20131031_193108_inLine +BABEL_OP2_202_49437_20131031_193108_outLine +BABEL_OP2_202_49630_20130731_234235_inLine +BABEL_OP2_202_49630_20130731_234235_outLine +BABEL_OP2_202_49739_20131127_171846_inLine +BABEL_OP2_202_49739_20131127_171846_outLine +BABEL_OP2_202_49768_20131001_222725_inLine +BABEL_OP2_202_49768_20131001_222725_outLine +BABEL_OP2_202_49907_20131003_213256_inLine +BABEL_OP2_202_49907_20131003_213256_outLine +BABEL_OP2_202_50186_20131216_004336_inLine +BABEL_OP2_202_50186_20131216_004336_outLine +BABEL_OP2_202_51156_20131216_015429_inLine +BABEL_OP2_202_51156_20131216_015429_outLine +BABEL_OP2_202_51484_20140123_220444_inLine +BABEL_OP2_202_51484_20140123_220444_outLine +BABEL_OP2_202_51611_20130109_194912_inLine +BABEL_OP2_202_51611_20130109_194912_outLine +BABEL_OP2_202_52246_20140125_011930_inLine +BABEL_OP2_202_52246_20140125_011930_outLine +BABEL_OP2_202_52272_20130103_193203_inLine +BABEL_OP2_202_52272_20130103_193203_outLine +BABEL_OP2_202_52422_20140123_223352_inLine +BABEL_OP2_202_52422_20140123_223352_outLine +BABEL_OP2_202_53063_20140124_000041_inLine +BABEL_OP2_202_53063_20140124_000041_outLine +BABEL_OP2_202_53758_20131203_003849_inLine +BABEL_OP2_202_53758_20131203_003849_outLine +BABEL_OP2_202_54066_20140121_223255_inLine +BABEL_OP2_202_54066_20140121_223255_outLine +BABEL_OP2_202_54074_20140123_205035_inLine +BABEL_OP2_202_54074_20140123_205035_outLine +BABEL_OP2_202_54530_20140125_000633_inLine +BABEL_OP2_202_54530_20140125_000633_outLine +BABEL_OP2_202_54634_20140121_201449_inLine +BABEL_OP2_202_54634_20140121_201449_outLine +BABEL_OP2_202_54841_20140122_195114_inLine +BABEL_OP2_202_54841_20140122_195114_outLine +BABEL_OP2_202_54841_20140122_200157_inLine +BABEL_OP2_202_54841_20140122_200157_outLine +BABEL_OP2_202_55259_20130930_023554_inLine +BABEL_OP2_202_55259_20130930_023554_outLine +BABEL_OP2_202_55349_20131010_002325_inLine +BABEL_OP2_202_55349_20131010_002325_outLine +BABEL_OP2_202_55381_20140123_030341_inLine +BABEL_OP2_202_55381_20140123_030341_outLine +BABEL_OP2_202_56023_20140124_213010_inLine +BABEL_OP2_202_56023_20140124_213010_outLine +BABEL_OP2_202_56306_20140122_204419_inLine +BABEL_OP2_202_56306_20140122_204419_outLine +BABEL_OP2_202_56331_20140124_212336_inLine +BABEL_OP2_202_56331_20140124_212336_outLine +BABEL_OP2_202_56465_20140122_194039_inLine +BABEL_OP2_202_56465_20140122_194039_outLine +BABEL_OP2_202_56606_20140123_202633_inLine +BABEL_OP2_202_56606_20140123_202633_outLine +BABEL_OP2_202_56951_20131130_192609_inLine +BABEL_OP2_202_56951_20131130_192609_outLine +BABEL_OP2_202_57233_20131120_235941_inLine +BABEL_OP2_202_57233_20131120_235941_outLine +BABEL_OP2_202_57782_20140129_231340_inLine +BABEL_OP2_202_57782_20140129_231340_outLine +BABEL_OP2_202_58103_20130930_045229_inLine +BABEL_OP2_202_58103_20130930_045229_outLine +BABEL_OP2_202_58107_20130927_165258_inLine +BABEL_OP2_202_58107_20130927_165258_outLine 
+BABEL_OP2_202_58489_20140131_214025_inLine +BABEL_OP2_202_58489_20140131_214025_outLine +BABEL_OP2_202_58821_20130730_183731_inLine +BABEL_OP2_202_58821_20130730_183731_outLine +BABEL_OP2_202_59028_20140131_212747_inLine +BABEL_OP2_202_59028_20140131_212747_outLine +BABEL_OP2_202_59402_20140201_222141_inLine +BABEL_OP2_202_59402_20140201_222141_outLine +BABEL_OP2_202_59402_20140201_222847_inLine +BABEL_OP2_202_59402_20140201_222847_outLine +BABEL_OP2_202_59720_20130930_032445_inLine +BABEL_OP2_202_59720_20130930_032445_outLine +BABEL_OP2_202_60474_20140120_222223_inLine +BABEL_OP2_202_60474_20140120_222223_outLine +BABEL_OP2_202_60477_20140201_200420_inLine +BABEL_OP2_202_60477_20140201_200420_outLine +BABEL_OP2_202_60778_20131201_233949_inLine +BABEL_OP2_202_60778_20131201_233949_outLine +BABEL_OP2_202_61040_20140227_003457_inLine +BABEL_OP2_202_61040_20140227_003457_outLine +BABEL_OP2_202_61438_20131129_231819_inLine +BABEL_OP2_202_61438_20131129_231819_outLine +BABEL_OP2_202_61438_20131129_233030_inLine +BABEL_OP2_202_61438_20131129_233030_outLine +BABEL_OP2_202_61873_20130111_181915_inLine +BABEL_OP2_202_61873_20130111_181915_outLine +BABEL_OP2_202_62047_20140129_020943_inLine +BABEL_OP2_202_62047_20140129_020943_outLine +BABEL_OP2_202_62360_20131014_211636_inLine +BABEL_OP2_202_62360_20131014_211636_outLine +BABEL_OP2_202_62714_20131101_225706_inLine +BABEL_OP2_202_62714_20131101_225706_outLine +BABEL_OP2_202_63490_20131203_234940_inLine +BABEL_OP2_202_63490_20131203_234940_outLine +BABEL_OP2_202_63670_20140130_231139_inLine +BABEL_OP2_202_63670_20140130_231139_outLine +BABEL_OP2_202_63920_20131215_021712_inLine +BABEL_OP2_202_63920_20131215_021712_outLine +BABEL_OP2_202_64688_20140126_004157_inLine +BABEL_OP2_202_64688_20140126_004157_outLine +BABEL_OP2_202_65048_20140128_174534_inLine +BABEL_OP2_202_65048_20140128_174534_outLine +BABEL_OP2_202_65336_20140131_001312_inLine +BABEL_OP2_202_65336_20140131_001312_outLine +BABEL_OP2_202_65466_20131010_013521_inLine +BABEL_OP2_202_65466_20131010_013521_outLine +BABEL_OP2_202_65913_20140127_181419_inLine +BABEL_OP2_202_65913_20140127_181419_outLine +BABEL_OP2_202_66001_20130107_194345_inLine +BABEL_OP2_202_66001_20130107_194345_outLine +BABEL_OP2_202_66045_20130410_204151_inLine +BABEL_OP2_202_66045_20130410_204151_outLine +BABEL_OP2_202_66045_20130410_211501_inLine +BABEL_OP2_202_66045_20130410_211501_outLine +BABEL_OP2_202_66305_20140126_020747_inLine +BABEL_OP2_202_66305_20140126_020747_outLine +BABEL_OP2_202_66641_20131127_183344_inLine +BABEL_OP2_202_66641_20131127_183344_outLine +BABEL_OP2_202_66916_20121229_203810_inLine +BABEL_OP2_202_66916_20121229_203810_outLine +BABEL_OP2_202_66916_20121229_211053_inLine +BABEL_OP2_202_66916_20121229_211053_outLine +BABEL_OP2_202_67401_20130912_043928_inLine +BABEL_OP2_202_67401_20130912_043928_outLine +BABEL_OP2_202_67964_20140125_232737_inLine +BABEL_OP2_202_67964_20140125_232737_outLine +BABEL_OP2_202_68289_20131128_012756_inLine +BABEL_OP2_202_68289_20131128_012756_outLine +BABEL_OP2_202_68748_20130803_201133_inLine +BABEL_OP2_202_68748_20130803_201133_outLine +BABEL_OP2_202_68854_20131012_000134_inLine +BABEL_OP2_202_68854_20131012_000134_outLine +BABEL_OP2_202_69937_20140131_034019_inLine +BABEL_OP2_202_69937_20140131_034019_outLine +BABEL_OP2_202_71566_20140311_213752_inLine +BABEL_OP2_202_71566_20140311_213752_outLine +BABEL_OP2_202_71976_20131128_193641_inLine +BABEL_OP2_202_71976_20131128_193641_outLine +BABEL_OP2_202_72324_20130423_161716_inLine 
+BABEL_OP2_202_72324_20130423_161716_outLine +BABEL_OP2_202_73005_20131012_011254_inLine +BABEL_OP2_202_73005_20131012_011254_outLine +BABEL_OP2_202_73022_20140226_210050_inLine +BABEL_OP2_202_73022_20140226_210050_outLine +BABEL_OP2_202_73518_20140304_001655_inLine +BABEL_OP2_202_73518_20140304_001655_outLine +BABEL_OP2_202_74121_20130220_195721_inLine +BABEL_OP2_202_74121_20130220_195721_outLine +BABEL_OP2_202_74121_20130220_201735_inLine +BABEL_OP2_202_74121_20130220_201735_outLine +BABEL_OP2_202_74667_20130227_180657_inLine +BABEL_OP2_202_74667_20130227_180657_outLine +BABEL_OP2_202_75064_20140226_232411_inLine +BABEL_OP2_202_75064_20140226_232411_outLine +BABEL_OP2_202_75261_20140311_002541_inLine +BABEL_OP2_202_75261_20140311_002541_outLine +BABEL_OP2_202_75812_20131127_193133_inLine +BABEL_OP2_202_75812_20131127_193133_outLine +BABEL_OP2_202_75930_20131202_213433_inLine +BABEL_OP2_202_75930_20131202_213433_outLine +BABEL_OP2_202_76126_20131031_183234_inLine +BABEL_OP2_202_76126_20131031_183234_outLine +BABEL_OP2_202_76444_20131018_000013_inLine +BABEL_OP2_202_76444_20131018_000013_outLine +BABEL_OP2_202_76499_20130412_201900_inLine +BABEL_OP2_202_76499_20130412_201900_outLine +BABEL_OP2_202_77033_20140312_034901_inLine +BABEL_OP2_202_77033_20140312_034901_outLine +BABEL_OP2_202_77146_20121229_203404_inLine +BABEL_OP2_202_77146_20121229_203404_outLine +BABEL_OP2_202_78482_20140311_014827_inLine +BABEL_OP2_202_78482_20140311_014827_outLine +BABEL_OP2_202_79045_20140310_212332_inLine +BABEL_OP2_202_79045_20140310_212332_outLine +BABEL_OP2_202_79367_20130107_224252_inLine +BABEL_OP2_202_79367_20130107_224252_outLine +BABEL_OP2_202_79973_20131130_184708_inLine +BABEL_OP2_202_79973_20131130_184708_outLine +BABEL_OP2_202_79995_20140227_030446_inLine +BABEL_OP2_202_79995_20140227_030446_outLine +BABEL_OP2_202_80134_20131202_174756_inLine +BABEL_OP2_202_80134_20131202_174756_outLine +BABEL_OP2_202_80306_20130928_232209_inLine +BABEL_OP2_202_80306_20130928_232209_outLine +BABEL_OP2_202_80383_20131207_013517_inLine +BABEL_OP2_202_80383_20131207_013517_outLine +BABEL_OP2_202_80989_20131016_213255_inLine +BABEL_OP2_202_80989_20131016_213255_outLine +BABEL_OP2_202_81149_20131010_010411_inLine +BABEL_OP2_202_81149_20131010_010411_outLine +BABEL_OP2_202_81622_20130218_232606_inLine +BABEL_OP2_202_81622_20130218_232606_outLine +BABEL_OP2_202_82123_20131130_004859_inLine +BABEL_OP2_202_82123_20131130_004859_outLine +BABEL_OP2_202_82138_20130415_225929_inLine +BABEL_OP2_202_82138_20130415_225929_outLine +BABEL_OP2_202_82425_20130108_181846_inLine +BABEL_OP2_202_82425_20130108_181846_outLine +BABEL_OP2_202_82473_20131004_202625_inLine +BABEL_OP2_202_82473_20131004_202625_outLine +BABEL_OP2_202_82496_20130105_232830_inLine +BABEL_OP2_202_82496_20130105_232830_outLine +BABEL_OP2_202_82622_20131007_171417_inLine +BABEL_OP2_202_82622_20131007_171417_outLine +BABEL_OP2_202_83609_20131128_022206_inLine +BABEL_OP2_202_83609_20131128_022206_outLine +BABEL_OP2_202_83625_20131130_222251_inLine +BABEL_OP2_202_83625_20131130_222251_outLine +BABEL_OP2_202_83651_20131003_212624_inLine +BABEL_OP2_202_83651_20131003_212624_outLine +BABEL_OP2_202_84077_20131130_195755_inLine +BABEL_OP2_202_84077_20131130_195755_outLine +BABEL_OP2_202_84194_20131130_024921_inLine +BABEL_OP2_202_84194_20131130_024921_outLine +BABEL_OP2_202_84408_20130306_184336_inLine +BABEL_OP2_202_84408_20130306_184336_outLine +BABEL_OP2_202_84466_20131010_040505_inLine +BABEL_OP2_202_84466_20131010_040505_outLine 
+BABEL_OP2_202_84469_20131018_212735_inLine +BABEL_OP2_202_84469_20131018_212735_outLine +BABEL_OP2_202_84768_20130107_194303_inLine +BABEL_OP2_202_84768_20130107_194303_outLine +BABEL_OP2_202_86156_20131030_001706_inLine +BABEL_OP2_202_86156_20131030_001706_outLine +BABEL_OP2_202_87179_20140320_165556_inLine +BABEL_OP2_202_87179_20140320_165556_outLine +BABEL_OP2_202_87305_20131016_225546_inLine +BABEL_OP2_202_87305_20131016_225546_outLine +BABEL_OP2_202_88776_20130107_192204_inLine +BABEL_OP2_202_88776_20130107_192204_outLine +BABEL_OP2_202_88776_20130107_195623_inLine +BABEL_OP2_202_88776_20130107_195623_outLine +BABEL_OP2_202_88783_20131018_191706_inLine +BABEL_OP2_202_88783_20131018_191706_outLine +BABEL_OP2_202_88865_20140319_212413_inLine +BABEL_OP2_202_88865_20140319_212413_outLine +BABEL_OP2_202_89665_20140320_004314_inLine +BABEL_OP2_202_89665_20140320_004314_outLine +BABEL_OP2_202_89695_20130215_224831_inLine +BABEL_OP2_202_89695_20130215_224831_outLine +BABEL_OP2_202_90347_20130912_005052_inLine +BABEL_OP2_202_90347_20130912_005052_outLine +BABEL_OP2_202_90572_20131009_190400_inLine +BABEL_OP2_202_90572_20131009_190400_outLine +BABEL_OP2_202_90737_20130213_201303_inLine +BABEL_OP2_202_90737_20130213_201303_outLine +BABEL_OP2_202_90739_20130222_223815_inLine +BABEL_OP2_202_90739_20130222_223815_outLine +BABEL_OP2_202_90740_20131120_195825_inLine +BABEL_OP2_202_90740_20131120_195825_outLine +BABEL_OP2_202_91080_20130429_213558_inLine +BABEL_OP2_202_91080_20130429_213558_outLine +BABEL_OP2_202_91478_20131127_031740_inLine +BABEL_OP2_202_91478_20131127_031740_outLine +BABEL_OP2_202_91891_20130803_000104_inLine +BABEL_OP2_202_91891_20130803_000104_outLine +BABEL_OP2_202_92065_20140109_204802_inLine +BABEL_OP2_202_92065_20140109_204802_outLine +BABEL_OP2_202_92440_20131203_195407_inLine +BABEL_OP2_202_92440_20131203_195407_outLine +BABEL_OP2_202_92440_20131203_200046_inLine +BABEL_OP2_202_92440_20131203_200046_outLine +BABEL_OP2_202_92527_20130225_184732_inLine +BABEL_OP2_202_92527_20130225_184732_outLine +BABEL_OP2_202_93153_20131003_212947_inLine +BABEL_OP2_202_93153_20131003_212947_outLine +BABEL_OP2_202_93153_20131003_213722_inLine +BABEL_OP2_202_93153_20131003_213722_outLine +BABEL_OP2_202_93222_20131126_211540_inLine +BABEL_OP2_202_93222_20131126_211540_outLine +BABEL_OP2_202_94333_20130105_202651_inLine +BABEL_OP2_202_94333_20130105_202651_outLine +BABEL_OP2_202_94449_20131011_205657_inLine +BABEL_OP2_202_94449_20131011_205657_outLine +BABEL_OP2_202_94869_20121219_204921_inLine +BABEL_OP2_202_94869_20121219_204921_outLine +BABEL_OP2_202_95077_20140320_014923_inLine +BABEL_OP2_202_95077_20140320_014923_outLine +BABEL_OP2_202_95231_20131128_211454_inLine +BABEL_OP2_202_95231_20131128_211454_outLine +BABEL_OP2_202_96376_20131011_024111_inLine +BABEL_OP2_202_96376_20131011_024111_outLine +BABEL_OP2_202_96680_20131130_202936_inLine +BABEL_OP2_202_96680_20131130_202936_outLine +BABEL_OP2_202_96690_20130220_210217_inLine +BABEL_OP2_202_96690_20130220_210217_outLine +BABEL_OP2_202_96808_20131012_212254_inLine +BABEL_OP2_202_96808_20131012_212254_outLine +BABEL_OP2_202_97220_20140319_193818_inLine +BABEL_OP2_202_97220_20140319_193818_outLine +BABEL_OP2_202_97363_20131002_203133_inLine +BABEL_OP2_202_97363_20131002_203133_outLine +BABEL_OP2_202_97363_20131003_002739_inLine +BABEL_OP2_202_97363_20131003_002739_outLine +BABEL_OP2_202_97373_20130730_151855_inLine +BABEL_OP2_202_99401_20130108_001107_inLine +BABEL_OP2_202_99401_20130108_001107_outLine 
+BABEL_OP2_202_99594_20130220_222308_inLine +BABEL_OP2_202_99594_20130220_222308_outLine +BABEL_OP2_202_99883_20131120_212150_inLine +BABEL_OP2_202_99883_20131120_212150_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list new file mode 100644 index 00000000000..72047620427 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list @@ -0,0 +1,555 @@ +BABEL_OP2_202_10036_20130313_171555_outLine +BABEL_OP2_202_10058_20131017_230021_outLine +BABEL_OP2_202_10313_20140205_002214_inLine +BABEL_OP2_202_10319_20121229_224454_outLine +BABEL_OP2_202_10411_20140209_011824_inLine +BABEL_OP2_202_10411_20140209_011824_outLine +BABEL_OP2_202_10469_20131130_014924_inLine +BABEL_OP2_202_10638_20140205_005404_inLine +BABEL_OP2_202_10638_20140205_005404_outLine +BABEL_OP2_202_10901_20130913_004135_outLine +BABEL_OP2_202_10938_20130930_020020_inLine +BABEL_OP2_202_11096_20131016_000245_outLine +BABEL_OP2_202_11486_20140206_204420_inLine +BABEL_OP2_202_11486_20140206_204420_outLine +BABEL_OP2_202_11486_20140206_205137_inLine +BABEL_OP2_202_11486_20140206_205137_outLine +BABEL_OP2_202_12242_20140214_231330_inLine +BABEL_OP2_202_12609_20140207_172212_inLine +BABEL_OP2_202_13030_20131218_195618_outLine +BABEL_OP2_202_13324_20131219_001852_inLine +BABEL_OP2_202_13324_20131219_001852_outLine +BABEL_OP2_202_13547_20131127_025355_outLine +BABEL_OP2_202_13776_20140206_201743_inLine +BABEL_OP2_202_14560_20140206_011812_inLine +BABEL_OP2_202_14723_20140205_060355_inLine +BABEL_OP2_202_14723_20140205_060355_outLine +BABEL_OP2_202_15042_20131130_221534_inLine +BABEL_OP2_202_15042_20131130_221534_outLine +BABEL_OP2_202_15216_20140208_175430_inLine +BABEL_OP2_202_15322_20140208_191251_inLine +BABEL_OP2_202_15322_20140208_191251_outLine +BABEL_OP2_202_15466_20131127_213156_inLine +BABEL_OP2_202_15466_20131127_214339_inLine +BABEL_OP2_202_15535_20131001_012120_inLine +BABEL_OP2_202_15749_20140206_024112_inLine +BABEL_OP2_202_15749_20140206_024112_outLine +BABEL_OP2_202_15926_20130925_034742_inLine +BABEL_OP2_202_15926_20130925_034742_outLine +BABEL_OP2_202_15926_20130925_035312_inLine +BABEL_OP2_202_15926_20130925_035312_outLine +BABEL_OP2_202_16800_20131219_012534_outLine +BABEL_OP2_202_17127_20140113_203603_inLine +BABEL_OP2_202_17127_20140113_203603_outLine +BABEL_OP2_202_17165_20130410_211020_outLine +BABEL_OP2_202_17320_20140207_162515_outLine +BABEL_OP2_202_17320_20140207_163148_outLine +BABEL_OP2_202_17420_20131029_235015_inLine +BABEL_OP2_202_17496_20130926_185827_inLine +BABEL_OP2_202_17582_20140208_011506_inLine +BABEL_OP2_202_17913_20131128_031821_outLine +BABEL_OP2_202_17914_20131031_221433_inLine +BABEL_OP2_202_17937_20131220_004727_outLine +BABEL_OP2_202_18033_20131218_020549_inLine +BABEL_OP2_202_18033_20131218_020549_outLine +BABEL_OP2_202_18037_20140205_003923_inLine +BABEL_OP2_202_18037_20140205_003923_outLine +BABEL_OP2_202_18280_20140205_025345_inLine +BABEL_OP2_202_18280_20140205_025345_outLine +BABEL_OP2_202_18297_20131012_004111_inLine +BABEL_OP2_202_18297_20131012_004111_outLine +BABEL_OP2_202_18370_20140205_033926_inLine +BABEL_OP2_202_18370_20140205_033926_outLine +BABEL_OP2_202_18863_20131130_020252_inLine +BABEL_OP2_202_18863_20131130_032443_inLine +BABEL_OP2_202_19703_20131218_015339_inLine +BABEL_OP2_202_19773_20131220_220513_inLine +BABEL_OP2_202_19773_20131220_220513_outLine +BABEL_OP2_202_19782_20131102_001852_inLine +BABEL_OP2_202_20330_20140109_172943_outLine 
+BABEL_OP2_202_20330_20140109_174004_outLine +BABEL_OP2_202_20367_20140207_065137_outLine +BABEL_OP2_202_20454_20131217_005702_outLine +BABEL_OP2_202_20724_20131218_014801_outLine +BABEL_OP2_202_20800_20130109_234836_outLine +BABEL_OP2_202_20922_20140108_231607_outLine +BABEL_OP2_202_21004_20131017_221746_inLine +BABEL_OP2_202_21159_20140205_213005_inLine +BABEL_OP2_202_21244_20131015_194634_outLine +BABEL_OP2_202_21315_20140206_175302_inLine +BABEL_OP2_202_21327_20140206_001641_inLine +BABEL_OP2_202_21426_20140204_235517_inLine +BABEL_OP2_202_21426_20140204_235517_outLine +BABEL_OP2_202_22034_20140130_232345_inLine +BABEL_OP2_202_22034_20140130_234608_inLine +BABEL_OP2_202_22170_20131009_031606_inLine +BABEL_OP2_202_22288_20131212_003625_inLine +BABEL_OP2_202_22612_20131101_182509_inLine +BABEL_OP2_202_22965_20140214_212302_inLine +BABEL_OP2_202_22965_20140214_212302_outLine +BABEL_OP2_202_23046_20140207_214018_inLine +BABEL_OP2_202_23046_20140207_214018_outLine +BABEL_OP2_202_23196_20131130_210710_inLine +BABEL_OP2_202_23196_20131130_210710_outLine +BABEL_OP2_202_23239_20130923_232142_inLine +BABEL_OP2_202_23239_20130923_232142_outLine +BABEL_OP2_202_23681_20131016_231325_outLine +BABEL_OP2_202_23752_20140204_225435_inLine +BABEL_OP2_202_23752_20140204_225435_outLine +BABEL_OP2_202_24017_20131220_000437_outLine +BABEL_OP2_202_24209_20131203_184510_outLine +BABEL_OP2_202_24253_20131010_013952_outLine +BABEL_OP2_202_24587_20131127_230044_inLine +BABEL_OP2_202_24587_20131127_230044_outLine +BABEL_OP2_202_24648_20131128_030622_inLine +BABEL_OP2_202_25015_20140207_234017_inLine +BABEL_OP2_202_25198_20140219_203259_inLine +BABEL_OP2_202_25895_20131203_201422_inLine +BABEL_OP2_202_25895_20131203_202401_inLine +BABEL_OP2_202_26072_20140114_031432_inLine +BABEL_OP2_202_26072_20140114_031432_outLine +BABEL_OP2_202_26388_20131217_235617_outLine +BABEL_OP2_202_26478_20140207_223256_outLine +BABEL_OP2_202_26574_20140205_203902_inLine +BABEL_OP2_202_26574_20140205_203902_outLine +BABEL_OP2_202_26574_20140205_205040_inLine +BABEL_OP2_202_26574_20140205_205040_outLine +BABEL_OP2_202_27014_20140109_225600_outLine +BABEL_OP2_202_27014_20140109_231225_outLine +BABEL_OP2_202_27218_20131003_212404_inLine +BABEL_OP2_202_27841_20140206_002202_inLine +BABEL_OP2_202_27841_20140206_002202_outLine +BABEL_OP2_202_28280_20140205_194444_outLine +BABEL_OP2_202_28419_20140205_193403_inLine +BABEL_OP2_202_29023_20131219_010409_outLine +BABEL_OP2_202_29230_20140207_212300_inLine +BABEL_OP2_202_29323_20131031_234945_inLine +BABEL_OP2_202_29323_20131101_000454_inLine +BABEL_OP2_202_29563_20131212_060621_inLine +BABEL_OP2_202_29563_20131212_060621_outLine +BABEL_OP2_202_29746_20131217_001441_inLine +BABEL_OP2_202_29765_20140209_221538_inLine +BABEL_OP2_202_29765_20140209_221538_outLine +BABEL_OP2_202_29911_20131127_184715_outLine +BABEL_OP2_202_30345_20131220_003550_outLine +BABEL_OP2_202_30576_20131017_012418_inLine +BABEL_OP2_202_30653_20131015_175341_inLine +BABEL_OP2_202_30653_20131015_175341_outLine +BABEL_OP2_202_30974_20140219_013521_inLine +BABEL_OP2_202_31267_20140227_054848_inLine +BABEL_OP2_202_31267_20140227_054848_outLine +BABEL_OP2_202_31346_20131019_000000_outLine +BABEL_OP2_202_31346_20131101_003311_outLine +BABEL_OP2_202_31490_20130109_003835_inLine +BABEL_OP2_202_31490_20130109_005722_inLine +BABEL_OP2_202_31500_20131217_015833_inLine +BABEL_OP2_202_31500_20131217_015833_outLine +BABEL_OP2_202_31583_20131219_213900_outLine +BABEL_OP2_202_31628_20130921_212411_inLine 
+BABEL_OP2_202_31728_20131129_044747_inLine +BABEL_OP2_202_31728_20131129_044747_outLine +BABEL_OP2_202_32048_20131219_013244_inLine +BABEL_OP2_202_32328_20131030_210553_inLine +BABEL_OP2_202_32380_20131130_215631_outLine +BABEL_OP2_202_32872_20131016_190028_inLine +BABEL_OP2_202_32872_20131016_190028_outLine +BABEL_OP2_202_33149_20140225_001335_inLine +BABEL_OP2_202_33149_20140225_001335_outLine +BABEL_OP2_202_33229_20131218_213456_outLine +BABEL_OP2_202_33659_20140225_233435_outLine +BABEL_OP2_202_33800_20131126_211758_inLine +BABEL_OP2_202_33800_20131126_211758_outLine +BABEL_OP2_202_33806_20140224_202859_inLine +BABEL_OP2_202_33806_20140224_202859_outLine +BABEL_OP2_202_33840_20131101_001620_outLine +BABEL_OP2_202_33951_20130426_182755_outLine +BABEL_OP2_202_33951_20130426_184040_outLine +BABEL_OP2_202_33992_20131015_184831_inLine +BABEL_OP2_202_33992_20131015_184831_outLine +BABEL_OP2_202_34019_20140224_193636_outLine +BABEL_OP2_202_34106_20121227_222718_inLine +BABEL_OP2_202_34629_20131130_211927_outLine +BABEL_OP2_202_34713_20140225_014158_inLine +BABEL_OP2_202_34713_20140225_014158_outLine +BABEL_OP2_202_35008_20130722_185829_inLine +BABEL_OP2_202_35008_20130722_191623_inLine +BABEL_OP2_202_35143_20131018_192106_inLine +BABEL_OP2_202_35202_20130801_172530_inLine +BABEL_OP2_202_35202_20130801_195816_inLine +BABEL_OP2_202_35786_20131015_181857_inLine +BABEL_OP2_202_35786_20131015_181857_outLine +BABEL_OP2_202_36364_20131130_021940_inLine +BABEL_OP2_202_36669_20130213_192457_inLine +BABEL_OP2_202_37064_20131002_185856_inLine +BABEL_OP2_202_37064_20131002_185856_outLine +BABEL_OP2_202_37229_20140114_233648_outLine +BABEL_OP2_202_37271_20140114_192528_inLine +BABEL_OP2_202_37271_20140114_192528_outLine +BABEL_OP2_202_37499_20131016_183113_inLine +BABEL_OP2_202_37499_20131016_183113_outLine +BABEL_OP2_202_37684_20131203_022005_inLine +BABEL_OP2_202_37684_20131203_024603_inLine +BABEL_OP2_202_37776_20140115_213234_outLine +BABEL_OP2_202_37853_20131101_233956_inLine +BABEL_OP2_202_37853_20131101_233956_outLine +BABEL_OP2_202_37853_20131101_235036_inLine +BABEL_OP2_202_37853_20131101_235036_outLine +BABEL_OP2_202_38323_20140114_202816_inLine +BABEL_OP2_202_38750_20131018_005908_outLine +BABEL_OP2_202_39555_20140114_200302_outLine +BABEL_OP2_202_39579_20140115_214035_outLine +BABEL_OP2_202_39638_20131130_231218_inLine +BABEL_OP2_202_39680_20140115_212202_inLine +BABEL_OP2_202_39680_20140115_212202_outLine +BABEL_OP2_202_39920_20131208_045704_inLine +BABEL_OP2_202_39920_20131208_051609_inLine +BABEL_OP2_202_40648_20131215_203941_outLine +BABEL_OP2_202_40648_20131215_205022_outLine +BABEL_OP2_202_40686_20140114_232425_outLine +BABEL_OP2_202_40686_20140114_233413_outLine +BABEL_OP2_202_40740_20140114_212913_outLine +BABEL_OP2_202_40740_20140114_221533_outLine +BABEL_OP2_202_40939_20140115_005331_inLine +BABEL_OP2_202_40939_20140115_005331_outLine +BABEL_OP2_202_41073_20140114_201346_inLine +BABEL_OP2_202_41073_20140114_201346_outLine +BABEL_OP2_202_41174_20130222_214400_outLine +BABEL_OP2_202_41233_20131127_035936_inLine +BABEL_OP2_202_41233_20131127_035936_outLine +BABEL_OP2_202_41592_20130927_203118_inLine +BABEL_OP2_202_41745_20130222_224352_outLine +BABEL_OP2_202_41745_20130222_225523_outLine +BABEL_OP2_202_41745_20130226_220300_outLine +BABEL_OP2_202_41890_20131011_232931_inLine +BABEL_OP2_202_41890_20131011_232931_outLine +BABEL_OP2_202_41890_20131011_235301_inLine +BABEL_OP2_202_41890_20131011_235301_outLine +BABEL_OP2_202_41958_20131001_185053_inLine 
+BABEL_OP2_202_42146_20131011_232931_inLine +BABEL_OP2_202_42146_20131011_232931_outLine +BABEL_OP2_202_42146_20131011_233957_inLine +BABEL_OP2_202_42146_20131011_233957_outLine +BABEL_OP2_202_42231_20140224_221548_inLine +BABEL_OP2_202_42299_20140220_233422_outLine +BABEL_OP2_202_42526_20140228_035815_outLine +BABEL_OP2_202_42600_20131001_200025_outLine +BABEL_OP2_202_43115_20131012_005141_inLine +BABEL_OP2_202_43115_20131012_005141_outLine +BABEL_OP2_202_43323_20131129_040506_outLine +BABEL_OP2_202_43784_20131003_005323_inLine +BABEL_OP2_202_44029_20140224_224653_outLine +BABEL_OP2_202_44290_20131013_001608_inLine +BABEL_OP2_202_44290_20131013_001608_outLine +BABEL_OP2_202_44446_20131130_014441_inLine +BABEL_OP2_202_44868_20130806_210559_inLine +BABEL_OP2_202_45486_20140224_210341_outLine +BABEL_OP2_202_45559_20131016_215852_inLine +BABEL_OP2_202_45559_20131016_215852_outLine +BABEL_OP2_202_46268_20130107_230757_inLine +BABEL_OP2_202_46389_20131216_020541_inLine +BABEL_OP2_202_46389_20131216_020541_outLine +BABEL_OP2_202_46763_20131009_191902_inLine +BABEL_OP2_202_46763_20131009_191902_outLine +BABEL_OP2_202_46808_20140224_220014_inLine +BABEL_OP2_202_46808_20140224_220014_outLine +BABEL_OP2_202_47270_20140225_015137_inLine +BABEL_OP2_202_47487_20140224_232210_outLine +BABEL_OP2_202_47637_20140224_183628_inLine +BABEL_OP2_202_47637_20140224_183628_outLine +BABEL_OP2_202_47882_20131128_015709_outLine +BABEL_OP2_202_47923_20131129_211629_inLine +BABEL_OP2_202_47923_20131129_211629_outLine +BABEL_OP2_202_48200_20131128_211840_inLine +BABEL_OP2_202_49001_20131003_151102_inLine +BABEL_OP2_202_49001_20131003_151102_outLine +BABEL_OP2_202_49027_20131012_171107_outLine +BABEL_OP2_202_49118_20140223_214255_inLine +BABEL_OP2_202_49118_20140223_214255_outLine +BABEL_OP2_202_49216_20121227_221242_inLine +BABEL_OP2_202_49216_20121227_233227_inLine +BABEL_OP2_202_49330_20131130_015311_inLine +BABEL_OP2_202_49330_20131130_015311_outLine +BABEL_OP2_202_49502_20121227_234825_outLine +BABEL_OP2_202_49812_20140224_225827_inLine +BABEL_OP2_202_49812_20140224_225827_outLine +BABEL_OP2_202_49870_20140224_214828_inLine +BABEL_OP2_202_49870_20140224_214828_outLine +BABEL_OP2_202_49912_20140224_190150_inLine +BABEL_OP2_202_49912_20140224_190150_outLine +BABEL_OP2_202_50549_20140122_212548_inLine +BABEL_OP2_202_50549_20140122_212548_outLine +BABEL_OP2_202_50940_20131128_044329_outLine +BABEL_OP2_202_51015_20130919_230711_outLine +BABEL_OP2_202_51693_20140122_221655_outLine +BABEL_OP2_202_51955_20131004_201017_inLine +BABEL_OP2_202_51955_20131004_201017_outLine +BABEL_OP2_202_52322_20140122_020749_outLine +BABEL_OP2_202_52322_20140122_022032_outLine +BABEL_OP2_202_52438_20131002_193009_inLine +BABEL_OP2_202_52499_20131216_043452_inLine +BABEL_OP2_202_52694_20140123_012847_inLine +BABEL_OP2_202_52694_20140123_012847_outLine +BABEL_OP2_202_52803_20131129_235234_inLine +BABEL_OP2_202_52803_20131129_235234_outLine +BABEL_OP2_202_53010_20131129_193814_inLine +BABEL_OP2_202_53010_20131129_193814_outLine +BABEL_OP2_202_53144_20140123_025818_inLine +BABEL_OP2_202_53144_20140123_025818_outLine +BABEL_OP2_202_53415_20131101_205155_inLine +BABEL_OP2_202_54040_20140124_235842_inLine +BABEL_OP2_202_54040_20140124_235842_outLine +BABEL_OP2_202_54040_20140125_000629_inLine +BABEL_OP2_202_54040_20140125_000629_outLine +BABEL_OP2_202_54104_20130108_184048_inLine +BABEL_OP2_202_54390_20140123_021824_inLine +BABEL_OP2_202_54477_20140121_195829_inLine +BABEL_OP2_202_54477_20140121_195829_outLine 
+BABEL_OP2_202_54567_20130215_201456_outLine +BABEL_OP2_202_54697_20140124_011928_inLine +BABEL_OP2_202_54827_20131012_020910_inLine +BABEL_OP2_202_55136_20131120_230735_inLine +BABEL_OP2_202_55136_20131120_230735_outLine +BABEL_OP2_202_55136_20131120_231613_inLine +BABEL_OP2_202_55136_20131120_231613_outLine +BABEL_OP2_202_55267_20130429_211135_inLine +BABEL_OP2_202_55968_20121221_210945_outLine +BABEL_OP2_202_55968_20121222_190905_outLine +BABEL_OP2_202_56019_20140122_235300_inLine +BABEL_OP2_202_56019_20140122_235300_outLine +BABEL_OP2_202_56057_20131217_015911_outLine +BABEL_OP2_202_56076_20131012_005019_inLine +BABEL_OP2_202_56076_20131012_005019_outLine +BABEL_OP2_202_56307_20130925_024659_outLine +BABEL_OP2_202_56326_20131129_235243_inLine +BABEL_OP2_202_56427_20140115_215916_inLine +BABEL_OP2_202_56427_20140115_215916_outLine +BABEL_OP2_202_56468_20140125_021443_inLine +BABEL_OP2_202_56468_20140125_021443_outLine +BABEL_OP2_202_56684_20140122_005322_inLine +BABEL_OP2_202_56684_20140122_005322_outLine +BABEL_OP2_202_56925_20131215_232111_outLine +BABEL_OP2_202_57035_20131218_025223_inLine +BABEL_OP2_202_57035_20131218_025223_outLine +BABEL_OP2_202_57116_20121220_184622_outLine +BABEL_OP2_202_57219_20140128_183934_inLine +BABEL_OP2_202_57219_20140128_183934_outLine +BABEL_OP2_202_57464_20131012_002232_inLine +BABEL_OP2_202_57566_20140130_012514_inLine +BABEL_OP2_202_57566_20140130_012514_outLine +BABEL_OP2_202_57654_20140115_204117_inLine +BABEL_OP2_202_57654_20140115_204117_outLine +BABEL_OP2_202_57982_20131010_025934_outLine +BABEL_OP2_202_58026_20140129_211720_inLine +BABEL_OP2_202_58026_20140129_211720_outLine +BABEL_OP2_202_58585_20140201_230856_inLine +BABEL_OP2_202_58915_20140129_222116_outLine +BABEL_OP2_202_59078_20130911_214801_outLine +BABEL_OP2_202_59163_20131212_044108_outLine +BABEL_OP2_202_59291_20140201_004317_inLine +BABEL_OP2_202_59301_20140125_202811_inLine +BABEL_OP2_202_59301_20140125_202811_outLine +BABEL_OP2_202_59864_20140131_204919_inLine +BABEL_OP2_202_59993_20140114_021749_inLine +BABEL_OP2_202_59993_20140114_021749_outLine +BABEL_OP2_202_60299_20140131_075856_inLine +BABEL_OP2_202_60299_20140131_075856_outLine +BABEL_OP2_202_60307_20140131_184522_inLine +BABEL_OP2_202_60307_20140131_184522_outLine +BABEL_OP2_202_60397_20131202_205856_inLine +BABEL_OP2_202_60397_20131202_205856_outLine +BABEL_OP2_202_60458_20140130_223733_inLine +BABEL_OP2_202_60458_20140130_223733_outLine +BABEL_OP2_202_60706_20121228_005527_outLine +BABEL_OP2_202_61190_20131002_225904_outLine +BABEL_OP2_202_61219_20140120_234802_inLine +BABEL_OP2_202_61219_20140120_234802_outLine +BABEL_OP2_202_61684_20131130_211629_inLine +BABEL_OP2_202_61731_20131003_043735_outLine +BABEL_OP2_202_61971_20131010_034223_inLine +BABEL_OP2_202_62286_20130221_203131_outLine +BABEL_OP2_202_62362_20140201_220857_inLine +BABEL_OP2_202_62362_20140201_220857_outLine +BABEL_OP2_202_62471_20131203_193149_inLine +BABEL_OP2_202_62471_20131203_193149_outLine +BABEL_OP2_202_62491_20140131_223205_inLine +BABEL_OP2_202_62491_20140131_223205_outLine +BABEL_OP2_202_62656_20131203_001914_inLine +BABEL_OP2_202_62656_20131203_001914_outLine +BABEL_OP2_202_62724_20131017_221403_inLine +BABEL_OP2_202_63081_20121228_003935_inLine +BABEL_OP2_202_63094_20131016_235150_inLine +BABEL_OP2_202_63094_20131016_235150_outLine +BABEL_OP2_202_63265_20131216_224818_outLine +BABEL_OP2_202_63265_20131216_232337_outLine +BABEL_OP2_202_63334_20131128_190201_outLine +BABEL_OP2_202_63445_20121229_001208_outLine 
+BABEL_OP2_202_63671_20131213_034007_inLine +BABEL_OP2_202_63671_20131213_034007_outLine +BABEL_OP2_202_63766_20131217_022038_outLine +BABEL_OP2_202_64014_20140130_213507_inLine +BABEL_OP2_202_64014_20140130_213507_outLine +BABEL_OP2_202_64469_20131216_032049_outLine +BABEL_OP2_202_64635_20131129_013800_inLine +BABEL_OP2_202_64635_20131129_013800_outLine +BABEL_OP2_202_65252_20131016_183445_inLine +BABEL_OP2_202_65252_20131016_183445_outLine +BABEL_OP2_202_65268_20131220_020108_inLine +BABEL_OP2_202_65268_20131220_020108_outLine +BABEL_OP2_202_65268_20131220_021438_inLine +BABEL_OP2_202_65268_20131220_021438_outLine +BABEL_OP2_202_65367_20140129_234024_inLine +BABEL_OP2_202_65367_20140129_234024_outLine +BABEL_OP2_202_65370_20140201_195241_inLine +BABEL_OP2_202_65370_20140201_195241_outLine +BABEL_OP2_202_67066_20140130_212058_inLine +BABEL_OP2_202_67066_20140130_212058_outLine +BABEL_OP2_202_67213_20140130_185616_outLine +BABEL_OP2_202_67304_20140201_230632_inLine +BABEL_OP2_202_67304_20140201_230632_outLine +BABEL_OP2_202_67592_20140129_234213_inLine +BABEL_OP2_202_67659_20140115_234146_outLine +BABEL_OP2_202_67773_20140129_215114_inLine +BABEL_OP2_202_67773_20140129_215114_outLine +BABEL_OP2_202_67894_20140130_232658_inLine +BABEL_OP2_202_67894_20140130_232658_outLine +BABEL_OP2_202_68068_20130802_203147_outLine +BABEL_OP2_202_68385_20130208_214719_outLine +BABEL_OP2_202_68668_20140131_221117_inLine +BABEL_OP2_202_68668_20140131_221117_outLine +BABEL_OP2_202_68823_20131215_001456_outLine +BABEL_OP2_202_68910_20140127_211718_inLine +BABEL_OP2_202_68910_20140127_211718_outLine +BABEL_OP2_202_69096_20140312_022044_inLine +BABEL_OP2_202_69096_20140312_022044_outLine +BABEL_OP2_202_69153_20131128_194250_outLine +BABEL_OP2_202_69474_20130731_011215_inLine +BABEL_OP2_202_69474_20130731_012232_inLine +BABEL_OP2_202_69746_20140125_215609_inLine +BABEL_OP2_202_69746_20140125_215609_outLine +BABEL_OP2_202_69982_20140131_212729_inLine +BABEL_OP2_202_69982_20140131_212729_outLine +BABEL_OP2_202_69982_20140131_213451_inLine +BABEL_OP2_202_69982_20140131_213451_outLine +BABEL_OP2_202_70121_20130308_180634_inLine +BABEL_OP2_202_70221_20130429_160925_outLine +BABEL_OP2_202_70293_20131218_043924_inLine +BABEL_OP2_202_70293_20131218_043924_outLine +BABEL_OP2_202_70343_20130730_000937_outLine +BABEL_OP2_202_70452_20140115_230438_inLine +BABEL_OP2_202_70460_20131101_001026_inLine +BABEL_OP2_202_70460_20131101_001026_outLine +BABEL_OP2_202_70726_20131216_011153_inLine +BABEL_OP2_202_70726_20131216_011153_outLine +BABEL_OP2_202_71121_20131208_073117_outLine +BABEL_OP2_202_71404_20131004_002732_inLine +BABEL_OP2_202_72073_20131213_042304_inLine +BABEL_OP2_202_72349_20131128_183232_inLine +BABEL_OP2_202_72349_20131128_185336_inLine +BABEL_OP2_202_72844_20121222_003955_outLine +BABEL_OP2_202_73119_20140116_003550_outLine +BABEL_OP2_202_73119_20140120_205305_outLine +BABEL_OP2_202_73299_20131017_003841_outLine +BABEL_OP2_202_73909_20140303_224953_outLine +BABEL_OP2_202_74799_20130911_223303_outLine +BABEL_OP2_202_75342_20130731_021621_outLine +BABEL_OP2_202_75505_20121222_213031_inLine +BABEL_OP2_202_76437_20121219_005936_outLine +BABEL_OP2_202_76730_20131217_052627_outLine +BABEL_OP2_202_77112_20131003_031801_inLine +BABEL_OP2_202_77391_20140121_013824_inLine +BABEL_OP2_202_77391_20140121_013824_outLine +BABEL_OP2_202_77567_20121228_181102_inLine +BABEL_OP2_202_77803_20121222_202737_inLine +BABEL_OP2_202_77803_20121222_215157_inLine +BABEL_OP2_202_77904_20131221_020558_inLine 
+BABEL_OP2_202_77904_20131221_020558_outLine +BABEL_OP2_202_77909_20140126_064115_inLine +BABEL_OP2_202_77909_20140126_064115_outLine +BABEL_OP2_202_77921_20131127_232806_inLine +BABEL_OP2_202_77921_20131127_232806_outLine +BABEL_OP2_202_77921_20131127_234200_inLine +BABEL_OP2_202_77921_20131127_234200_outLine +BABEL_OP2_202_78016_20140226_215807_outLine +BABEL_OP2_202_78398_20131004_061913_outLine +BABEL_OP2_202_78511_20140226_231944_inLine +BABEL_OP2_202_78630_20140113_211239_inLine +BABEL_OP2_202_78630_20140113_211239_outLine +BABEL_OP2_202_78630_20140113_212040_inLine +BABEL_OP2_202_78630_20140113_212040_outLine +BABEL_OP2_202_78829_20131126_221958_outLine +BABEL_OP2_202_78833_20131119_205910_inLine +BABEL_OP2_202_78943_20140121_025623_inLine +BABEL_OP2_202_78943_20140121_025623_outLine +BABEL_OP2_202_78958_20131207_004716_inLine +BABEL_OP2_202_78976_20140115_235057_inLine +BABEL_OP2_202_78976_20140115_235057_outLine +BABEL_OP2_202_79139_20130928_173217_inLine +BABEL_OP2_202_79190_20130927_184727_outLine +BABEL_OP2_202_79451_20131004_055308_outLine +BABEL_OP2_202_80439_20131001_190050_outLine +BABEL_OP2_202_80559_20131007_063834_outLine +BABEL_OP2_202_80655_20131016_212951_inLine +BABEL_OP2_202_81213_20131004_205633_inLine +BABEL_OP2_202_81213_20131004_210252_inLine +BABEL_OP2_202_81287_20130731_193240_outLine +BABEL_OP2_202_81287_20130731_195716_outLine +BABEL_OP2_202_81392_20130728_234236_inLine +BABEL_OP2_202_81392_20130729_021638_inLine +BABEL_OP2_202_81404_20130314_220702_inLine +BABEL_OP2_202_81769_20131127_214614_outLine +BABEL_OP2_202_82742_20131029_223343_inLine +BABEL_OP2_202_83545_20131017_183706_inLine +BABEL_OP2_202_83783_20130911_225559_outLine +BABEL_OP2_202_84055_20131208_040856_outLine +BABEL_OP2_202_84430_20131217_023038_inLine +BABEL_OP2_202_84430_20131217_024752_inLine +BABEL_OP2_202_84467_20131126_224903_inLine +BABEL_OP2_202_84467_20131126_224903_outLine +BABEL_OP2_202_85010_20131201_004538_outLine +BABEL_OP2_202_85340_20131002_202217_outLine +BABEL_OP2_202_85647_20130416_223722_inLine +BABEL_OP2_202_86191_20140121_192414_inLine +BABEL_OP2_202_86191_20140121_192414_outLine +BABEL_OP2_202_86321_20131018_003746_inLine +BABEL_OP2_202_86676_20130802_223159_outLine +BABEL_OP2_202_86676_20130802_225309_outLine +BABEL_OP2_202_86722_20131001_193946_inLine +BABEL_OP2_202_86845_20131126_204553_outLine +BABEL_OP2_202_86845_20131126_210711_outLine +BABEL_OP2_202_86878_20131220_215841_inLine +BABEL_OP2_202_87693_20131004_231549_outLine +BABEL_OP2_202_87884_20131017_214906_inLine +BABEL_OP2_202_88982_20130930_042104_outLine +BABEL_OP2_202_89516_20131208_025053_inLine +BABEL_OP2_202_89516_20131208_025053_outLine +BABEL_OP2_202_89943_20131003_153927_outLine +BABEL_OP2_202_90318_20131215_222302_outLine +BABEL_OP2_202_91266_20131127_021953_inLine +BABEL_OP2_202_91266_20131127_021953_outLine +BABEL_OP2_202_92060_20131011_235309_outLine +BABEL_OP2_202_92176_20130319_022508_inLine +BABEL_OP2_202_92886_20131004_210342_inLine +BABEL_OP2_202_93443_20131129_212311_inLine +BABEL_OP2_202_93443_20131129_212311_outLine +BABEL_OP2_202_94025_20130801_210343_outLine +BABEL_OP2_202_94253_20131004_010116_outLine +BABEL_OP2_202_94316_20131017_194727_inLine +BABEL_OP2_202_94891_20131126_173659_inLine +BABEL_OP2_202_94891_20131126_173659_outLine +BABEL_OP2_202_95399_20130211_220740_inLine +BABEL_OP2_202_95399_20130211_230605_inLine +BABEL_OP2_202_95399_20130211_232555_inLine +BABEL_OP2_202_95937_20131217_005609_inLine +BABEL_OP2_202_96504_20140125_035346_inLine 
+BABEL_OP2_202_96504_20140125_035346_outLine
+BABEL_OP2_202_96525_20131018_225425_inLine
+BABEL_OP2_202_96525_20131018_225425_outLine
+BABEL_OP2_202_96525_20131018_230802_inLine
+BABEL_OP2_202_96525_20131018_230802_outLine
+BABEL_OP2_202_97063_20131128_231626_outLine
+BABEL_OP2_202_97461_20130928_010334_outLine
+BABEL_OP2_202_97836_20131009_221934_outLine
+BABEL_OP2_202_97925_20131203_210706_outLine
+BABEL_OP2_202_98678_20131010_023001_outLine
+BABEL_OP2_202_99732_20131126_215915_inLine
+BABEL_OP2_202_99732_20131126_215915_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/dev.list b/egs/babel/s5d/conf/lists/203-lao/dev.list
new file mode 100644
index 00000000000..3a31f075909
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/dev.list
@@ -0,0 +1,131 @@
+BABEL_OP1_203_10188_20130220_225432_inLine
+BABEL_OP1_203_10188_20130220_225432_outLine
+BABEL_OP1_203_10188_20130220_230849_inLine
+BABEL_OP1_203_10188_20130220_230849_outLine
+BABEL_OP1_203_10319_20130314_213741_inLine
+BABEL_OP1_203_10319_20130314_213741_outLine
+BABEL_OP1_203_10319_20130314_214749_inLine
+BABEL_OP1_203_10319_20130314_214749_outLine
+BABEL_OP1_203_14158_20130409_181505_inLine
+BABEL_OP1_203_14158_20130409_181505_outLine
+BABEL_OP1_203_14158_20130409_182411_inLine
+BABEL_OP1_203_14158_20130409_182411_outLine
+BABEL_OP1_203_14158_20130409_183108_inLine
+BABEL_OP1_203_14158_20130409_183108_outLine
+BABEL_OP1_203_14228_20130405_154037_inLine
+BABEL_OP1_203_14228_20130405_154037_outLine
+BABEL_OP1_203_14228_20130405_163836_inLine
+BABEL_OP1_203_14228_20130405_163836_outLine
+BABEL_OP1_203_14440_20130509_205709_inLine
+BABEL_OP1_203_14440_20130509_205709_outLine
+BABEL_OP1_203_15042_20130727_173946_inLine
+BABEL_OP1_203_15042_20130727_173946_outLine
+BABEL_OP1_203_16800_20130421_140442_inLine
+BABEL_OP1_203_16800_20130421_140442_outLine
+BABEL_OP1_203_17127_20130421_131732_inLine
+BABEL_OP1_203_17127_20130421_131732_outLine
+BABEL_OP1_203_17127_20130421_132248_inLine
+BABEL_OP1_203_17127_20130421_132248_outLine
+BABEL_OP1_203_17573_20130331_192906_inLine
+BABEL_OP1_203_17573_20130331_192906_outLine
+BABEL_OP1_203_17890_20130329_160302_inLine
+BABEL_OP1_203_17890_20130329_160302_outLine
+BABEL_OP1_203_19621_20130330_192114_inLine
+BABEL_OP1_203_19621_20130330_192114_outLine
+BABEL_OP1_203_19663_20130322_163118_inLine
+BABEL_OP1_203_19663_20130322_163118_outLine
+BABEL_OP1_203_19672_20130401_204303_inLine
+BABEL_OP1_203_19672_20130401_204303_outLine
+BABEL_OP1_203_21581_20130327_180143_inLine
+BABEL_OP1_203_21581_20130327_180143_outLine
+BABEL_OP1_203_22170_20130424_213413_inLine
+BABEL_OP1_203_22170_20130424_213413_outLine
+BABEL_OP1_203_22216_20130307_190055_inLine
+BABEL_OP1_203_22216_20130307_190055_outLine
+BABEL_OP1_203_22466_20130218_191925_inLine
+BABEL_OP1_203_22466_20130218_191925_outLine
+BABEL_OP1_203_23151_20130408_192838_inLine
+BABEL_OP1_203_23151_20130408_192838_outLine
+BABEL_OP1_203_23260_20130726_170748_inLine
+BABEL_OP1_203_23260_20130726_170748_outLine
+BABEL_OP1_203_23681_20130730_162132_inLine
+BABEL_OP1_203_23681_20130730_162132_outLine
+BABEL_OP1_203_23995_20130731_195202_inLine
+BABEL_OP1_203_23995_20130731_195202_outLine
+BABEL_OP1_203_25012_20130814_141020_inLine
+BABEL_OP1_203_25012_20130814_141020_outLine
+BABEL_OP1_203_26206_20130328_193450_inLine
+BABEL_OP1_203_26206_20130328_193450_outLine
+BABEL_OP1_203_29208_20130320_141202_inLine
+BABEL_OP1_203_29208_20130320_141202_outLine
+BABEL_OP1_203_29765_20130426_185032_inLine
+BABEL_OP1_203_29765_20130426_185032_outLine
+BABEL_OP1_203_31484_20130404_184608_inLine
+BABEL_OP1_203_31484_20130404_184608_outLine
+BABEL_OP1_203_32861_20130424_133938_inLine
+BABEL_OP1_203_32861_20130424_133938_outLine
+BABEL_OP1_203_32959_20130406_145730_inLine
+BABEL_OP1_203_32959_20130406_145730_outLine
+BABEL_OP1_203_37499_20130512_203148_inLine
+BABEL_OP1_203_37499_20130512_203148_outLine
+BABEL_OP1_203_39744_20130307_140614_inLine
+BABEL_OP1_203_39744_20130307_140614_outLine
+BABEL_OP1_203_41400_20130728_194416_inLine
+BABEL_OP1_203_41400_20130728_194416_outLine
+BABEL_OP1_203_41920_20130310_185621_inLine
+BABEL_OP1_203_41920_20130310_185621_outLine
+BABEL_OP1_203_48789_20130324_180810_inLine
+BABEL_OP1_203_48789_20130324_180810_outLine
+BABEL_OP1_203_50565_20130307_164552_inLine
+BABEL_OP1_203_50565_20130307_164552_outLine
+BABEL_OP1_203_52025_20130306_143713_inLine
+BABEL_OP1_203_52025_20130306_143713_outLine
+BABEL_OP1_203_52725_20130410_214000_inLine
+BABEL_OP1_203_52725_20130410_214000_outLine
+BABEL_OP1_203_52932_20130314_203215_inLine
+BABEL_OP1_203_52932_20130314_203215_outLine
+BABEL_OP1_203_56090_20130304_141755_inLine
+BABEL_OP1_203_56090_20130304_141755_outLine
+BABEL_OP1_203_56429_20130313_200952_inLine
+BABEL_OP1_203_56429_20130313_200952_outLine
+BABEL_OP1_203_56743_20130319_152822_inLine
+BABEL_OP1_203_56743_20130319_152822_outLine
+BABEL_OP1_203_57609_20130330_155903_inLine
+BABEL_OP1_203_57609_20130330_155903_outLine
+BABEL_OP1_203_58717_20130505_152817_inLine
+BABEL_OP1_203_58717_20130505_152817_outLine
+BABEL_OP1_203_58734_20130309_204100_inLine
+BABEL_OP1_203_60538_20130311_163456_inLine
+BABEL_OP1_203_60538_20130311_163456_outLine
+BABEL_OP1_203_60836_20130314_211014_inLine
+BABEL_OP1_203_60836_20130314_211014_outLine
+BABEL_OP1_203_61963_20130718_155107_inLine
+BABEL_OP1_203_61963_20130718_155107_outLine
+BABEL_OP1_203_62155_20130426_173905_inLine
+BABEL_OP1_203_62155_20130426_173905_outLine
+BABEL_OP1_203_65252_20130731_170815_inLine
+BABEL_OP1_203_65252_20130731_170815_outLine
+BABEL_OP1_203_66026_20130331_154806_inLine
+BABEL_OP1_203_66026_20130331_154806_outLine
+BABEL_OP1_203_67842_20130313_142229_inLine
+BABEL_OP1_203_67842_20130313_142229_outLine
+BABEL_OP1_203_72654_20130323_163248_inLine
+BABEL_OP1_203_72654_20130323_163248_outLine
+BABEL_OP1_203_72733_20130731_235502_inLine
+BABEL_OP1_203_72733_20130731_235502_outLine
+BABEL_OP1_203_79190_20130714_135011_inLine
+BABEL_OP1_203_79190_20130714_135011_outLine
+BABEL_OP1_203_84370_20130506_190748_inLine
+BABEL_OP1_203_84370_20130506_190748_outLine
+BABEL_OP1_203_88601_20130323_155050_inLine
+BABEL_OP1_203_88601_20130323_155050_outLine
+BABEL_OP1_203_90417_20130507_172057_inLine
+BABEL_OP1_203_90417_20130507_172057_outLine
+BABEL_OP1_203_93475_20130312_144135_inLine
+BABEL_OP1_203_93475_20130312_144135_outLine
+BABEL_OP1_203_95467_20130506_155929_inLine
+BABEL_OP1_203_95467_20130506_155929_outLine
+BABEL_OP1_203_96504_20130319_161923_inLine
+BABEL_OP1_203_96504_20130319_161923_outLine
+BABEL_OP1_203_99732_20130406_175258_inLine
+BABEL_OP1_203_99732_20130406_175258_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/eval.list b/egs/babel/s5d/conf/lists/203-lao/eval.list
new file mode 100644
index 00000000000..f231ad9d910
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/eval.list
@@ -0,0 +1,192 @@
+BABEL_OP1_203_12321_20130406_165656_inLine
+BABEL_OP1_203_12321_20130406_165656_outLine
+BABEL_OP1_203_12916_20130309_200304_inLine
+BABEL_OP1_203_12916_20130309_200304_outLine
+BABEL_OP1_203_13040_20130312_181212_inLine
+BABEL_OP1_203_13040_20130312_181212_outLine +BABEL_OP1_203_13427_20130428_153335_inLine +BABEL_OP1_203_13427_20130428_153335_outLine +BABEL_OP1_203_14537_20130726_183519_inLine +BABEL_OP1_203_14537_20130726_183519_outLine +BABEL_OP1_203_15262_20130311_163214_inLine +BABEL_OP1_203_15262_20130311_163214_outLine +BABEL_OP1_203_15848_20130304_193558_inLine +BABEL_OP1_203_15848_20130304_193558_outLine +BABEL_OP1_203_16056_20130309_212127_inLine +BABEL_OP1_203_16056_20130309_212127_outLine +BABEL_OP1_203_17165_20130323_193349_inLine +BABEL_OP1_203_17165_20130323_193349_outLine +BABEL_OP1_203_17420_20130410_223425_inLine +BABEL_OP1_203_17420_20130410_223425_outLine +BABEL_OP1_203_18863_20130423_201154_inLine +BABEL_OP1_203_18863_20130423_201154_outLine +BABEL_OP1_203_19545_20130328_181847_inLine +BABEL_OP1_203_19545_20130328_181847_outLine +BABEL_OP1_203_19767_20130729_162359_inLine +BABEL_OP1_203_20721_20130805_184106_inLine +BABEL_OP1_203_20721_20130805_184106_outLine +BABEL_OP1_203_20738_20130501_144021_inLine +BABEL_OP1_203_20738_20130501_144021_outLine +BABEL_OP1_203_20800_20130312_182739_inLine +BABEL_OP1_203_20800_20130312_182739_outLine +BABEL_OP1_203_20800_20130312_190729_inLine +BABEL_OP1_203_20800_20130312_190729_outLine +BABEL_OP1_203_21159_20130428_145928_inLine +BABEL_OP1_203_21159_20130428_145928_outLine +BABEL_OP1_203_21393_20130802_160502_inLine +BABEL_OP1_203_21393_20130802_160502_outLine +BABEL_OP1_203_21794_20130323_191728_inLine +BABEL_OP1_203_21794_20130323_191728_outLine +BABEL_OP1_203_22641_20130310_194352_inLine +BABEL_OP1_203_22641_20130310_194352_outLine +BABEL_OP1_203_23395_20130423_140708_inLine +BABEL_OP1_203_23395_20130423_140708_outLine +BABEL_OP1_203_23731_20130331_144735_inLine +BABEL_OP1_203_23731_20130331_144735_outLine +BABEL_OP1_203_24924_20130509_190210_inLine +BABEL_OP1_203_24924_20130509_190210_outLine +BABEL_OP1_203_27189_20130812_203016_inLine +BABEL_OP1_203_27189_20130812_203016_outLine +BABEL_OP1_203_28422_20130401_201546_inLine +BABEL_OP1_203_28422_20130401_201546_outLine +BABEL_OP1_203_28538_20130323_211503_inLine +BABEL_OP1_203_28538_20130323_211503_outLine +BABEL_OP1_203_28538_20130323_212946_inLine +BABEL_OP1_203_28538_20130323_212946_outLine +BABEL_OP1_203_29685_20130319_225955_inLine +BABEL_OP1_203_29685_20130319_225955_outLine +BABEL_OP1_203_30250_20130307_153941_inLine +BABEL_OP1_203_30250_20130307_153941_outLine +BABEL_OP1_203_32832_20130410_151037_inLine +BABEL_OP1_203_32832_20130410_151037_outLine +BABEL_OP1_203_32872_20130715_135603_inLine +BABEL_OP1_203_32872_20130715_135603_outLine +BABEL_OP1_203_33216_20130427_175935_inLine +BABEL_OP1_203_33216_20130427_175935_outLine +BABEL_OP1_203_33216_20130427_182630_inLine +BABEL_OP1_203_33216_20130427_182630_outLine +BABEL_OP1_203_33424_20130728_164533_inLine +BABEL_OP1_203_33424_20130728_164533_outLine +BABEL_OP1_203_40624_20130812_181331_inLine +BABEL_OP1_203_40624_20130812_181331_outLine +BABEL_OP1_203_41038_20130629_153757_inLine +BABEL_OP1_203_41038_20130629_153757_outLine +BABEL_OP1_203_41109_20130410_205358_inLine +BABEL_OP1_203_41109_20130410_205358_outLine +BABEL_OP1_203_41109_20130410_210805_inLine +BABEL_OP1_203_41109_20130410_210805_outLine +BABEL_OP1_203_41233_20130801_201451_inLine +BABEL_OP1_203_41233_20130801_201451_outLine +BABEL_OP1_203_41890_20130731_203018_inLine +BABEL_OP1_203_41890_20130731_203018_outLine +BABEL_OP1_203_42231_20130330_183550_inLine +BABEL_OP1_203_42231_20130330_183550_outLine +BABEL_OP1_203_43789_20130324_223656_inLine 
+BABEL_OP1_203_43789_20130324_223656_outLine +BABEL_OP1_203_44255_20130410_165447_inLine +BABEL_OP1_203_44255_20130410_165447_outLine +BABEL_OP1_203_44420_20130320_170344_inLine +BABEL_OP1_203_44420_20130320_170344_outLine +BABEL_OP1_203_45140_20130725_155519_inLine +BABEL_OP1_203_45140_20130725_155519_outLine +BABEL_OP1_203_45770_20130309_142629_inLine +BABEL_OP1_203_45770_20130309_142629_outLine +BABEL_OP1_203_45777_20130324_154017_inLine +BABEL_OP1_203_45777_20130324_154017_outLine +BABEL_OP1_203_45908_20130728_202553_inLine +BABEL_OP1_203_45908_20130728_202553_outLine +BABEL_OP1_203_46333_20130309_224915_inLine +BABEL_OP1_203_46333_20130309_224915_outLine +BABEL_OP1_203_46905_20130812_144116_inLine +BABEL_OP1_203_46905_20130812_144116_outLine +BABEL_OP1_203_47959_20130323_214413_inLine +BABEL_OP1_203_47959_20130323_214413_outLine +BABEL_OP1_203_48399_20130309_162921_inLine +BABEL_OP1_203_48399_20130309_162921_outLine +BABEL_OP1_203_48399_20130309_164247_inLine +BABEL_OP1_203_48399_20130309_164247_outLine +BABEL_OP1_203_49870_20130813_180458_inLine +BABEL_OP1_203_49870_20130813_180458_outLine +BABEL_OP1_203_50962_20130326_161422_inLine +BABEL_OP1_203_50962_20130326_161422_outLine +BABEL_OP1_203_53072_20130714_171830_inLine +BABEL_OP1_203_53072_20130714_171830_outLine +BABEL_OP1_203_56019_20130512_160906_inLine +BABEL_OP1_203_56019_20130512_160906_outLine +BABEL_OP1_203_56523_20130319_184906_inLine +BABEL_OP1_203_56523_20130319_184906_outLine +BABEL_OP1_203_57650_20130411_204456_inLine +BABEL_OP1_203_57650_20130411_204456_outLine +BABEL_OP1_203_57922_20130329_164830_inLine +BABEL_OP1_203_57922_20130329_164830_outLine +BABEL_OP1_203_59898_20130309_161351_inLine +BABEL_OP1_203_59898_20130309_161351_outLine +BABEL_OP1_203_62434_20130309_161135_inLine +BABEL_OP1_203_62434_20130309_161135_outLine +BABEL_OP1_203_65339_20130813_152743_inLine +BABEL_OP1_203_65339_20130813_152743_outLine +BABEL_OP1_203_67085_20130803_171200_inLine +BABEL_OP1_203_67085_20130803_171200_outLine +BABEL_OP1_203_67373_20130314_214840_inLine +BABEL_OP1_203_67373_20130314_214840_outLine +BABEL_OP1_203_70726_20130812_194620_inLine +BABEL_OP1_203_70726_20130812_194620_outLine +BABEL_OP1_203_71282_20130425_151939_inLine +BABEL_OP1_203_71282_20130425_151939_outLine +BABEL_OP1_203_71333_20130314_164236_inLine +BABEL_OP1_203_71333_20130314_164236_outLine +BABEL_OP1_203_72073_20130813_163908_inLine +BABEL_OP1_203_72073_20130813_163908_outLine +BABEL_OP1_203_73119_20130318_205141_inLine +BABEL_OP1_203_73119_20130318_205141_outLine +BABEL_OP1_203_73119_20130318_210234_inLine +BABEL_OP1_203_73119_20130318_210234_outLine +BABEL_OP1_203_73757_20130327_154312_inLine +BABEL_OP1_203_73757_20130327_154312_outLine +BABEL_OP1_203_73837_20130320_223755_inLine +BABEL_OP1_203_73837_20130320_223755_outLine +BABEL_OP1_203_74111_20130720_165204_inLine +BABEL_OP1_203_74111_20130720_165204_outLine +BABEL_OP1_203_74641_20130329_192047_inLine +BABEL_OP1_203_74641_20130329_192047_outLine +BABEL_OP1_203_75359_20130719_144824_inLine +BABEL_OP1_203_75359_20130719_144824_outLine +BABEL_OP1_203_77225_20130813_222437_inLine +BABEL_OP1_203_77225_20130813_222437_outLine +BABEL_OP1_203_82904_20130726_192222_inLine +BABEL_OP1_203_82904_20130726_192222_outLine +BABEL_OP1_203_83771_20130729_194808_inLine +BABEL_OP1_203_88394_20130813_004013_inLine +BABEL_OP1_203_88394_20130813_004013_outLine +BABEL_OP1_203_88550_20130714_194639_inLine +BABEL_OP1_203_88550_20130714_194639_outLine +BABEL_OP1_203_88686_20130307_221522_inLine 
+BABEL_OP1_203_88686_20130307_221522_outLine
+BABEL_OP1_203_89372_20130306_162204_inLine
+BABEL_OP1_203_89372_20130306_162204_outLine
+BABEL_OP1_203_89794_20130714_144126_inLine
+BABEL_OP1_203_89794_20130714_144126_outLine
+BABEL_OP1_203_91930_20130424_162834_inLine
+BABEL_OP1_203_91930_20130424_162834_outLine
+BABEL_OP1_203_93861_20130327_171912_inLine
+BABEL_OP1_203_93861_20130327_171912_outLine
+BABEL_OP1_203_94002_20130324_154206_inLine
+BABEL_OP1_203_94002_20130324_154206_outLine
+BABEL_OP1_203_94237_20130801_180053_inLine
+BABEL_OP1_203_94237_20130801_180053_outLine
+BABEL_OP1_203_96088_20130714_191026_inLine
+BABEL_OP1_203_96088_20130714_191026_outLine
+BABEL_OP1_203_96525_20130713_172412_inLine
+BABEL_OP1_203_96525_20130713_172412_outLine
+BABEL_OP1_203_97097_20130721_180647_inLine
+BABEL_OP1_203_97097_20130721_180647_outLine
+BABEL_OP1_203_97570_20130501_151019_inLine
+BABEL_OP1_203_97570_20130501_151019_outLine
+BABEL_OP1_203_97911_20130427_144233_inLine
+BABEL_OP1_203_97911_20130427_144233_outLine
+BABEL_OP1_203_98489_20130314_215814_inLine
+BABEL_OP1_203_98489_20130314_215814_outLine
+BABEL_OP1_203_98580_20130324_195754_inLine
+BABEL_OP1_203_98580_20130324_195754_outLine
+BABEL_OP1_203_99264_20130726_161527_inLine
+BABEL_OP1_203_99264_20130726_161527_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/evalpart1.list b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list
new file mode 100644
index 00000000000..a4ebcdd2d76
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list
@@ -0,0 +1,70 @@
+BABEL_OP1_203_18863_20130423_201154_inLine
+BABEL_OP1_203_18863_20130423_201154_outLine
+BABEL_OP1_203_19545_20130328_181847_inLine
+BABEL_OP1_203_19545_20130328_181847_outLine
+BABEL_OP1_203_20738_20130501_144021_inLine
+BABEL_OP1_203_20738_20130501_144021_outLine
+BABEL_OP1_203_21794_20130323_191728_inLine
+BABEL_OP1_203_21794_20130323_191728_outLine
+BABEL_OP1_203_23395_20130423_140708_inLine
+BABEL_OP1_203_23395_20130423_140708_outLine
+BABEL_OP1_203_28538_20130323_211503_inLine
+BABEL_OP1_203_28538_20130323_211503_outLine
+BABEL_OP1_203_28538_20130323_212946_inLine
+BABEL_OP1_203_28538_20130323_212946_outLine
+BABEL_OP1_203_30250_20130307_153941_inLine
+BABEL_OP1_203_30250_20130307_153941_outLine
+BABEL_OP1_203_32872_20130715_135603_inLine
+BABEL_OP1_203_32872_20130715_135603_outLine
+BABEL_OP1_203_41109_20130410_205358_inLine
+BABEL_OP1_203_41109_20130410_205358_outLine
+BABEL_OP1_203_41109_20130410_210805_inLine
+BABEL_OP1_203_41109_20130410_210805_outLine
+BABEL_OP1_203_44255_20130410_165447_inLine
+BABEL_OP1_203_44255_20130410_165447_outLine
+BABEL_OP1_203_45140_20130725_155519_inLine
+BABEL_OP1_203_45140_20130725_155519_outLine
+BABEL_OP1_203_45777_20130324_154017_inLine
+BABEL_OP1_203_45777_20130324_154017_outLine
+BABEL_OP1_203_47959_20130323_214413_inLine
+BABEL_OP1_203_47959_20130323_214413_outLine
+BABEL_OP1_203_48399_20130309_162921_inLine
+BABEL_OP1_203_48399_20130309_162921_outLine
+BABEL_OP1_203_48399_20130309_164247_inLine
+BABEL_OP1_203_48399_20130309_164247_outLine
+BABEL_OP1_203_56019_20130512_160906_inLine
+BABEL_OP1_203_56019_20130512_160906_outLine
+BABEL_OP1_203_56523_20130319_184906_inLine
+BABEL_OP1_203_56523_20130319_184906_outLine
+BABEL_OP1_203_57650_20130411_204456_inLine
+BABEL_OP1_203_57650_20130411_204456_outLine
+BABEL_OP1_203_57922_20130329_164830_inLine
+BABEL_OP1_203_57922_20130329_164830_outLine
+BABEL_OP1_203_59898_20130309_161351_inLine
+BABEL_OP1_203_59898_20130309_161351_outLine
+BABEL_OP1_203_67085_20130803_171200_inLine
+BABEL_OP1_203_67085_20130803_171200_outLine
+BABEL_OP1_203_71282_20130425_151939_inLine
+BABEL_OP1_203_71282_20130425_151939_outLine
+BABEL_OP1_203_73119_20130318_205141_inLine
+BABEL_OP1_203_73119_20130318_205141_outLine
+BABEL_OP1_203_73119_20130318_210234_inLine
+BABEL_OP1_203_73119_20130318_210234_outLine
+BABEL_OP1_203_73837_20130320_223755_inLine
+BABEL_OP1_203_73837_20130320_223755_outLine
+BABEL_OP1_203_74111_20130720_165204_inLine
+BABEL_OP1_203_74111_20130720_165204_outLine
+BABEL_OP1_203_75359_20130719_144824_inLine
+BABEL_OP1_203_75359_20130719_144824_outLine
+BABEL_OP1_203_89372_20130306_162204_inLine
+BABEL_OP1_203_89372_20130306_162204_outLine
+BABEL_OP1_203_93861_20130327_171912_inLine
+BABEL_OP1_203_93861_20130327_171912_outLine
+BABEL_OP1_203_94002_20130324_154206_inLine
+BABEL_OP1_203_94002_20130324_154206_outLine
+BABEL_OP1_203_97097_20130721_180647_inLine
+BABEL_OP1_203_97097_20130721_180647_outLine
+BABEL_OP1_203_97570_20130501_151019_inLine
+BABEL_OP1_203_97570_20130501_151019_outLine
+BABEL_OP1_203_98580_20130324_195754_inLine
+BABEL_OP1_203_98580_20130324_195754_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list
new file mode 100644
index 00000000000..b7fb97d771f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list
@@ -0,0 +1,781 @@
+BABEL_OP1_203_10036_20130318_191401_inLine
+BABEL_OP1_203_10036_20130318_191401_outLine
+BABEL_OP1_203_10411_20130511_174439_inLine
+BABEL_OP1_203_10411_20130511_174439_outLine
+BABEL_OP1_203_10482_20130403_160013_inLine
+BABEL_OP1_203_10482_20130403_160013_outLine
+BABEL_OP1_203_10524_20130425_183925_inLine
+BABEL_OP1_203_10524_20130425_183925_outLine
+BABEL_OP1_203_10524_20130425_185048_inLine
+BABEL_OP1_203_10524_20130425_185048_outLine
+BABEL_OP1_203_10901_20130321_180232_inLine
+BABEL_OP1_203_10901_20130321_180232_outLine
+BABEL_OP1_203_10938_20130319_190809_inLine
+BABEL_OP1_203_10938_20130319_190809_outLine
+BABEL_OP1_203_10966_20130319_135742_inLine
+BABEL_OP1_203_10966_20130319_135742_outLine
+BABEL_OP1_203_10974_20130425_162609_inLine
+BABEL_OP1_203_10974_20130425_162609_outLine
+BABEL_OP1_203_11352_20130426_170450_inLine
+BABEL_OP1_203_11352_20130426_170450_outLine
+BABEL_OP1_203_11486_20130428_131348_inLine
+BABEL_OP1_203_11486_20130428_131348_outLine
+BABEL_OP1_203_11663_20130402_202025_inLine
+BABEL_OP1_203_11663_20130402_202025_outLine
+BABEL_OP1_203_11673_20130306_201125_inLine
+BABEL_OP1_203_11673_20130306_201125_outLine
+BABEL_OP1_203_11797_20130309_195420_inLine
+BABEL_OP1_203_11797_20130309_195420_outLine
+BABEL_OP1_203_11859_20130511_201411_inLine
+BABEL_OP1_203_11859_20130511_201411_outLine
+BABEL_OP1_203_12036_20130312_182225_inLine
+BABEL_OP1_203_12036_20130312_182225_outLine
+BABEL_OP1_203_12220_20130321_160841_inLine
+BABEL_OP1_203_12220_20130321_160841_outLine
+BABEL_OP1_203_12606_20130726_174724_inLine
+BABEL_OP1_203_12606_20130726_174724_outLine
+BABEL_OP1_203_12609_20130727_133133_outLine
+BABEL_OP1_203_12767_20130313_214914_inLine
+BABEL_OP1_203_12767_20130313_214914_outLine
+BABEL_OP1_203_12851_20130304_181335_inLine
+BABEL_OP1_203_12851_20130304_181335_outLine
+BABEL_OP1_203_12851_20130304_182835_inLine
+BABEL_OP1_203_12851_20130304_182835_outLine
+BABEL_OP1_203_12851_20130304_185138_inLine
+BABEL_OP1_203_12851_20130304_185138_outLine
+BABEL_OP1_203_13126_20130421_175306_inLine
+BABEL_OP1_203_13126_20130421_175306_outLine
+BABEL_OP1_203_13126_20130421_180154_inLine
+BABEL_OP1_203_13126_20130421_180154_outLine
+BABEL_OP1_203_13324_20130313_185155_inLine +BABEL_OP1_203_13324_20130313_185155_outLine +BABEL_OP1_203_13483_20130409_231107_inLine +BABEL_OP1_203_13483_20130409_231107_outLine +BABEL_OP1_203_13490_20130322_143131_inLine +BABEL_OP1_203_13490_20130322_143131_outLine +BABEL_OP1_203_13664_20130304_155051_inLine +BABEL_OP1_203_13664_20130304_155051_outLine +BABEL_OP1_203_13709_20130410_222037_inLine +BABEL_OP1_203_13709_20130410_222037_outLine +BABEL_OP1_203_13744_20130307_215445_inLine +BABEL_OP1_203_13744_20130307_215445_outLine +BABEL_OP1_203_13792_20130310_142445_inLine +BABEL_OP1_203_13792_20130310_142445_outLine +BABEL_OP1_203_14137_20130314_181335_inLine +BABEL_OP1_203_14137_20130314_181335_outLine +BABEL_OP1_203_14141_20130410_212719_inLine +BABEL_OP1_203_14141_20130410_212719_outLine +BABEL_OP1_203_14179_20130402_211621_inLine +BABEL_OP1_203_14179_20130402_211621_outLine +BABEL_OP1_203_14229_20130324_162827_inLine +BABEL_OP1_203_14229_20130324_162827_outLine +BABEL_OP1_203_14237_20130313_222650_inLine +BABEL_OP1_203_14237_20130313_222650_outLine +BABEL_OP1_203_14560_20130425_140155_inLine +BABEL_OP1_203_14560_20130425_140155_outLine +BABEL_OP1_203_14719_20130406_191558_inLine +BABEL_OP1_203_14719_20130406_191558_outLine +BABEL_OP1_203_14725_20130309_185639_inLine +BABEL_OP1_203_14725_20130309_185639_outLine +BABEL_OP1_203_14729_20130411_214726_inLine +BABEL_OP1_203_14729_20130411_214726_outLine +BABEL_OP1_203_14814_20130314_133131_inLine +BABEL_OP1_203_14814_20130314_133131_outLine +BABEL_OP1_203_14899_20130311_184638_inLine +BABEL_OP1_203_14899_20130311_184638_outLine +BABEL_OP1_203_14929_20130324_184056_inLine +BABEL_OP1_203_14929_20130324_184056_outLine +BABEL_OP1_203_15024_20130322_152846_inLine +BABEL_OP1_203_15024_20130322_152846_outLine +BABEL_OP1_203_15163_20130319_154026_inLine +BABEL_OP1_203_15163_20130319_154026_outLine +BABEL_OP1_203_15227_20130513_222256_inLine +BABEL_OP1_203_15227_20130513_222256_outLine +BABEL_OP1_203_15322_20130511_152438_inLine +BABEL_OP1_203_15322_20130511_152438_outLine +BABEL_OP1_203_15324_20130512_224242_inLine +BABEL_OP1_203_15324_20130512_224242_outLine +BABEL_OP1_203_15324_20130512_225202_inLine +BABEL_OP1_203_15324_20130512_225202_outLine +BABEL_OP1_203_15535_20130329_143236_inLine +BABEL_OP1_203_15535_20130329_143236_outLine +BABEL_OP1_203_15638_20130409_150143_inLine +BABEL_OP1_203_15638_20130409_150143_outLine +BABEL_OP1_203_15730_20130307_201711_inLine +BABEL_OP1_203_15730_20130307_201711_outLine +BABEL_OP1_203_15749_20130407_175145_inLine +BABEL_OP1_203_15749_20130407_175145_outLine +BABEL_OP1_203_15902_20130309_193940_inLine +BABEL_OP1_203_15902_20130309_193940_outLine +BABEL_OP1_203_16149_20130309_171014_inLine +BABEL_OP1_203_16149_20130309_171014_outLine +BABEL_OP1_203_16924_20130720_175321_inLine +BABEL_OP1_203_16924_20130720_175321_outLine +BABEL_OP1_203_17032_20130402_175428_inLine +BABEL_OP1_203_17032_20130402_175428_outLine +BABEL_OP1_203_17097_20130430_173440_inLine +BABEL_OP1_203_17097_20130430_173440_outLine +BABEL_OP1_203_17115_20130425_173844_inLine +BABEL_OP1_203_17115_20130425_173844_outLine +BABEL_OP1_203_17115_20130425_175816_inLine +BABEL_OP1_203_17115_20130425_175816_outLine +BABEL_OP1_203_17472_20130408_215034_inLine +BABEL_OP1_203_17472_20130408_215034_outLine +BABEL_OP1_203_17567_20130425_145936_inLine +BABEL_OP1_203_17567_20130425_145936_outLine +BABEL_OP1_203_17751_20130512_155328_inLine +BABEL_OP1_203_17751_20130512_155328_outLine +BABEL_OP1_203_17914_20130503_215602_inLine 
+BABEL_OP1_203_17914_20130503_215602_outLine +BABEL_OP1_203_17923_20130314_203130_inLine +BABEL_OP1_203_17923_20130314_203130_outLine +BABEL_OP1_203_18118_20130730_191442_inLine +BABEL_OP1_203_18118_20130730_191442_outLine +BABEL_OP1_203_18380_20130327_214619_inLine +BABEL_OP1_203_18380_20130327_214619_outLine +BABEL_OP1_203_18566_20130503_153904_inLine +BABEL_OP1_203_18566_20130503_153904_outLine +BABEL_OP1_203_18939_20130311_144740_inLine +BABEL_OP1_203_18939_20130311_144740_outLine +BABEL_OP1_203_19101_20130423_142324_inLine +BABEL_OP1_203_19101_20130423_142324_outLine +BABEL_OP1_203_19134_20130328_220635_inLine +BABEL_OP1_203_19134_20130328_220635_outLine +BABEL_OP1_203_19589_20130727_143145_inLine +BABEL_OP1_203_19589_20130727_143145_outLine +BABEL_OP1_203_19703_20130318_160958_inLine +BABEL_OP1_203_19703_20130318_160958_outLine +BABEL_OP1_203_19703_20130318_162314_inLine +BABEL_OP1_203_19703_20130318_162314_outLine +BABEL_OP1_203_19773_20130407_183531_inLine +BABEL_OP1_203_19773_20130407_183531_outLine +BABEL_OP1_203_19782_20130404_170141_inLine +BABEL_OP1_203_19782_20130404_170141_outLine +BABEL_OP1_203_20133_20130304_160351_inLine +BABEL_OP1_203_20133_20130304_160351_outLine +BABEL_OP1_203_20330_20130410_161539_inLine +BABEL_OP1_203_20330_20130410_161539_outLine +BABEL_OP1_203_20682_20130406_194906_inLine +BABEL_OP1_203_20682_20130406_194906_outLine +BABEL_OP1_203_20768_20130407_190152_inLine +BABEL_OP1_203_20768_20130407_190152_outLine +BABEL_OP1_203_20985_20130330_210730_inLine +BABEL_OP1_203_20985_20130330_210730_outLine +BABEL_OP1_203_21004_20130410_181101_inLine +BABEL_OP1_203_21004_20130410_181101_outLine +BABEL_OP1_203_21004_20130410_182740_inLine +BABEL_OP1_203_21004_20130410_182740_outLine +BABEL_OP1_203_21109_20130406_161601_inLine +BABEL_OP1_203_21109_20130406_161601_outLine +BABEL_OP1_203_21206_20130312_164516_inLine +BABEL_OP1_203_21206_20130312_164516_outLine +BABEL_OP1_203_21315_20130501_151005_inLine +BABEL_OP1_203_21315_20130501_151005_outLine +BABEL_OP1_203_21327_20130405_203336_inLine +BABEL_OP1_203_21327_20130405_203336_outLine +BABEL_OP1_203_21435_20130423_181043_inLine +BABEL_OP1_203_21435_20130423_181043_outLine +BABEL_OP1_203_22280_20130329_161951_inLine +BABEL_OP1_203_22280_20130329_161951_outLine +BABEL_OP1_203_22321_20130309_191222_inLine +BABEL_OP1_203_22321_20130309_191222_outLine +BABEL_OP1_203_22446_20130309_134600_inLine +BABEL_OP1_203_22446_20130309_134600_outLine +BABEL_OP1_203_22494_20130402_171234_inLine +BABEL_OP1_203_22494_20130402_171234_outLine +BABEL_OP1_203_22612_20130406_220338_inLine +BABEL_OP1_203_22612_20130406_220338_outLine +BABEL_OP1_203_22624_20130403_190935_inLine +BABEL_OP1_203_22624_20130403_190935_outLine +BABEL_OP1_203_22918_20130410_190723_inLine +BABEL_OP1_203_22918_20130410_190723_outLine +BABEL_OP1_203_23006_20130319_211412_inLine +BABEL_OP1_203_23006_20130319_211412_outLine +BABEL_OP1_203_23046_20130322_165811_inLine +BABEL_OP1_203_23046_20130322_165811_outLine +BABEL_OP1_203_23092_20130406_014425_inLine +BABEL_OP1_203_23092_20130406_014425_outLine +BABEL_OP1_203_23092_20130406_015338_inLine +BABEL_OP1_203_23092_20130406_015338_outLine +BABEL_OP1_203_23153_20130320_194433_inLine +BABEL_OP1_203_23153_20130320_194433_outLine +BABEL_OP1_203_23239_20130331_171214_inLine +BABEL_OP1_203_23239_20130331_171214_outLine +BABEL_OP1_203_23505_20130309_204825_inLine +BABEL_OP1_203_23505_20130309_204825_outLine +BABEL_OP1_203_23980_20130321_193946_inLine +BABEL_OP1_203_23980_20130321_193946_outLine 
+BABEL_OP1_203_24017_20130424_174037_inLine +BABEL_OP1_203_24017_20130424_174037_outLine +BABEL_OP1_203_24253_20130423_175626_inLine +BABEL_OP1_203_24253_20130423_175626_outLine +BABEL_OP1_203_24270_20130329_153331_inLine +BABEL_OP1_203_24270_20130329_153331_outLine +BABEL_OP1_203_24290_20130423_133315_inLine +BABEL_OP1_203_24290_20130423_133315_outLine +BABEL_OP1_203_24323_20130320_160949_inLine +BABEL_OP1_203_24323_20130320_160949_outLine +BABEL_OP1_203_24470_20130329_205656_inLine +BABEL_OP1_203_24470_20130329_205656_outLine +BABEL_OP1_203_24501_20130421_141711_inLine +BABEL_OP1_203_24501_20130421_141711_outLine +BABEL_OP1_203_24569_20130405_200644_inLine +BABEL_OP1_203_24569_20130405_200644_outLine +BABEL_OP1_203_24586_20130506_203931_inLine +BABEL_OP1_203_24586_20130506_203931_outLine +BABEL_OP1_203_24589_20130323_190409_inLine +BABEL_OP1_203_24589_20130323_190409_outLine +BABEL_OP1_203_24589_20130323_192722_inLine +BABEL_OP1_203_24589_20130323_192722_outLine +BABEL_OP1_203_24590_20130321_221146_inLine +BABEL_OP1_203_24590_20130321_221146_outLine +BABEL_OP1_203_24679_20130307_145644_inLine +BABEL_OP1_203_24679_20130307_145644_outLine +BABEL_OP1_203_24779_20130426_183526_inLine +BABEL_OP1_203_24779_20130426_183526_outLine +BABEL_OP1_203_24982_20130327_153429_inLine +BABEL_OP1_203_24982_20130327_153429_outLine +BABEL_OP1_203_25015_20130728_150746_inLine +BABEL_OP1_203_25015_20130728_150746_outLine +BABEL_OP1_203_25085_20130508_145922_inLine +BABEL_OP1_203_25085_20130508_145922_outLine +BABEL_OP1_203_25220_20130502_183943_inLine +BABEL_OP1_203_25220_20130502_183943_outLine +BABEL_OP1_203_25412_20130329_201051_inLine +BABEL_OP1_203_25412_20130329_201051_outLine +BABEL_OP1_203_25698_20130509_182226_inLine +BABEL_OP1_203_25698_20130509_182226_outLine +BABEL_OP1_203_25719_20130426_202355_inLine +BABEL_OP1_203_25719_20130426_202355_outLine +BABEL_OP1_203_25767_20130311_183243_inLine +BABEL_OP1_203_25767_20130311_183243_outLine +BABEL_OP1_203_25961_20130311_171235_inLine +BABEL_OP1_203_25961_20130311_171235_outLine +BABEL_OP1_203_26388_20130318_200305_inLine +BABEL_OP1_203_26388_20130318_200305_outLine +BABEL_OP1_203_26507_20130430_234212_inLine +BABEL_OP1_203_26507_20130430_234212_outLine +BABEL_OP1_203_26574_20130411_160556_inLine +BABEL_OP1_203_26574_20130411_160556_outLine +BABEL_OP1_203_26602_20130801_171131_inLine +BABEL_OP1_203_26602_20130801_171131_outLine +BABEL_OP1_203_26836_20130315_160512_inLine +BABEL_OP1_203_26836_20130315_160512_outLine +BABEL_OP1_203_27125_20130308_003724_inLine +BABEL_OP1_203_27125_20130308_003724_outLine +BABEL_OP1_203_27218_20130312_194932_inLine +BABEL_OP1_203_27218_20130312_194932_outLine +BABEL_OP1_203_27478_20130501_195141_inLine +BABEL_OP1_203_27478_20130501_195141_outLine +BABEL_OP1_203_27478_20130501_200641_inLine +BABEL_OP1_203_27478_20130501_200641_outLine +BABEL_OP1_203_27590_20130405_200930_inLine +BABEL_OP1_203_27590_20130405_200930_outLine +BABEL_OP1_203_27841_20130403_211143_inLine +BABEL_OP1_203_27841_20130403_211143_outLine +BABEL_OP1_203_28190_20130730_195836_inLine +BABEL_OP1_203_28190_20130730_195836_outLine +BABEL_OP1_203_28280_20130501_220643_inLine +BABEL_OP1_203_28280_20130501_220643_outLine +BABEL_OP1_203_28419_20130319_165427_inLine +BABEL_OP1_203_28419_20130319_165427_outLine +BABEL_OP1_203_28522_20130328_170837_inLine +BABEL_OP1_203_28522_20130328_170837_outLine +BABEL_OP1_203_28775_20130313_213707_inLine +BABEL_OP1_203_28775_20130313_213707_outLine +BABEL_OP1_203_28775_20130313_215352_inLine 
+BABEL_OP1_203_28775_20130313_215352_outLine +BABEL_OP1_203_28945_20130315_171902_inLine +BABEL_OP1_203_28945_20130315_171902_outLine +BABEL_OP1_203_29023_20130313_194148_inLine +BABEL_OP1_203_29023_20130313_194148_outLine +BABEL_OP1_203_29023_20130313_195106_inLine +BABEL_OP1_203_29023_20130313_195106_outLine +BABEL_OP1_203_29039_20130402_153541_inLine +BABEL_OP1_203_29039_20130402_153541_outLine +BABEL_OP1_203_29168_20130306_213504_inLine +BABEL_OP1_203_29168_20130306_213504_outLine +BABEL_OP1_203_29323_20130403_215525_inLine +BABEL_OP1_203_29323_20130403_215525_outLine +BABEL_OP1_203_29416_20130421_133101_inLine +BABEL_OP1_203_29416_20130421_133101_outLine +BABEL_OP1_203_29439_20130422_150608_inLine +BABEL_OP1_203_29439_20130422_150608_outLine +BABEL_OP1_203_30013_20130331_170538_inLine +BABEL_OP1_203_30013_20130331_170538_outLine +BABEL_OP1_203_30395_20130318_180120_inLine +BABEL_OP1_203_30395_20130318_180120_outLine +BABEL_OP1_203_30645_20130309_151850_inLine +BABEL_OP1_203_30645_20130309_151850_outLine +BABEL_OP1_203_31184_20130322_141512_inLine +BABEL_OP1_203_31184_20130322_141512_outLine +BABEL_OP1_203_31184_20130322_142743_inLine +BABEL_OP1_203_31184_20130322_142743_outLine +BABEL_OP1_203_31490_20130321_210518_inLine +BABEL_OP1_203_31490_20130321_210518_outLine +BABEL_OP1_203_31992_20130313_143826_inLine +BABEL_OP1_203_31992_20130313_143826_outLine +BABEL_OP1_203_32097_20130304_195431_inLine +BABEL_OP1_203_32097_20130304_195431_outLine +BABEL_OP1_203_32122_20130320_174321_inLine +BABEL_OP1_203_32122_20130320_174321_outLine +BABEL_OP1_203_32122_20130320_175419_inLine +BABEL_OP1_203_32122_20130320_175419_outLine +BABEL_OP1_203_32244_20130728_182847_inLine +BABEL_OP1_203_32244_20130728_182847_outLine +BABEL_OP1_203_32914_20130411_174738_inLine +BABEL_OP1_203_32914_20130411_174738_outLine +BABEL_OP1_203_32998_20130329_155417_inLine +BABEL_OP1_203_32998_20130329_155417_outLine +BABEL_OP1_203_33175_20130307_204134_inLine +BABEL_OP1_203_33175_20130307_204134_outLine +BABEL_OP1_203_33476_20130320_140412_inLine +BABEL_OP1_203_33476_20130320_140412_outLine +BABEL_OP1_203_33672_20130312_165130_inLine +BABEL_OP1_203_33672_20130312_165130_outLine +BABEL_OP1_203_33704_20130405_220001_inLine +BABEL_OP1_203_33704_20130405_220001_outLine +BABEL_OP1_203_33840_20130803_192343_inLine +BABEL_OP1_203_33840_20130803_192343_outLine +BABEL_OP1_203_34145_20130331_145240_inLine +BABEL_OP1_203_34145_20130331_145240_outLine +BABEL_OP1_203_35139_20130313_143646_inLine +BABEL_OP1_203_35139_20130313_143646_outLine +BABEL_OP1_203_36505_20130731_191406_inLine +BABEL_OP1_203_36505_20130731_191406_outLine +BABEL_OP1_203_36594_20130421_182303_inLine +BABEL_OP1_203_36594_20130421_182303_outLine +BABEL_OP1_203_37598_20130330_000102_inLine +BABEL_OP1_203_37598_20130330_000102_outLine +BABEL_OP1_203_38979_20130409_173446_inLine +BABEL_OP1_203_38979_20130409_173446_outLine +BABEL_OP1_203_38979_20130409_174405_inLine +BABEL_OP1_203_38979_20130409_174405_outLine +BABEL_OP1_203_39006_20130506_192659_inLine +BABEL_OP1_203_39006_20130506_192659_outLine +BABEL_OP1_203_39555_20130720_183746_inLine +BABEL_OP1_203_39555_20130720_183746_outLine +BABEL_OP1_203_39848_20130320_133756_inLine +BABEL_OP1_203_39848_20130320_133756_outLine +BABEL_OP1_203_40557_20130404_005522_inLine +BABEL_OP1_203_40557_20130404_005522_outLine +BABEL_OP1_203_40565_20130331_171210_inLine +BABEL_OP1_203_40565_20130331_171210_outLine +BABEL_OP1_203_40713_20130321_155930_inLine +BABEL_OP1_203_40713_20130321_155930_outLine 
+BABEL_OP1_203_41073_20130721_172038_inLine +BABEL_OP1_203_41073_20130721_172038_outLine +BABEL_OP1_203_41097_20130427_224950_inLine +BABEL_OP1_203_41097_20130427_224950_outLine +BABEL_OP1_203_41100_20130313_161755_inLine +BABEL_OP1_203_41100_20130313_161755_outLine +BABEL_OP1_203_41174_20130318_203041_inLine +BABEL_OP1_203_41174_20130318_203041_outLine +BABEL_OP1_203_41334_20130501_232034_inLine +BABEL_OP1_203_41334_20130501_232034_outLine +BABEL_OP1_203_41442_20130404_174409_inLine +BABEL_OP1_203_41442_20130404_174409_outLine +BABEL_OP1_203_41469_20130313_185923_inLine +BABEL_OP1_203_41469_20130313_185923_outLine +BABEL_OP1_203_41609_20130309_175203_inLine +BABEL_OP1_203_41609_20130309_175203_outLine +BABEL_OP1_203_41680_20130304_134640_inLine +BABEL_OP1_203_41680_20130304_134640_outLine +BABEL_OP1_203_42029_20130403_184623_inLine +BABEL_OP1_203_42029_20130403_184623_outLine +BABEL_OP1_203_42126_20130805_213859_inLine +BABEL_OP1_203_42126_20130805_213859_outLine +BABEL_OP1_203_42243_20130313_170336_inLine +BABEL_OP1_203_42243_20130313_170336_outLine +BABEL_OP1_203_42299_20130508_203220_inLine +BABEL_OP1_203_42299_20130508_203220_outLine +BABEL_OP1_203_42299_20130508_204824_inLine +BABEL_OP1_203_42299_20130508_204824_outLine +BABEL_OP1_203_42309_20130428_191239_inLine +BABEL_OP1_203_42309_20130428_191239_outLine +BABEL_OP1_203_42434_20130323_160637_inLine +BABEL_OP1_203_42434_20130323_160637_outLine +BABEL_OP1_203_42834_20130404_194840_inLine +BABEL_OP1_203_42834_20130404_194840_outLine +BABEL_OP1_203_42848_20130513_201112_inLine +BABEL_OP1_203_42848_20130513_201112_outLine +BABEL_OP1_203_42883_20130729_171646_inLine +BABEL_OP1_203_42883_20130729_171646_outLine +BABEL_OP1_203_43368_20130327_215424_inLine +BABEL_OP1_203_43368_20130327_215424_outLine +BABEL_OP1_203_43388_20130327_192024_inLine +BABEL_OP1_203_43388_20130327_192024_outLine +BABEL_OP1_203_43588_20130714_163553_inLine +BABEL_OP1_203_43588_20130714_163553_outLine +BABEL_OP1_203_43784_20130314_171933_inLine +BABEL_OP1_203_43784_20130314_171933_outLine +BABEL_OP1_203_43788_20130504_173234_inLine +BABEL_OP1_203_43788_20130504_173234_outLine +BABEL_OP1_203_43920_20130405_194800_inLine +BABEL_OP1_203_43920_20130405_194800_outLine +BABEL_OP1_203_44477_20130331_190402_inLine +BABEL_OP1_203_44477_20130331_190402_outLine +BABEL_OP1_203_44478_20130730_170938_inLine +BABEL_OP1_203_44478_20130730_170938_outLine +BABEL_OP1_203_44619_20130313_175437_inLine +BABEL_OP1_203_44619_20130313_175437_outLine +BABEL_OP1_203_44709_20130331_183159_inLine +BABEL_OP1_203_44709_20130331_183159_outLine +BABEL_OP1_203_44961_20130311_173427_inLine +BABEL_OP1_203_44961_20130311_173427_outLine +BABEL_OP1_203_45560_20130309_173444_inLine +BABEL_OP1_203_45560_20130309_173444_outLine +BABEL_OP1_203_45642_20130313_202110_inLine +BABEL_OP1_203_45642_20130313_202110_outLine +BABEL_OP1_203_45851_20130801_014413_inLine +BABEL_OP1_203_45851_20130801_014413_outLine +BABEL_OP1_203_46310_20130309_211431_inLine +BABEL_OP1_203_46310_20130309_211431_outLine +BABEL_OP1_203_46550_20130313_153012_inLine +BABEL_OP1_203_46550_20130313_153012_outLine +BABEL_OP1_203_46625_20130304_201959_inLine +BABEL_OP1_203_46625_20130304_201959_outLine +BABEL_OP1_203_46681_20130313_203139_inLine +BABEL_OP1_203_46681_20130313_203139_outLine +BABEL_OP1_203_46688_20130314_212550_inLine +BABEL_OP1_203_46688_20130314_212550_outLine +BABEL_OP1_203_46763_20130426_160841_inLine +BABEL_OP1_203_46763_20130426_160841_outLine +BABEL_OP1_203_47186_20130405_120609_inLine 
+BABEL_OP1_203_47186_20130405_120609_outLine +BABEL_OP1_203_47270_20130410_160110_inLine +BABEL_OP1_203_47270_20130410_160110_outLine +BABEL_OP1_203_47487_20130321_145055_inLine +BABEL_OP1_203_47487_20130321_145055_outLine +BABEL_OP1_203_47823_20130406_151016_inLine +BABEL_OP1_203_47823_20130406_151016_outLine +BABEL_OP1_203_47866_20130723_152640_inLine +BABEL_OP1_203_47866_20130723_152640_outLine +BABEL_OP1_203_48422_20130425_175947_inLine +BABEL_OP1_203_48422_20130425_175947_outLine +BABEL_OP1_203_48610_20130309_222037_inLine +BABEL_OP1_203_48610_20130309_222037_outLine +BABEL_OP1_203_49001_20130315_160533_inLine +BABEL_OP1_203_49001_20130315_160533_outLine +BABEL_OP1_203_49216_20130307_211955_inLine +BABEL_OP1_203_49216_20130307_211955_outLine +BABEL_OP1_203_49287_20130331_155341_inLine +BABEL_OP1_203_49287_20130331_155341_outLine +BABEL_OP1_203_49437_20130405_194333_inLine +BABEL_OP1_203_49437_20130405_194333_outLine +BABEL_OP1_203_49437_20130405_195645_inLine +BABEL_OP1_203_49437_20130405_195645_outLine +BABEL_OP1_203_49630_20130408_182919_inLine +BABEL_OP1_203_49630_20130408_182919_outLine +BABEL_OP1_203_49637_20130313_134853_inLine +BABEL_OP1_203_49637_20130313_134853_outLine +BABEL_OP1_203_49768_20130320_164815_inLine +BABEL_OP1_203_49768_20130320_164815_outLine +BABEL_OP1_203_49902_20130323_175920_inLine +BABEL_OP1_203_49902_20130323_175920_outLine +BABEL_OP1_203_50090_20130726_145642_inLine +BABEL_OP1_203_50090_20130726_145642_outLine +BABEL_OP1_203_50175_20130311_181803_inLine +BABEL_OP1_203_50175_20130311_181803_outLine +BABEL_OP1_203_50726_20130307_135236_inLine +BABEL_OP1_203_50726_20130307_135236_outLine +BABEL_OP1_203_51414_20130729_152916_inLine +BABEL_OP1_203_51414_20130729_152916_outLine +BABEL_OP1_203_51530_20130803_174620_inLine +BABEL_OP1_203_51530_20130803_174620_outLine +BABEL_OP1_203_51611_20130312_195333_inLine +BABEL_OP1_203_51611_20130312_195333_outLine +BABEL_OP1_203_51701_20130508_232537_inLine +BABEL_OP1_203_51701_20130508_232537_outLine +BABEL_OP1_203_51819_20130328_150620_inLine +BABEL_OP1_203_51819_20130328_150620_outLine +BABEL_OP1_203_51955_20130314_175859_inLine +BABEL_OP1_203_51955_20130314_175859_outLine +BABEL_OP1_203_51955_20130314_180731_inLine +BABEL_OP1_203_51955_20130314_180731_outLine +BABEL_OP1_203_52246_20130319_221049_inLine +BABEL_OP1_203_52246_20130319_221049_outLine +BABEL_OP1_203_52272_20130313_140038_inLine +BABEL_OP1_203_52272_20130313_140038_outLine +BABEL_OP1_203_52404_20130409_005414_inLine +BABEL_OP1_203_52404_20130409_005414_outLine +BABEL_OP1_203_52422_20130427_140502_inLine +BABEL_OP1_203_52422_20130427_140502_outLine +BABEL_OP1_203_52447_20130513_224209_inLine +BABEL_OP1_203_52447_20130513_224209_outLine +BABEL_OP1_203_52490_20130309_141915_inLine +BABEL_OP1_203_52490_20130309_141915_outLine +BABEL_OP1_203_52717_20130311_173849_inLine +BABEL_OP1_203_52717_20130311_173849_outLine +BABEL_OP1_203_52854_20130221_192229_inLine +BABEL_OP1_203_52854_20130221_192229_outLine +BABEL_OP1_203_53063_20130407_210935_inLine +BABEL_OP1_203_53063_20130407_210935_outLine +BABEL_OP1_203_53665_20130727_150857_inLine +BABEL_OP1_203_53665_20130727_150857_outLine +BABEL_OP1_203_53842_20130322_165451_inLine +BABEL_OP1_203_53842_20130322_165451_outLine +BABEL_OP1_203_54046_20130804_193101_inLine +BABEL_OP1_203_54046_20130804_193101_outLine +BABEL_OP1_203_54074_20130319_150208_inLine +BABEL_OP1_203_54074_20130319_150208_outLine +BABEL_OP1_203_54104_20130309_204103_inLine +BABEL_OP1_203_54104_20130309_204103_outLine 
+BABEL_OP1_203_54390_20130313_161947_inLine +BABEL_OP1_203_54390_20130313_161947_outLine +BABEL_OP1_203_54477_20130408_133628_inLine +BABEL_OP1_203_54477_20130408_133628_outLine +BABEL_OP1_203_54530_20130424_194302_inLine +BABEL_OP1_203_54530_20130424_194302_outLine +BABEL_OP1_203_54697_20130405_153323_inLine +BABEL_OP1_203_54697_20130405_153323_outLine +BABEL_OP1_203_54744_20130311_153522_inLine +BABEL_OP1_203_54744_20130311_153522_outLine +BABEL_OP1_203_54827_20130803_201026_inLine +BABEL_OP1_203_54827_20130803_201026_outLine +BABEL_OP1_203_54953_20130319_135125_inLine +BABEL_OP1_203_54953_20130319_135125_outLine +BABEL_OP1_203_55259_20130323_181918_inLine +BABEL_OP1_203_55259_20130323_181918_outLine +BABEL_OP1_203_55818_20130309_163433_inLine +BABEL_OP1_203_55818_20130309_163433_outLine +BABEL_OP1_203_55950_20130728_141857_inLine +BABEL_OP1_203_55950_20130728_141857_outLine +BABEL_OP1_203_56076_20130728_212423_inLine +BABEL_OP1_203_56076_20130728_212423_outLine +BABEL_OP1_203_56198_20130314_163346_inLine +BABEL_OP1_203_56198_20130314_163346_outLine +BABEL_OP1_203_56198_20130314_164412_inLine +BABEL_OP1_203_56198_20130314_164412_outLine +BABEL_OP1_203_56213_20130407_184955_inLine +BABEL_OP1_203_56213_20130407_184955_outLine +BABEL_OP1_203_56306_20130408_202539_inLine +BABEL_OP1_203_56306_20130408_202539_outLine +BABEL_OP1_203_56307_20130401_212823_inLine +BABEL_OP1_203_56307_20130401_212823_outLine +BABEL_OP1_203_56465_20130503_211423_inLine +BABEL_OP1_203_56465_20130503_211423_outLine +BABEL_OP1_203_56677_20130407_020513_inLine +BABEL_OP1_203_56677_20130407_020513_outLine +BABEL_OP1_203_56826_20130403_155349_inLine +BABEL_OP1_203_56826_20130403_155349_outLine +BABEL_OP1_203_57093_20130323_155842_inLine +BABEL_OP1_203_57093_20130323_155842_outLine +BABEL_OP1_203_57116_20130306_200913_inLine +BABEL_OP1_203_57116_20130306_200913_outLine +BABEL_OP1_203_57529_20130404_225031_inLine +BABEL_OP1_203_57529_20130404_225031_outLine +BABEL_OP1_203_57678_20130319_173142_inLine +BABEL_OP1_203_57678_20130319_173142_outLine +BABEL_OP1_203_58107_20130331_163124_inLine +BABEL_OP1_203_58107_20130331_163124_outLine +BABEL_OP1_203_58107_20130331_164049_inLine +BABEL_OP1_203_58107_20130331_164049_outLine +BABEL_OP1_203_58145_20130404_174142_inLine +BABEL_OP1_203_58145_20130404_174142_outLine +BABEL_OP1_203_58489_20130406_171644_inLine +BABEL_OP1_203_58489_20130406_171644_outLine +BABEL_OP1_203_58821_20130330_171943_inLine +BABEL_OP1_203_58821_20130330_171943_outLine +BABEL_OP1_203_58850_20130320_210438_outLine +BABEL_OP1_203_58853_20130804_133710_inLine +BABEL_OP1_203_58853_20130804_133710_outLine +BABEL_OP1_203_58915_20130508_170813_inLine +BABEL_OP1_203_58915_20130508_170813_outLine +BABEL_OP1_203_58926_20130314_221922_inLine +BABEL_OP1_203_58926_20130314_221922_outLine +BABEL_OP1_203_59078_20130328_222520_inLine +BABEL_OP1_203_59078_20130328_222520_outLine +BABEL_OP1_203_59307_20130503_211805_inLine +BABEL_OP1_203_59307_20130503_211805_outLine +BABEL_OP1_203_59720_20130323_160840_inLine +BABEL_OP1_203_59720_20130323_160840_outLine +BABEL_OP1_203_59747_20130307_185538_inLine +BABEL_OP1_203_59747_20130307_185538_outLine +BABEL_OP1_203_59864_20130719_183902_inLine +BABEL_OP1_203_59864_20130719_183902_outLine +BABEL_OP1_203_59928_20130314_205249_inLine +BABEL_OP1_203_59928_20130314_205249_outLine +BABEL_OP1_203_60026_20130311_192442_inLine +BABEL_OP1_203_60026_20130311_192442_outLine +BABEL_OP1_203_60352_20130724_151721_inLine +BABEL_OP1_203_60352_20130724_151721_outLine 
+BABEL_OP1_203_60397_20130814_170113_inLine +BABEL_OP1_203_60397_20130814_170113_outLine +BABEL_OP1_203_60436_20130726_213808_inLine +BABEL_OP1_203_60436_20130726_213808_outLine +BABEL_OP1_203_60830_20130323_152836_inLine +BABEL_OP1_203_60830_20130323_152836_outLine +BABEL_OP1_203_61011_20130307_163948_inLine +BABEL_OP1_203_61011_20130307_163948_outLine +BABEL_OP1_203_61225_20130310_001509_inLine +BABEL_OP1_203_61225_20130310_001509_outLine +BABEL_OP1_203_61225_20130310_002607_inLine +BABEL_OP1_203_61225_20130310_002607_outLine +BABEL_OP1_203_61435_20130421_175121_inLine +BABEL_OP1_203_61435_20130421_175121_outLine +BABEL_OP1_203_61440_20130513_143551_inLine +BABEL_OP1_203_61440_20130513_143551_outLine +BABEL_OP1_203_61888_20130410_154115_inLine +BABEL_OP1_203_61888_20130410_154115_outLine +BABEL_OP1_203_62014_20130503_150317_inLine +BABEL_OP1_203_62014_20130503_150317_outLine +BABEL_OP1_203_62200_20130320_155842_inLine +BABEL_OP1_203_62200_20130320_155842_outLine +BABEL_OP1_203_62360_20130729_185133_inLine +BABEL_OP1_203_62360_20130729_185133_outLine +BABEL_OP1_203_62362_20130513_145108_inLine +BABEL_OP1_203_62362_20130513_145108_outLine +BABEL_OP1_203_62714_20130430_183624_inLine +BABEL_OP1_203_62714_20130430_183624_outLine +BABEL_OP1_203_62800_20130307_204137_inLine +BABEL_OP1_203_62800_20130307_204137_outLine +BABEL_OP1_203_62976_20130512_201748_inLine +BABEL_OP1_203_62976_20130512_201748_outLine +BABEL_OP1_203_63094_20130512_165833_inLine +BABEL_OP1_203_63094_20130512_165833_outLine +BABEL_OP1_203_63730_20130507_163540_inLine +BABEL_OP1_203_63730_20130507_163540_outLine +BABEL_OP1_203_64014_20130411_192910_inLine +BABEL_OP1_203_64014_20130411_192910_outLine +BABEL_OP1_203_64065_20130326_201717_inLine +BABEL_OP1_203_64065_20130326_201717_outLine +BABEL_OP1_203_64065_20130326_202638_inLine +BABEL_OP1_203_64065_20130326_202638_outLine +BABEL_OP1_203_65723_20130313_205922_inLine +BABEL_OP1_203_65723_20130313_205922_outLine +BABEL_OP1_203_65913_20130726_205358_inLine +BABEL_OP1_203_65913_20130726_205358_outLine +BABEL_OP1_203_66001_20130309_233448_inLine +BABEL_OP1_203_66001_20130309_233448_outLine +BABEL_OP1_203_66045_20130323_203735_inLine +BABEL_OP1_203_66045_20130323_203735_outLine +BABEL_OP1_203_66822_20130324_142935_inLine +BABEL_OP1_203_66822_20130324_142935_outLine +BABEL_OP1_203_66916_20130308_142310_inLine +BABEL_OP1_203_66916_20130308_142310_outLine +BABEL_OP1_203_66971_20130725_151439_inLine +BABEL_OP1_203_66971_20130725_151439_outLine +BABEL_OP1_203_67066_20130509_215551_inLine +BABEL_OP1_203_67066_20130509_215551_outLine +BABEL_OP1_203_68289_20130409_222355_inLine +BABEL_OP1_203_68289_20130409_222355_outLine +BABEL_OP1_203_68385_20130221_213027_inLine +BABEL_OP1_203_68385_20130221_213027_outLine +BABEL_OP1_203_69096_20130714_153203_inLine +BABEL_OP1_203_69096_20130714_153203_outLine +BABEL_OP1_203_69474_20130409_153705_inLine +BABEL_OP1_203_69474_20130409_153705_outLine +BABEL_OP1_203_69885_20130729_175242_inLine +BABEL_OP1_203_69885_20130729_175242_outLine +BABEL_OP1_203_69964_20130801_183705_inLine +BABEL_OP1_203_69964_20130801_183705_outLine +BABEL_OP1_203_70221_20130502_153055_inLine +BABEL_OP1_203_70221_20130502_153055_outLine +BABEL_OP1_203_70386_20130315_162835_inLine +BABEL_OP1_203_70386_20130315_162835_outLine +BABEL_OP1_203_70639_20130805_192027_inLine +BABEL_OP1_203_70639_20130805_192027_outLine +BABEL_OP1_203_70716_20130731_182939_inLine +BABEL_OP1_203_70716_20130731_182939_outLine +BABEL_OP1_203_71067_20130503_201919_inLine 
+BABEL_OP1_203_71067_20130503_201919_outLine +BABEL_OP1_203_71566_20130406_212124_inLine +BABEL_OP1_203_71566_20130406_212124_outLine +BABEL_OP1_203_72324_20130721_195442_inLine +BABEL_OP1_203_72324_20130721_195442_outLine +BABEL_OP1_203_72587_20130331_220349_inLine +BABEL_OP1_203_72587_20130331_220349_outLine +BABEL_OP1_203_73042_20130314_184552_inLine +BABEL_OP1_203_73042_20130314_184552_outLine +BABEL_OP1_203_73301_20130321_151848_inLine +BABEL_OP1_203_73301_20130321_151848_outLine +BABEL_OP1_203_73591_20130222_132516_inLine +BABEL_OP1_203_73591_20130222_132516_outLine +BABEL_OP1_203_74667_20130322_155857_inLine +BABEL_OP1_203_74667_20130322_155857_outLine +BABEL_OP1_203_74886_20130309_200304_inLine +BABEL_OP1_203_74886_20130309_200304_outLine +BABEL_OP1_203_75064_20130322_142556_inLine +BABEL_OP1_203_75064_20130322_142556_outLine +BABEL_OP1_203_75342_20130404_193602_inLine +BABEL_OP1_203_75342_20130404_193602_outLine +BABEL_OP1_203_75869_20130721_161850_inLine +BABEL_OP1_203_75869_20130721_161850_outLine +BABEL_OP1_203_76444_20130406_153810_inLine +BABEL_OP1_203_76444_20130406_153810_outLine +BABEL_OP1_203_76482_20130508_220808_inLine +BABEL_OP1_203_76482_20130508_220808_outLine +BABEL_OP1_203_77242_20130508_191854_inLine +BABEL_OP1_203_77242_20130508_191854_outLine +BABEL_OP1_203_78749_20130426_182140_inLine +BABEL_OP1_203_78749_20130426_182140_outLine +BABEL_OP1_203_79131_20130727_202021_inLine +BABEL_OP1_203_79131_20130727_202021_outLine +BABEL_OP1_203_79660_20130512_173422_inLine +BABEL_OP1_203_79660_20130512_173422_outLine +BABEL_OP1_203_80134_20130814_145021_inLine +BABEL_OP1_203_80134_20130814_145021_outLine +BABEL_OP1_203_81287_20130403_225530_inLine +BABEL_OP1_203_81287_20130403_225530_outLine +BABEL_OP1_203_82224_20130718_134750_inLine +BABEL_OP1_203_82224_20130718_134750_outLine +BABEL_OP1_203_83813_20130812_133548_inLine +BABEL_OP1_203_83813_20130812_133548_outLine +BABEL_OP1_203_84339_20130802_181641_inLine +BABEL_OP1_203_84339_20130802_181641_outLine +BABEL_OP1_203_84469_20130421_132749_inLine +BABEL_OP1_203_84469_20130421_132749_outLine +BABEL_OP1_203_84611_20130312_152852_inLine +BABEL_OP1_203_84611_20130312_152852_outLine +BABEL_OP1_203_85325_20130802_212902_inLine +BABEL_OP1_203_85325_20130802_212902_outLine +BABEL_OP1_203_86597_20130508_182316_inLine +BABEL_OP1_203_86597_20130508_182316_outLine +BABEL_OP1_203_86628_20130512_215243_inLine +BABEL_OP1_203_86628_20130512_215243_outLine +BABEL_OP1_203_86830_20130423_194221_inLine +BABEL_OP1_203_86830_20130423_194221_outLine +BABEL_OP1_203_86878_20130804_174949_inLine +BABEL_OP1_203_86878_20130804_174949_outLine +BABEL_OP1_203_86891_20130427_122020_inLine +BABEL_OP1_203_86891_20130427_122020_outLine +BABEL_OP1_203_87305_20130512_150816_inLine +BABEL_OP1_203_87305_20130512_150816_outLine +BABEL_OP1_203_89358_20130327_183946_inLine +BABEL_OP1_203_89358_20130327_183946_outLine +BABEL_OP1_203_89943_20130319_151705_inLine +BABEL_OP1_203_89943_20130319_151705_outLine +BABEL_OP1_203_90709_20130311_171156_inLine +BABEL_OP1_203_90709_20130311_171156_outLine +BABEL_OP1_203_91760_20130728_190550_inLine +BABEL_OP1_203_91760_20130728_190550_outLine +BABEL_OP1_203_92077_20130725_140650_inLine +BABEL_OP1_203_92077_20130725_140650_outLine +BABEL_OP1_203_93411_20130324_150550_inLine +BABEL_OP1_203_93411_20130324_150550_outLine +BABEL_OP1_203_93490_20130804_201521_inLine +BABEL_OP1_203_93490_20130804_201521_outLine +BABEL_OP1_203_93964_20130327_171307_inLine +BABEL_OP1_203_93964_20130327_171307_outLine 
+BABEL_OP1_203_94442_20130727_182743_inLine +BABEL_OP1_203_94442_20130727_182743_outLine +BABEL_OP1_203_94449_20130801_010717_inLine +BABEL_OP1_203_94449_20130801_010717_outLine +BABEL_OP1_203_95338_20130727_211019_inLine +BABEL_OP1_203_95338_20130727_211019_outLine +BABEL_OP1_203_96059_20130731_211048_inLine +BABEL_OP1_203_96059_20130731_211048_outLine +BABEL_OP1_203_96376_20130731_143340_outLine +BABEL_OP1_203_96690_20130320_183730_inLine +BABEL_OP1_203_96690_20130320_183730_outLine +BABEL_OP1_203_96690_20130320_185039_inLine +BABEL_OP1_203_96690_20130320_185039_outLine +BABEL_OP1_203_96842_20130726_140248_inLine +BABEL_OP1_203_96842_20130726_140248_outLine +BABEL_OP1_203_97220_20130508_165310_inLine +BABEL_OP1_203_97220_20130508_165310_outLine +BABEL_OP1_203_97836_20130430_195102_inLine +BABEL_OP1_203_97836_20130430_195102_outLine +BABEL_OP1_203_98192_20130511_210223_inLine +BABEL_OP1_203_98192_20130511_210223_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list new file mode 100644 index 00000000000..bc4c7166c32 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list @@ -0,0 +1,127 @@ +BABEL_OP1_203_10974_20130425_162609_inLine +BABEL_OP1_203_10974_20130425_162609_outLine +BABEL_OP1_203_14141_20130410_212719_inLine +BABEL_OP1_203_14141_20130410_212719_outLine +BABEL_OP1_203_14237_20130313_222650_inLine +BABEL_OP1_203_14237_20130313_222650_outLine +BABEL_OP1_203_15163_20130319_154026_inLine +BABEL_OP1_203_15163_20130319_154026_outLine +BABEL_OP1_203_15324_20130512_224242_inLine +BABEL_OP1_203_15324_20130512_224242_outLine +BABEL_OP1_203_15324_20130512_225202_inLine +BABEL_OP1_203_15324_20130512_225202_outLine +BABEL_OP1_203_15638_20130409_150143_inLine +BABEL_OP1_203_15638_20130409_150143_outLine +BABEL_OP1_203_17115_20130425_173844_inLine +BABEL_OP1_203_17115_20130425_173844_outLine +BABEL_OP1_203_17115_20130425_175816_inLine +BABEL_OP1_203_17115_20130425_175816_outLine +BABEL_OP1_203_17751_20130512_155328_inLine +BABEL_OP1_203_17751_20130512_155328_outLine +BABEL_OP1_203_17914_20130503_215602_inLine +BABEL_OP1_203_17914_20130503_215602_outLine +BABEL_OP1_203_17923_20130314_203130_inLine +BABEL_OP1_203_17923_20130314_203130_outLine +BABEL_OP1_203_20682_20130406_194906_inLine +BABEL_OP1_203_20682_20130406_194906_outLine +BABEL_OP1_203_22624_20130403_190935_inLine +BABEL_OP1_203_22624_20130403_190935_outLine +BABEL_OP1_203_24270_20130329_153331_inLine +BABEL_OP1_203_24270_20130329_153331_outLine +BABEL_OP1_203_24589_20130323_190409_inLine +BABEL_OP1_203_24589_20130323_190409_outLine +BABEL_OP1_203_24589_20130323_192722_inLine +BABEL_OP1_203_24589_20130323_192722_outLine +BABEL_OP1_203_25220_20130502_183943_inLine +BABEL_OP1_203_25220_20130502_183943_outLine +BABEL_OP1_203_27478_20130501_195141_inLine +BABEL_OP1_203_27478_20130501_195141_outLine +BABEL_OP1_203_27478_20130501_200641_inLine +BABEL_OP1_203_27478_20130501_200641_outLine +BABEL_OP1_203_28190_20130730_195836_inLine +BABEL_OP1_203_28190_20130730_195836_outLine +BABEL_OP1_203_28945_20130315_171902_inLine +BABEL_OP1_203_28945_20130315_171902_outLine +BABEL_OP1_203_32914_20130411_174738_inLine +BABEL_OP1_203_32914_20130411_174738_outLine +BABEL_OP1_203_33175_20130307_204134_inLine +BABEL_OP1_203_33175_20130307_204134_outLine +BABEL_OP1_203_40713_20130321_155930_inLine +BABEL_OP1_203_40713_20130321_155930_outLine +BABEL_OP1_203_41097_20130427_224950_inLine +BABEL_OP1_203_41097_20130427_224950_outLine 
+BABEL_OP1_203_41100_20130313_161755_inLine +BABEL_OP1_203_41100_20130313_161755_outLine +BABEL_OP1_203_41680_20130304_134640_inLine +BABEL_OP1_203_41680_20130304_134640_outLine +BABEL_OP1_203_42126_20130805_213859_inLine +BABEL_OP1_203_42126_20130805_213859_outLine +BABEL_OP1_203_42243_20130313_170336_inLine +BABEL_OP1_203_42243_20130313_170336_outLine +BABEL_OP1_203_42834_20130404_194840_inLine +BABEL_OP1_203_42834_20130404_194840_outLine +BABEL_OP1_203_42883_20130729_171646_inLine +BABEL_OP1_203_42883_20130729_171646_outLine +BABEL_OP1_203_44477_20130331_190402_inLine +BABEL_OP1_203_44477_20130331_190402_outLine +BABEL_OP1_203_45642_20130313_202110_inLine +BABEL_OP1_203_45642_20130313_202110_outLine +BABEL_OP1_203_46625_20130304_201959_inLine +BABEL_OP1_203_46625_20130304_201959_outLine +BABEL_OP1_203_46763_20130426_160841_inLine +BABEL_OP1_203_46763_20130426_160841_outLine +BABEL_OP1_203_47270_20130410_160110_inLine +BABEL_OP1_203_47270_20130410_160110_outLine +BABEL_OP1_203_49637_20130313_134853_inLine +BABEL_OP1_203_49637_20130313_134853_outLine +BABEL_OP1_203_49902_20130323_175920_inLine +BABEL_OP1_203_49902_20130323_175920_outLine +BABEL_OP1_203_50726_20130307_135236_inLine +BABEL_OP1_203_50726_20130307_135236_outLine +BABEL_OP1_203_51414_20130729_152916_inLine +BABEL_OP1_203_51414_20130729_152916_outLine +BABEL_OP1_203_52447_20130513_224209_inLine +BABEL_OP1_203_52447_20130513_224209_outLine +BABEL_OP1_203_52854_20130221_192229_inLine +BABEL_OP1_203_52854_20130221_192229_outLine +BABEL_OP1_203_54046_20130804_193101_inLine +BABEL_OP1_203_54046_20130804_193101_outLine +BABEL_OP1_203_54744_20130311_153522_inLine +BABEL_OP1_203_54744_20130311_153522_outLine +BABEL_OP1_203_55818_20130309_163433_inLine +BABEL_OP1_203_55818_20130309_163433_outLine +BABEL_OP1_203_56213_20130407_184955_inLine +BABEL_OP1_203_56213_20130407_184955_outLine +BABEL_OP1_203_56465_20130503_211423_inLine +BABEL_OP1_203_56465_20130503_211423_outLine +BABEL_OP1_203_56677_20130407_020513_inLine +BABEL_OP1_203_56677_20130407_020513_outLine +BABEL_OP1_203_58850_20130320_210438_outLine +BABEL_OP1_203_58853_20130804_133710_inLine +BABEL_OP1_203_58853_20130804_133710_outLine +BABEL_OP1_203_61011_20130307_163948_inLine +BABEL_OP1_203_61011_20130307_163948_outLine +BABEL_OP1_203_62362_20130513_145108_inLine +BABEL_OP1_203_62362_20130513_145108_outLine +BABEL_OP1_203_63094_20130512_165833_inLine +BABEL_OP1_203_63094_20130512_165833_outLine +BABEL_OP1_203_64014_20130411_192910_inLine +BABEL_OP1_203_64014_20130411_192910_outLine +BABEL_OP1_203_65723_20130313_205922_inLine +BABEL_OP1_203_65723_20130313_205922_outLine +BABEL_OP1_203_69885_20130729_175242_inLine +BABEL_OP1_203_69885_20130729_175242_outLine +BABEL_OP1_203_70639_20130805_192027_inLine +BABEL_OP1_203_70639_20130805_192027_outLine +BABEL_OP1_203_73042_20130314_184552_inLine +BABEL_OP1_203_73042_20130314_184552_outLine +BABEL_OP1_203_73301_20130321_151848_inLine +BABEL_OP1_203_73301_20130321_151848_outLine +BABEL_OP1_203_78749_20130426_182140_inLine +BABEL_OP1_203_78749_20130426_182140_outLine +BABEL_OP1_203_83813_20130812_133548_inLine +BABEL_OP1_203_83813_20130812_133548_outLine +BABEL_OP1_203_86830_20130423_194221_inLine +BABEL_OP1_203_86830_20130423_194221_outLine +BABEL_OP1_203_96842_20130726_140248_inLine +BABEL_OP1_203_96842_20130726_140248_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..500c68fda58 --- /dev/null 
+++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list @@ -0,0 +1,654 @@ +BABEL_OP1_203_10036_20130318_191401_inLine +BABEL_OP1_203_10036_20130318_191401_outLine +BABEL_OP1_203_10411_20130511_174439_inLine +BABEL_OP1_203_10411_20130511_174439_outLine +BABEL_OP1_203_10482_20130403_160013_inLine +BABEL_OP1_203_10482_20130403_160013_outLine +BABEL_OP1_203_10524_20130425_183925_inLine +BABEL_OP1_203_10524_20130425_183925_outLine +BABEL_OP1_203_10524_20130425_185048_inLine +BABEL_OP1_203_10524_20130425_185048_outLine +BABEL_OP1_203_10901_20130321_180232_inLine +BABEL_OP1_203_10901_20130321_180232_outLine +BABEL_OP1_203_10938_20130319_190809_inLine +BABEL_OP1_203_10938_20130319_190809_outLine +BABEL_OP1_203_10966_20130319_135742_inLine +BABEL_OP1_203_10966_20130319_135742_outLine +BABEL_OP1_203_11352_20130426_170450_inLine +BABEL_OP1_203_11352_20130426_170450_outLine +BABEL_OP1_203_11486_20130428_131348_inLine +BABEL_OP1_203_11486_20130428_131348_outLine +BABEL_OP1_203_11663_20130402_202025_inLine +BABEL_OP1_203_11663_20130402_202025_outLine +BABEL_OP1_203_11673_20130306_201125_inLine +BABEL_OP1_203_11673_20130306_201125_outLine +BABEL_OP1_203_11797_20130309_195420_inLine +BABEL_OP1_203_11797_20130309_195420_outLine +BABEL_OP1_203_11859_20130511_201411_inLine +BABEL_OP1_203_11859_20130511_201411_outLine +BABEL_OP1_203_12036_20130312_182225_inLine +BABEL_OP1_203_12036_20130312_182225_outLine +BABEL_OP1_203_12220_20130321_160841_inLine +BABEL_OP1_203_12220_20130321_160841_outLine +BABEL_OP1_203_12606_20130726_174724_inLine +BABEL_OP1_203_12606_20130726_174724_outLine +BABEL_OP1_203_12609_20130727_133133_outLine +BABEL_OP1_203_12767_20130313_214914_inLine +BABEL_OP1_203_12767_20130313_214914_outLine +BABEL_OP1_203_12851_20130304_181335_inLine +BABEL_OP1_203_12851_20130304_181335_outLine +BABEL_OP1_203_12851_20130304_182835_inLine +BABEL_OP1_203_12851_20130304_182835_outLine +BABEL_OP1_203_12851_20130304_185138_inLine +BABEL_OP1_203_12851_20130304_185138_outLine +BABEL_OP1_203_13126_20130421_175306_inLine +BABEL_OP1_203_13126_20130421_175306_outLine +BABEL_OP1_203_13126_20130421_180154_inLine +BABEL_OP1_203_13126_20130421_180154_outLine +BABEL_OP1_203_13324_20130313_185155_inLine +BABEL_OP1_203_13324_20130313_185155_outLine +BABEL_OP1_203_13483_20130409_231107_inLine +BABEL_OP1_203_13483_20130409_231107_outLine +BABEL_OP1_203_13490_20130322_143131_inLine +BABEL_OP1_203_13490_20130322_143131_outLine +BABEL_OP1_203_13664_20130304_155051_inLine +BABEL_OP1_203_13664_20130304_155051_outLine +BABEL_OP1_203_13709_20130410_222037_inLine +BABEL_OP1_203_13709_20130410_222037_outLine +BABEL_OP1_203_13744_20130307_215445_inLine +BABEL_OP1_203_13744_20130307_215445_outLine +BABEL_OP1_203_13792_20130310_142445_inLine +BABEL_OP1_203_13792_20130310_142445_outLine +BABEL_OP1_203_14137_20130314_181335_inLine +BABEL_OP1_203_14137_20130314_181335_outLine +BABEL_OP1_203_14179_20130402_211621_inLine +BABEL_OP1_203_14179_20130402_211621_outLine +BABEL_OP1_203_14229_20130324_162827_inLine +BABEL_OP1_203_14229_20130324_162827_outLine +BABEL_OP1_203_14560_20130425_140155_inLine +BABEL_OP1_203_14560_20130425_140155_outLine +BABEL_OP1_203_14719_20130406_191558_inLine +BABEL_OP1_203_14719_20130406_191558_outLine +BABEL_OP1_203_14725_20130309_185639_inLine +BABEL_OP1_203_14725_20130309_185639_outLine +BABEL_OP1_203_14729_20130411_214726_inLine +BABEL_OP1_203_14729_20130411_214726_outLine +BABEL_OP1_203_14814_20130314_133131_inLine +BABEL_OP1_203_14814_20130314_133131_outLine 
+BABEL_OP1_203_14899_20130311_184638_inLine +BABEL_OP1_203_14899_20130311_184638_outLine +BABEL_OP1_203_14929_20130324_184056_inLine +BABEL_OP1_203_14929_20130324_184056_outLine +BABEL_OP1_203_15024_20130322_152846_inLine +BABEL_OP1_203_15024_20130322_152846_outLine +BABEL_OP1_203_15227_20130513_222256_inLine +BABEL_OP1_203_15227_20130513_222256_outLine +BABEL_OP1_203_15322_20130511_152438_inLine +BABEL_OP1_203_15322_20130511_152438_outLine +BABEL_OP1_203_15535_20130329_143236_inLine +BABEL_OP1_203_15535_20130329_143236_outLine +BABEL_OP1_203_15730_20130307_201711_inLine +BABEL_OP1_203_15730_20130307_201711_outLine +BABEL_OP1_203_15749_20130407_175145_inLine +BABEL_OP1_203_15749_20130407_175145_outLine +BABEL_OP1_203_15902_20130309_193940_inLine +BABEL_OP1_203_15902_20130309_193940_outLine +BABEL_OP1_203_16149_20130309_171014_inLine +BABEL_OP1_203_16149_20130309_171014_outLine +BABEL_OP1_203_16924_20130720_175321_inLine +BABEL_OP1_203_16924_20130720_175321_outLine +BABEL_OP1_203_17032_20130402_175428_inLine +BABEL_OP1_203_17032_20130402_175428_outLine +BABEL_OP1_203_17097_20130430_173440_inLine +BABEL_OP1_203_17097_20130430_173440_outLine +BABEL_OP1_203_17472_20130408_215034_inLine +BABEL_OP1_203_17472_20130408_215034_outLine +BABEL_OP1_203_17567_20130425_145936_inLine +BABEL_OP1_203_17567_20130425_145936_outLine +BABEL_OP1_203_18118_20130730_191442_inLine +BABEL_OP1_203_18118_20130730_191442_outLine +BABEL_OP1_203_18380_20130327_214619_inLine +BABEL_OP1_203_18380_20130327_214619_outLine +BABEL_OP1_203_18566_20130503_153904_inLine +BABEL_OP1_203_18566_20130503_153904_outLine +BABEL_OP1_203_18939_20130311_144740_inLine +BABEL_OP1_203_18939_20130311_144740_outLine +BABEL_OP1_203_19101_20130423_142324_inLine +BABEL_OP1_203_19101_20130423_142324_outLine +BABEL_OP1_203_19134_20130328_220635_inLine +BABEL_OP1_203_19134_20130328_220635_outLine +BABEL_OP1_203_19589_20130727_143145_inLine +BABEL_OP1_203_19589_20130727_143145_outLine +BABEL_OP1_203_19703_20130318_160958_inLine +BABEL_OP1_203_19703_20130318_160958_outLine +BABEL_OP1_203_19703_20130318_162314_inLine +BABEL_OP1_203_19703_20130318_162314_outLine +BABEL_OP1_203_19773_20130407_183531_inLine +BABEL_OP1_203_19773_20130407_183531_outLine +BABEL_OP1_203_19782_20130404_170141_inLine +BABEL_OP1_203_19782_20130404_170141_outLine +BABEL_OP1_203_20133_20130304_160351_inLine +BABEL_OP1_203_20133_20130304_160351_outLine +BABEL_OP1_203_20330_20130410_161539_inLine +BABEL_OP1_203_20330_20130410_161539_outLine +BABEL_OP1_203_20768_20130407_190152_inLine +BABEL_OP1_203_20768_20130407_190152_outLine +BABEL_OP1_203_20985_20130330_210730_inLine +BABEL_OP1_203_20985_20130330_210730_outLine +BABEL_OP1_203_21004_20130410_181101_inLine +BABEL_OP1_203_21004_20130410_181101_outLine +BABEL_OP1_203_21004_20130410_182740_inLine +BABEL_OP1_203_21004_20130410_182740_outLine +BABEL_OP1_203_21109_20130406_161601_inLine +BABEL_OP1_203_21109_20130406_161601_outLine +BABEL_OP1_203_21206_20130312_164516_inLine +BABEL_OP1_203_21206_20130312_164516_outLine +BABEL_OP1_203_21315_20130501_151005_inLine +BABEL_OP1_203_21315_20130501_151005_outLine +BABEL_OP1_203_21327_20130405_203336_inLine +BABEL_OP1_203_21327_20130405_203336_outLine +BABEL_OP1_203_21435_20130423_181043_inLine +BABEL_OP1_203_21435_20130423_181043_outLine +BABEL_OP1_203_22280_20130329_161951_inLine +BABEL_OP1_203_22280_20130329_161951_outLine +BABEL_OP1_203_22321_20130309_191222_inLine +BABEL_OP1_203_22321_20130309_191222_outLine +BABEL_OP1_203_22446_20130309_134600_inLine 
+BABEL_OP1_203_22446_20130309_134600_outLine +BABEL_OP1_203_22494_20130402_171234_inLine +BABEL_OP1_203_22494_20130402_171234_outLine +BABEL_OP1_203_22612_20130406_220338_inLine +BABEL_OP1_203_22612_20130406_220338_outLine +BABEL_OP1_203_22918_20130410_190723_inLine +BABEL_OP1_203_22918_20130410_190723_outLine +BABEL_OP1_203_23006_20130319_211412_inLine +BABEL_OP1_203_23006_20130319_211412_outLine +BABEL_OP1_203_23046_20130322_165811_inLine +BABEL_OP1_203_23046_20130322_165811_outLine +BABEL_OP1_203_23092_20130406_014425_inLine +BABEL_OP1_203_23092_20130406_014425_outLine +BABEL_OP1_203_23092_20130406_015338_inLine +BABEL_OP1_203_23092_20130406_015338_outLine +BABEL_OP1_203_23153_20130320_194433_inLine +BABEL_OP1_203_23153_20130320_194433_outLine +BABEL_OP1_203_23239_20130331_171214_inLine +BABEL_OP1_203_23239_20130331_171214_outLine +BABEL_OP1_203_23505_20130309_204825_inLine +BABEL_OP1_203_23505_20130309_204825_outLine +BABEL_OP1_203_23980_20130321_193946_inLine +BABEL_OP1_203_23980_20130321_193946_outLine +BABEL_OP1_203_24017_20130424_174037_inLine +BABEL_OP1_203_24017_20130424_174037_outLine +BABEL_OP1_203_24253_20130423_175626_inLine +BABEL_OP1_203_24253_20130423_175626_outLine +BABEL_OP1_203_24290_20130423_133315_inLine +BABEL_OP1_203_24290_20130423_133315_outLine +BABEL_OP1_203_24323_20130320_160949_inLine +BABEL_OP1_203_24323_20130320_160949_outLine +BABEL_OP1_203_24470_20130329_205656_inLine +BABEL_OP1_203_24470_20130329_205656_outLine +BABEL_OP1_203_24501_20130421_141711_inLine +BABEL_OP1_203_24501_20130421_141711_outLine +BABEL_OP1_203_24569_20130405_200644_inLine +BABEL_OP1_203_24569_20130405_200644_outLine +BABEL_OP1_203_24586_20130506_203931_inLine +BABEL_OP1_203_24586_20130506_203931_outLine +BABEL_OP1_203_24590_20130321_221146_inLine +BABEL_OP1_203_24590_20130321_221146_outLine +BABEL_OP1_203_24679_20130307_145644_inLine +BABEL_OP1_203_24679_20130307_145644_outLine +BABEL_OP1_203_24779_20130426_183526_inLine +BABEL_OP1_203_24779_20130426_183526_outLine +BABEL_OP1_203_24982_20130327_153429_inLine +BABEL_OP1_203_24982_20130327_153429_outLine +BABEL_OP1_203_25015_20130728_150746_inLine +BABEL_OP1_203_25015_20130728_150746_outLine +BABEL_OP1_203_25085_20130508_145922_inLine +BABEL_OP1_203_25085_20130508_145922_outLine +BABEL_OP1_203_25412_20130329_201051_inLine +BABEL_OP1_203_25412_20130329_201051_outLine +BABEL_OP1_203_25698_20130509_182226_inLine +BABEL_OP1_203_25698_20130509_182226_outLine +BABEL_OP1_203_25719_20130426_202355_inLine +BABEL_OP1_203_25719_20130426_202355_outLine +BABEL_OP1_203_25767_20130311_183243_inLine +BABEL_OP1_203_25767_20130311_183243_outLine +BABEL_OP1_203_25961_20130311_171235_inLine +BABEL_OP1_203_25961_20130311_171235_outLine +BABEL_OP1_203_26388_20130318_200305_inLine +BABEL_OP1_203_26388_20130318_200305_outLine +BABEL_OP1_203_26507_20130430_234212_inLine +BABEL_OP1_203_26507_20130430_234212_outLine +BABEL_OP1_203_26574_20130411_160556_inLine +BABEL_OP1_203_26574_20130411_160556_outLine +BABEL_OP1_203_26602_20130801_171131_inLine +BABEL_OP1_203_26602_20130801_171131_outLine +BABEL_OP1_203_26836_20130315_160512_inLine +BABEL_OP1_203_26836_20130315_160512_outLine +BABEL_OP1_203_27125_20130308_003724_inLine +BABEL_OP1_203_27125_20130308_003724_outLine +BABEL_OP1_203_27218_20130312_194932_inLine +BABEL_OP1_203_27218_20130312_194932_outLine +BABEL_OP1_203_27590_20130405_200930_inLine +BABEL_OP1_203_27590_20130405_200930_outLine +BABEL_OP1_203_27841_20130403_211143_inLine +BABEL_OP1_203_27841_20130403_211143_outLine 
+BABEL_OP1_203_28280_20130501_220643_inLine +BABEL_OP1_203_28280_20130501_220643_outLine +BABEL_OP1_203_28419_20130319_165427_inLine +BABEL_OP1_203_28419_20130319_165427_outLine +BABEL_OP1_203_28522_20130328_170837_inLine +BABEL_OP1_203_28522_20130328_170837_outLine +BABEL_OP1_203_28775_20130313_213707_inLine +BABEL_OP1_203_28775_20130313_213707_outLine +BABEL_OP1_203_28775_20130313_215352_inLine +BABEL_OP1_203_28775_20130313_215352_outLine +BABEL_OP1_203_29023_20130313_194148_inLine +BABEL_OP1_203_29023_20130313_194148_outLine +BABEL_OP1_203_29023_20130313_195106_inLine +BABEL_OP1_203_29023_20130313_195106_outLine +BABEL_OP1_203_29039_20130402_153541_inLine +BABEL_OP1_203_29039_20130402_153541_outLine +BABEL_OP1_203_29168_20130306_213504_inLine +BABEL_OP1_203_29168_20130306_213504_outLine +BABEL_OP1_203_29323_20130403_215525_inLine +BABEL_OP1_203_29323_20130403_215525_outLine +BABEL_OP1_203_29416_20130421_133101_inLine +BABEL_OP1_203_29416_20130421_133101_outLine +BABEL_OP1_203_29439_20130422_150608_inLine +BABEL_OP1_203_29439_20130422_150608_outLine +BABEL_OP1_203_30013_20130331_170538_inLine +BABEL_OP1_203_30013_20130331_170538_outLine +BABEL_OP1_203_30395_20130318_180120_inLine +BABEL_OP1_203_30395_20130318_180120_outLine +BABEL_OP1_203_30645_20130309_151850_inLine +BABEL_OP1_203_30645_20130309_151850_outLine +BABEL_OP1_203_31184_20130322_141512_inLine +BABEL_OP1_203_31184_20130322_141512_outLine +BABEL_OP1_203_31184_20130322_142743_inLine +BABEL_OP1_203_31184_20130322_142743_outLine +BABEL_OP1_203_31490_20130321_210518_inLine +BABEL_OP1_203_31490_20130321_210518_outLine +BABEL_OP1_203_31992_20130313_143826_inLine +BABEL_OP1_203_31992_20130313_143826_outLine +BABEL_OP1_203_32097_20130304_195431_inLine +BABEL_OP1_203_32097_20130304_195431_outLine +BABEL_OP1_203_32122_20130320_174321_inLine +BABEL_OP1_203_32122_20130320_174321_outLine +BABEL_OP1_203_32122_20130320_175419_inLine +BABEL_OP1_203_32122_20130320_175419_outLine +BABEL_OP1_203_32244_20130728_182847_inLine +BABEL_OP1_203_32244_20130728_182847_outLine +BABEL_OP1_203_32998_20130329_155417_inLine +BABEL_OP1_203_32998_20130329_155417_outLine +BABEL_OP1_203_33476_20130320_140412_inLine +BABEL_OP1_203_33476_20130320_140412_outLine +BABEL_OP1_203_33672_20130312_165130_inLine +BABEL_OP1_203_33672_20130312_165130_outLine +BABEL_OP1_203_33704_20130405_220001_inLine +BABEL_OP1_203_33704_20130405_220001_outLine +BABEL_OP1_203_33840_20130803_192343_inLine +BABEL_OP1_203_33840_20130803_192343_outLine +BABEL_OP1_203_34145_20130331_145240_inLine +BABEL_OP1_203_34145_20130331_145240_outLine +BABEL_OP1_203_35139_20130313_143646_inLine +BABEL_OP1_203_35139_20130313_143646_outLine +BABEL_OP1_203_36505_20130731_191406_inLine +BABEL_OP1_203_36505_20130731_191406_outLine +BABEL_OP1_203_36594_20130421_182303_inLine +BABEL_OP1_203_36594_20130421_182303_outLine +BABEL_OP1_203_37598_20130330_000102_inLine +BABEL_OP1_203_37598_20130330_000102_outLine +BABEL_OP1_203_38979_20130409_173446_inLine +BABEL_OP1_203_38979_20130409_173446_outLine +BABEL_OP1_203_38979_20130409_174405_inLine +BABEL_OP1_203_38979_20130409_174405_outLine +BABEL_OP1_203_39006_20130506_192659_inLine +BABEL_OP1_203_39006_20130506_192659_outLine +BABEL_OP1_203_39555_20130720_183746_inLine +BABEL_OP1_203_39555_20130720_183746_outLine +BABEL_OP1_203_39848_20130320_133756_inLine +BABEL_OP1_203_39848_20130320_133756_outLine +BABEL_OP1_203_40557_20130404_005522_inLine +BABEL_OP1_203_40557_20130404_005522_outLine +BABEL_OP1_203_40565_20130331_171210_inLine 
+BABEL_OP1_203_40565_20130331_171210_outLine +BABEL_OP1_203_41073_20130721_172038_inLine +BABEL_OP1_203_41073_20130721_172038_outLine +BABEL_OP1_203_41174_20130318_203041_inLine +BABEL_OP1_203_41174_20130318_203041_outLine +BABEL_OP1_203_41334_20130501_232034_inLine +BABEL_OP1_203_41334_20130501_232034_outLine +BABEL_OP1_203_41442_20130404_174409_inLine +BABEL_OP1_203_41442_20130404_174409_outLine +BABEL_OP1_203_41469_20130313_185923_inLine +BABEL_OP1_203_41469_20130313_185923_outLine +BABEL_OP1_203_41609_20130309_175203_inLine +BABEL_OP1_203_41609_20130309_175203_outLine +BABEL_OP1_203_42029_20130403_184623_inLine +BABEL_OP1_203_42029_20130403_184623_outLine +BABEL_OP1_203_42299_20130508_203220_inLine +BABEL_OP1_203_42299_20130508_203220_outLine +BABEL_OP1_203_42299_20130508_204824_inLine +BABEL_OP1_203_42299_20130508_204824_outLine +BABEL_OP1_203_42309_20130428_191239_inLine +BABEL_OP1_203_42309_20130428_191239_outLine +BABEL_OP1_203_42434_20130323_160637_inLine +BABEL_OP1_203_42434_20130323_160637_outLine +BABEL_OP1_203_42848_20130513_201112_inLine +BABEL_OP1_203_42848_20130513_201112_outLine +BABEL_OP1_203_43368_20130327_215424_inLine +BABEL_OP1_203_43368_20130327_215424_outLine +BABEL_OP1_203_43388_20130327_192024_inLine +BABEL_OP1_203_43388_20130327_192024_outLine +BABEL_OP1_203_43588_20130714_163553_inLine +BABEL_OP1_203_43588_20130714_163553_outLine +BABEL_OP1_203_43784_20130314_171933_inLine +BABEL_OP1_203_43784_20130314_171933_outLine +BABEL_OP1_203_43788_20130504_173234_inLine +BABEL_OP1_203_43788_20130504_173234_outLine +BABEL_OP1_203_43920_20130405_194800_inLine +BABEL_OP1_203_43920_20130405_194800_outLine +BABEL_OP1_203_44478_20130730_170938_inLine +BABEL_OP1_203_44478_20130730_170938_outLine +BABEL_OP1_203_44619_20130313_175437_inLine +BABEL_OP1_203_44619_20130313_175437_outLine +BABEL_OP1_203_44709_20130331_183159_inLine +BABEL_OP1_203_44709_20130331_183159_outLine +BABEL_OP1_203_44961_20130311_173427_inLine +BABEL_OP1_203_44961_20130311_173427_outLine +BABEL_OP1_203_45560_20130309_173444_inLine +BABEL_OP1_203_45560_20130309_173444_outLine +BABEL_OP1_203_45851_20130801_014413_inLine +BABEL_OP1_203_45851_20130801_014413_outLine +BABEL_OP1_203_46310_20130309_211431_inLine +BABEL_OP1_203_46310_20130309_211431_outLine +BABEL_OP1_203_46550_20130313_153012_inLine +BABEL_OP1_203_46550_20130313_153012_outLine +BABEL_OP1_203_46681_20130313_203139_inLine +BABEL_OP1_203_46681_20130313_203139_outLine +BABEL_OP1_203_46688_20130314_212550_inLine +BABEL_OP1_203_46688_20130314_212550_outLine +BABEL_OP1_203_47186_20130405_120609_inLine +BABEL_OP1_203_47186_20130405_120609_outLine +BABEL_OP1_203_47487_20130321_145055_inLine +BABEL_OP1_203_47487_20130321_145055_outLine +BABEL_OP1_203_47823_20130406_151016_inLine +BABEL_OP1_203_47823_20130406_151016_outLine +BABEL_OP1_203_47866_20130723_152640_inLine +BABEL_OP1_203_47866_20130723_152640_outLine +BABEL_OP1_203_48422_20130425_175947_inLine +BABEL_OP1_203_48422_20130425_175947_outLine +BABEL_OP1_203_48610_20130309_222037_inLine +BABEL_OP1_203_48610_20130309_222037_outLine +BABEL_OP1_203_49001_20130315_160533_inLine +BABEL_OP1_203_49001_20130315_160533_outLine +BABEL_OP1_203_49216_20130307_211955_inLine +BABEL_OP1_203_49216_20130307_211955_outLine +BABEL_OP1_203_49287_20130331_155341_inLine +BABEL_OP1_203_49287_20130331_155341_outLine +BABEL_OP1_203_49437_20130405_194333_inLine +BABEL_OP1_203_49437_20130405_194333_outLine +BABEL_OP1_203_49437_20130405_195645_inLine +BABEL_OP1_203_49437_20130405_195645_outLine 
+BABEL_OP1_203_49630_20130408_182919_inLine +BABEL_OP1_203_49630_20130408_182919_outLine +BABEL_OP1_203_49768_20130320_164815_inLine +BABEL_OP1_203_49768_20130320_164815_outLine +BABEL_OP1_203_50090_20130726_145642_inLine +BABEL_OP1_203_50090_20130726_145642_outLine +BABEL_OP1_203_50175_20130311_181803_inLine +BABEL_OP1_203_50175_20130311_181803_outLine +BABEL_OP1_203_51530_20130803_174620_inLine +BABEL_OP1_203_51530_20130803_174620_outLine +BABEL_OP1_203_51611_20130312_195333_inLine +BABEL_OP1_203_51611_20130312_195333_outLine +BABEL_OP1_203_51701_20130508_232537_inLine +BABEL_OP1_203_51701_20130508_232537_outLine +BABEL_OP1_203_51819_20130328_150620_inLine +BABEL_OP1_203_51819_20130328_150620_outLine +BABEL_OP1_203_51955_20130314_175859_inLine +BABEL_OP1_203_51955_20130314_175859_outLine +BABEL_OP1_203_51955_20130314_180731_inLine +BABEL_OP1_203_51955_20130314_180731_outLine +BABEL_OP1_203_52246_20130319_221049_inLine +BABEL_OP1_203_52246_20130319_221049_outLine +BABEL_OP1_203_52272_20130313_140038_inLine +BABEL_OP1_203_52272_20130313_140038_outLine +BABEL_OP1_203_52404_20130409_005414_inLine +BABEL_OP1_203_52404_20130409_005414_outLine +BABEL_OP1_203_52422_20130427_140502_inLine +BABEL_OP1_203_52422_20130427_140502_outLine +BABEL_OP1_203_52490_20130309_141915_inLine +BABEL_OP1_203_52490_20130309_141915_outLine +BABEL_OP1_203_52717_20130311_173849_inLine +BABEL_OP1_203_52717_20130311_173849_outLine +BABEL_OP1_203_53063_20130407_210935_inLine +BABEL_OP1_203_53063_20130407_210935_outLine +BABEL_OP1_203_53665_20130727_150857_inLine +BABEL_OP1_203_53665_20130727_150857_outLine +BABEL_OP1_203_53842_20130322_165451_inLine +BABEL_OP1_203_53842_20130322_165451_outLine +BABEL_OP1_203_54074_20130319_150208_inLine +BABEL_OP1_203_54074_20130319_150208_outLine +BABEL_OP1_203_54104_20130309_204103_inLine +BABEL_OP1_203_54104_20130309_204103_outLine +BABEL_OP1_203_54390_20130313_161947_inLine +BABEL_OP1_203_54390_20130313_161947_outLine +BABEL_OP1_203_54477_20130408_133628_inLine +BABEL_OP1_203_54477_20130408_133628_outLine +BABEL_OP1_203_54530_20130424_194302_inLine +BABEL_OP1_203_54530_20130424_194302_outLine +BABEL_OP1_203_54697_20130405_153323_inLine +BABEL_OP1_203_54697_20130405_153323_outLine +BABEL_OP1_203_54827_20130803_201026_inLine +BABEL_OP1_203_54827_20130803_201026_outLine +BABEL_OP1_203_54953_20130319_135125_inLine +BABEL_OP1_203_54953_20130319_135125_outLine +BABEL_OP1_203_55259_20130323_181918_inLine +BABEL_OP1_203_55259_20130323_181918_outLine +BABEL_OP1_203_55950_20130728_141857_inLine +BABEL_OP1_203_55950_20130728_141857_outLine +BABEL_OP1_203_56076_20130728_212423_inLine +BABEL_OP1_203_56076_20130728_212423_outLine +BABEL_OP1_203_56198_20130314_163346_inLine +BABEL_OP1_203_56198_20130314_163346_outLine +BABEL_OP1_203_56198_20130314_164412_inLine +BABEL_OP1_203_56198_20130314_164412_outLine +BABEL_OP1_203_56306_20130408_202539_inLine +BABEL_OP1_203_56306_20130408_202539_outLine +BABEL_OP1_203_56307_20130401_212823_inLine +BABEL_OP1_203_56307_20130401_212823_outLine +BABEL_OP1_203_56826_20130403_155349_inLine +BABEL_OP1_203_56826_20130403_155349_outLine +BABEL_OP1_203_57093_20130323_155842_inLine +BABEL_OP1_203_57093_20130323_155842_outLine +BABEL_OP1_203_57116_20130306_200913_inLine +BABEL_OP1_203_57116_20130306_200913_outLine +BABEL_OP1_203_57529_20130404_225031_inLine +BABEL_OP1_203_57529_20130404_225031_outLine +BABEL_OP1_203_57678_20130319_173142_inLine +BABEL_OP1_203_57678_20130319_173142_outLine +BABEL_OP1_203_58107_20130331_163124_inLine 
+BABEL_OP1_203_58107_20130331_163124_outLine +BABEL_OP1_203_58107_20130331_164049_inLine +BABEL_OP1_203_58107_20130331_164049_outLine +BABEL_OP1_203_58145_20130404_174142_inLine +BABEL_OP1_203_58145_20130404_174142_outLine +BABEL_OP1_203_58489_20130406_171644_inLine +BABEL_OP1_203_58489_20130406_171644_outLine +BABEL_OP1_203_58821_20130330_171943_inLine +BABEL_OP1_203_58821_20130330_171943_outLine +BABEL_OP1_203_58915_20130508_170813_inLine +BABEL_OP1_203_58915_20130508_170813_outLine +BABEL_OP1_203_58926_20130314_221922_inLine +BABEL_OP1_203_58926_20130314_221922_outLine +BABEL_OP1_203_59078_20130328_222520_inLine +BABEL_OP1_203_59078_20130328_222520_outLine +BABEL_OP1_203_59307_20130503_211805_inLine +BABEL_OP1_203_59307_20130503_211805_outLine +BABEL_OP1_203_59720_20130323_160840_inLine +BABEL_OP1_203_59720_20130323_160840_outLine +BABEL_OP1_203_59747_20130307_185538_inLine +BABEL_OP1_203_59747_20130307_185538_outLine +BABEL_OP1_203_59864_20130719_183902_inLine +BABEL_OP1_203_59864_20130719_183902_outLine +BABEL_OP1_203_59928_20130314_205249_inLine +BABEL_OP1_203_59928_20130314_205249_outLine +BABEL_OP1_203_60026_20130311_192442_inLine +BABEL_OP1_203_60026_20130311_192442_outLine +BABEL_OP1_203_60352_20130724_151721_inLine +BABEL_OP1_203_60352_20130724_151721_outLine +BABEL_OP1_203_60397_20130814_170113_inLine +BABEL_OP1_203_60397_20130814_170113_outLine +BABEL_OP1_203_60436_20130726_213808_inLine +BABEL_OP1_203_60436_20130726_213808_outLine +BABEL_OP1_203_60830_20130323_152836_inLine +BABEL_OP1_203_60830_20130323_152836_outLine +BABEL_OP1_203_61225_20130310_001509_inLine +BABEL_OP1_203_61225_20130310_001509_outLine +BABEL_OP1_203_61225_20130310_002607_inLine +BABEL_OP1_203_61225_20130310_002607_outLine +BABEL_OP1_203_61435_20130421_175121_inLine +BABEL_OP1_203_61435_20130421_175121_outLine +BABEL_OP1_203_61440_20130513_143551_inLine +BABEL_OP1_203_61440_20130513_143551_outLine +BABEL_OP1_203_61888_20130410_154115_inLine +BABEL_OP1_203_61888_20130410_154115_outLine +BABEL_OP1_203_62014_20130503_150317_inLine +BABEL_OP1_203_62014_20130503_150317_outLine +BABEL_OP1_203_62200_20130320_155842_inLine +BABEL_OP1_203_62200_20130320_155842_outLine +BABEL_OP1_203_62360_20130729_185133_inLine +BABEL_OP1_203_62360_20130729_185133_outLine +BABEL_OP1_203_62714_20130430_183624_inLine +BABEL_OP1_203_62714_20130430_183624_outLine +BABEL_OP1_203_62800_20130307_204137_inLine +BABEL_OP1_203_62800_20130307_204137_outLine +BABEL_OP1_203_62976_20130512_201748_inLine +BABEL_OP1_203_62976_20130512_201748_outLine +BABEL_OP1_203_63730_20130507_163540_inLine +BABEL_OP1_203_63730_20130507_163540_outLine +BABEL_OP1_203_64065_20130326_201717_inLine +BABEL_OP1_203_64065_20130326_201717_outLine +BABEL_OP1_203_64065_20130326_202638_inLine +BABEL_OP1_203_64065_20130326_202638_outLine +BABEL_OP1_203_65913_20130726_205358_inLine +BABEL_OP1_203_65913_20130726_205358_outLine +BABEL_OP1_203_66001_20130309_233448_inLine +BABEL_OP1_203_66001_20130309_233448_outLine +BABEL_OP1_203_66045_20130323_203735_inLine +BABEL_OP1_203_66045_20130323_203735_outLine +BABEL_OP1_203_66822_20130324_142935_inLine +BABEL_OP1_203_66822_20130324_142935_outLine +BABEL_OP1_203_66916_20130308_142310_inLine +BABEL_OP1_203_66916_20130308_142310_outLine +BABEL_OP1_203_66971_20130725_151439_inLine +BABEL_OP1_203_66971_20130725_151439_outLine +BABEL_OP1_203_67066_20130509_215551_inLine +BABEL_OP1_203_67066_20130509_215551_outLine +BABEL_OP1_203_68289_20130409_222355_inLine +BABEL_OP1_203_68289_20130409_222355_outLine 
+BABEL_OP1_203_68385_20130221_213027_inLine +BABEL_OP1_203_68385_20130221_213027_outLine +BABEL_OP1_203_69096_20130714_153203_inLine +BABEL_OP1_203_69096_20130714_153203_outLine +BABEL_OP1_203_69474_20130409_153705_inLine +BABEL_OP1_203_69474_20130409_153705_outLine +BABEL_OP1_203_69964_20130801_183705_inLine +BABEL_OP1_203_69964_20130801_183705_outLine +BABEL_OP1_203_70221_20130502_153055_inLine +BABEL_OP1_203_70221_20130502_153055_outLine +BABEL_OP1_203_70386_20130315_162835_inLine +BABEL_OP1_203_70386_20130315_162835_outLine +BABEL_OP1_203_70716_20130731_182939_inLine +BABEL_OP1_203_70716_20130731_182939_outLine +BABEL_OP1_203_71067_20130503_201919_inLine +BABEL_OP1_203_71067_20130503_201919_outLine +BABEL_OP1_203_71566_20130406_212124_inLine +BABEL_OP1_203_71566_20130406_212124_outLine +BABEL_OP1_203_72324_20130721_195442_inLine +BABEL_OP1_203_72324_20130721_195442_outLine +BABEL_OP1_203_72587_20130331_220349_inLine +BABEL_OP1_203_72587_20130331_220349_outLine +BABEL_OP1_203_73591_20130222_132516_inLine +BABEL_OP1_203_73591_20130222_132516_outLine +BABEL_OP1_203_74667_20130322_155857_inLine +BABEL_OP1_203_74667_20130322_155857_outLine +BABEL_OP1_203_74886_20130309_200304_inLine +BABEL_OP1_203_74886_20130309_200304_outLine +BABEL_OP1_203_75064_20130322_142556_inLine +BABEL_OP1_203_75064_20130322_142556_outLine +BABEL_OP1_203_75342_20130404_193602_inLine +BABEL_OP1_203_75342_20130404_193602_outLine +BABEL_OP1_203_75869_20130721_161850_inLine +BABEL_OP1_203_75869_20130721_161850_outLine +BABEL_OP1_203_76444_20130406_153810_inLine +BABEL_OP1_203_76444_20130406_153810_outLine +BABEL_OP1_203_76482_20130508_220808_inLine +BABEL_OP1_203_76482_20130508_220808_outLine +BABEL_OP1_203_77242_20130508_191854_inLine +BABEL_OP1_203_77242_20130508_191854_outLine +BABEL_OP1_203_79131_20130727_202021_inLine +BABEL_OP1_203_79131_20130727_202021_outLine +BABEL_OP1_203_79660_20130512_173422_inLine +BABEL_OP1_203_79660_20130512_173422_outLine +BABEL_OP1_203_80134_20130814_145021_inLine +BABEL_OP1_203_80134_20130814_145021_outLine +BABEL_OP1_203_81287_20130403_225530_inLine +BABEL_OP1_203_81287_20130403_225530_outLine +BABEL_OP1_203_82224_20130718_134750_inLine +BABEL_OP1_203_82224_20130718_134750_outLine +BABEL_OP1_203_84339_20130802_181641_inLine +BABEL_OP1_203_84339_20130802_181641_outLine +BABEL_OP1_203_84469_20130421_132749_inLine +BABEL_OP1_203_84469_20130421_132749_outLine +BABEL_OP1_203_84611_20130312_152852_inLine +BABEL_OP1_203_84611_20130312_152852_outLine +BABEL_OP1_203_85325_20130802_212902_inLine +BABEL_OP1_203_85325_20130802_212902_outLine +BABEL_OP1_203_86597_20130508_182316_inLine +BABEL_OP1_203_86597_20130508_182316_outLine +BABEL_OP1_203_86628_20130512_215243_inLine +BABEL_OP1_203_86628_20130512_215243_outLine +BABEL_OP1_203_86878_20130804_174949_inLine +BABEL_OP1_203_86878_20130804_174949_outLine +BABEL_OP1_203_86891_20130427_122020_inLine +BABEL_OP1_203_86891_20130427_122020_outLine +BABEL_OP1_203_87305_20130512_150816_inLine +BABEL_OP1_203_87305_20130512_150816_outLine +BABEL_OP1_203_89358_20130327_183946_inLine +BABEL_OP1_203_89358_20130327_183946_outLine +BABEL_OP1_203_89943_20130319_151705_inLine +BABEL_OP1_203_89943_20130319_151705_outLine +BABEL_OP1_203_90709_20130311_171156_inLine +BABEL_OP1_203_90709_20130311_171156_outLine +BABEL_OP1_203_91760_20130728_190550_inLine +BABEL_OP1_203_91760_20130728_190550_outLine +BABEL_OP1_203_92077_20130725_140650_inLine +BABEL_OP1_203_92077_20130725_140650_outLine +BABEL_OP1_203_93411_20130324_150550_inLine 
+BABEL_OP1_203_93411_20130324_150550_outLine +BABEL_OP1_203_93490_20130804_201521_inLine +BABEL_OP1_203_93490_20130804_201521_outLine +BABEL_OP1_203_93964_20130327_171307_inLine +BABEL_OP1_203_93964_20130327_171307_outLine +BABEL_OP1_203_94442_20130727_182743_inLine +BABEL_OP1_203_94442_20130727_182743_outLine +BABEL_OP1_203_94449_20130801_010717_inLine +BABEL_OP1_203_94449_20130801_010717_outLine +BABEL_OP1_203_95338_20130727_211019_inLine +BABEL_OP1_203_95338_20130727_211019_outLine +BABEL_OP1_203_96059_20130731_211048_inLine +BABEL_OP1_203_96059_20130731_211048_outLine +BABEL_OP1_203_96376_20130731_143340_outLine +BABEL_OP1_203_96690_20130320_183730_inLine +BABEL_OP1_203_96690_20130320_183730_outLine +BABEL_OP1_203_96690_20130320_185039_inLine +BABEL_OP1_203_96690_20130320_185039_outLine +BABEL_OP1_203_97220_20130508_165310_inLine +BABEL_OP1_203_97220_20130508_165310_outLine +BABEL_OP1_203_97836_20130430_195102_inLine +BABEL_OP1_203_97836_20130430_195102_outLine +BABEL_OP1_203_98192_20130511_210223_inLine +BABEL_OP1_203_98192_20130511_210223_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list new file mode 100644 index 00000000000..38bcbffd9e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list @@ -0,0 +1,257 @@ +BABEL_OP1_203_16184_20130309_181723_inLine +BABEL_OP1_203_29777_20130424_230709_inLine +BABEL_OP1_203_29777_20130424_230709_outLine +BABEL_OP1_203_30253_20130406_221820_inLine +BABEL_OP1_203_30253_20130406_221820_outLine +BABEL_OP1_203_30497_20130724_152950_inLine +BABEL_OP1_203_30497_20130724_152950_outLine +BABEL_OP1_203_30497_20130724_154924_inLine +BABEL_OP1_203_30497_20130724_154924_outLine +BABEL_OP1_203_30653_20130422_190728_inLine +BABEL_OP1_203_30653_20130422_190728_outLine +BABEL_OP1_203_31182_20130407_165109_inLine +BABEL_OP1_203_31182_20130407_165109_outLine +BABEL_OP1_203_33229_20130716_174756_inLine +BABEL_OP1_203_33229_20130716_174756_outLine +BABEL_OP1_203_33273_20130321_185940_inLine +BABEL_OP1_203_33273_20130321_185940_outLine +BABEL_OP1_203_34688_20130309_150605_inLine +BABEL_OP1_203_34688_20130309_150605_outLine +BABEL_OP1_203_35202_20130403_172345_inLine +BABEL_OP1_203_35202_20130403_172345_outLine +BABEL_OP1_203_35885_20130423_124518_inLine +BABEL_OP1_203_35885_20130423_124518_outLine +BABEL_OP1_203_36017_20130727_194800_inLine +BABEL_OP1_203_36017_20130727_194800_outLine +BABEL_OP1_203_36059_20130420_141048_inLine +BABEL_OP1_203_36059_20130420_141048_outLine +BABEL_OP1_203_37064_20130315_163413_inLine +BABEL_OP1_203_37064_20130315_163413_outLine +BABEL_OP1_203_39159_20130307_161600_inLine +BABEL_OP1_203_39159_20130307_161600_outLine +BABEL_OP1_203_40740_20130425_194217_inLine +BABEL_OP1_203_40740_20130425_194217_outLine +BABEL_OP1_203_42718_20130719_184452_inLine +BABEL_OP1_203_42718_20130719_184452_outLine +BABEL_OP1_203_43285_20130403_143505_inLine +BABEL_OP1_203_43285_20130403_143505_outLine +BABEL_OP1_203_44309_20130724_151039_inLine +BABEL_OP1_203_44309_20130724_151039_outLine +BABEL_OP1_203_44681_20130808_235229_inLine +BABEL_OP1_203_44681_20130808_235229_outLine +BABEL_OP1_203_44847_20130713_184411_inLine +BABEL_OP1_203_44847_20130713_184411_outLine +BABEL_OP1_203_45201_20130802_170453_inLine +BABEL_OP1_203_45201_20130802_170453_outLine +BABEL_OP1_203_45697_20130410_000422_inLine +BABEL_OP1_203_45697_20130410_000422_outLine +BABEL_OP1_203_46702_20130306_164740_inLine +BABEL_OP1_203_46702_20130306_164740_outLine 
+BABEL_OP1_203_46712_20130323_203036_inLine +BABEL_OP1_203_46712_20130323_203036_outLine +BABEL_OP1_203_46881_20130307_203600_inLine +BABEL_OP1_203_46881_20130307_203600_outLine +BABEL_OP1_203_46974_20130404_232711_inLine +BABEL_OP1_203_46974_20130404_232711_outLine +BABEL_OP1_203_49197_20130318_181956_inLine +BABEL_OP1_203_49197_20130318_181956_outLine +BABEL_OP1_203_49767_20130430_202016_inLine +BABEL_OP1_203_49767_20130430_202016_outLine +BABEL_OP1_203_49812_20130808_171144_inLine +BABEL_OP1_203_49812_20130808_171144_outLine +BABEL_OP1_203_52070_20130808_163435_inLine +BABEL_OP1_203_52070_20130808_163435_outLine +BABEL_OP1_203_52442_20130506_145255_inLine +BABEL_OP1_203_52442_20130506_145255_outLine +BABEL_OP1_203_52614_20130727_194453_inLine +BABEL_OP1_203_52614_20130727_194453_outLine +BABEL_OP1_203_53419_20130406_175304_inLine +BABEL_OP1_203_53419_20130406_175304_outLine +BABEL_OP1_203_54040_20130406_184426_inLine +BABEL_OP1_203_54040_20130406_184426_outLine +BABEL_OP1_203_54405_20130729_003503_inLine +BABEL_OP1_203_54405_20130729_003503_outLine +BABEL_OP1_203_55267_20130505_191654_inLine +BABEL_OP1_203_55267_20130505_191654_outLine +BABEL_OP1_203_57219_20130502_194506_inLine +BABEL_OP1_203_57219_20130502_194506_outLine +BABEL_OP1_203_57464_20130725_171314_inLine +BABEL_OP1_203_57464_20130725_171314_outLine +BABEL_OP1_203_60626_20130322_152952_inLine +BABEL_OP1_203_60626_20130322_152952_outLine +BABEL_OP1_203_61971_20130725_164007_inLine +BABEL_OP1_203_61971_20130725_164007_outLine +BABEL_OP1_203_62047_20130407_151438_inLine +BABEL_OP1_203_62047_20130407_151438_outLine +BABEL_OP1_203_62286_20130320_214620_inLine +BABEL_OP1_203_62286_20130320_214620_outLine +BABEL_OP1_203_62456_20130328_142035_inLine +BABEL_OP1_203_62456_20130328_142035_outLine +BABEL_OP1_203_62835_20130323_203456_inLine +BABEL_OP1_203_62835_20130323_203456_outLine +BABEL_OP1_203_62852_20130306_200729_inLine +BABEL_OP1_203_62852_20130306_200729_outLine +BABEL_OP1_203_63220_20130331_212757_inLine +BABEL_OP1_203_63220_20130331_212757_outLine +BABEL_OP1_203_63445_20130307_151033_inLine +BABEL_OP1_203_63445_20130307_151033_outLine +BABEL_OP1_203_63757_20130328_223730_inLine +BABEL_OP1_203_63757_20130328_223730_outLine +BABEL_OP1_203_63938_20130410_173153_inLine +BABEL_OP1_203_63938_20130410_173153_outLine +BABEL_OP1_203_64494_20130313_131022_inLine +BABEL_OP1_203_64494_20130313_131022_outLine +BABEL_OP1_203_64638_20130410_142811_inLine +BABEL_OP1_203_64638_20130410_142811_outLine +BABEL_OP1_203_64759_20130309_200819_inLine +BABEL_OP1_203_64759_20130309_200819_outLine +BABEL_OP1_203_64796_20130307_184812_inLine +BABEL_OP1_203_64796_20130307_184812_outLine +BABEL_OP1_203_65466_20130725_163637_inLine +BABEL_OP1_203_65466_20130725_163637_outLine +BABEL_OP1_203_65477_20130320_173710_inLine +BABEL_OP1_203_65477_20130320_173710_outLine +BABEL_OP1_203_65477_20130320_180148_inLine +BABEL_OP1_203_65477_20130320_180148_outLine +BABEL_OP1_203_65477_20130320_201453_inLine +BABEL_OP1_203_65477_20130320_201453_outLine +BABEL_OP1_203_65639_20130806_171139_inLine +BABEL_OP1_203_65639_20130806_171139_outLine +BABEL_OP1_203_66837_20130405_182629_inLine +BABEL_OP1_203_66837_20130405_182629_outLine +BABEL_OP1_203_66959_20130401_000804_inLine +BABEL_OP1_203_66959_20130401_000804_outLine +BABEL_OP1_203_66967_20130309_193012_inLine +BABEL_OP1_203_66967_20130309_193012_outLine +BABEL_OP1_203_67726_20130815_142409_inLine +BABEL_OP1_203_67726_20130815_142409_outLine +BABEL_OP1_203_68910_20130819_161909_inLine 
+BABEL_OP1_203_68910_20130819_161909_outLine +BABEL_OP1_203_68910_20130819_163243_inLine +BABEL_OP1_203_68910_20130819_163243_outLine +BABEL_OP1_203_69633_20130425_200355_inLine +BABEL_OP1_203_69633_20130425_200355_outLine +BABEL_OP1_203_69982_20130506_163359_inLine +BABEL_OP1_203_69982_20130506_163359_outLine +BABEL_OP1_203_70282_20130329_152316_inLine +BABEL_OP1_203_70282_20130329_152316_outLine +BABEL_OP1_203_71704_20130312_213023_inLine +BABEL_OP1_203_71704_20130312_213023_outLine +BABEL_OP1_203_72349_20130726_200409_inLine +BABEL_OP1_203_72349_20130726_200409_outLine +BABEL_OP1_203_72844_20130307_143012_inLine +BABEL_OP1_203_72844_20130307_143012_outLine +BABEL_OP1_203_73622_20130311_175840_inLine +BABEL_OP1_203_73622_20130311_175840_outLine +BABEL_OP1_203_74253_20130403_190412_inLine +BABEL_OP1_203_74253_20130403_190412_outLine +BABEL_OP1_203_75366_20130430_153011_inLine +BABEL_OP1_203_75366_20130430_153011_outLine +BABEL_OP1_203_75465_20130408_174529_inLine +BABEL_OP1_203_75465_20130408_174529_outLine +BABEL_OP1_203_76218_20130320_160931_inLine +BABEL_OP1_203_76218_20130320_160931_outLine +BABEL_OP1_203_76218_20130320_162301_inLine +BABEL_OP1_203_76218_20130320_162301_outLine +BABEL_OP1_203_76773_20130313_174635_inLine +BABEL_OP1_203_76773_20130313_174635_outLine +BABEL_OP1_203_76970_20130502_140228_inLine +BABEL_OP1_203_76970_20130502_140228_outLine +BABEL_OP1_203_76970_20130502_141316_inLine +BABEL_OP1_203_76970_20130502_141316_outLine +BABEL_OP1_203_77391_20130321_134502_inLine +BABEL_OP1_203_77391_20130321_134502_outLine +BABEL_OP1_203_77567_20130307_183648_inLine +BABEL_OP1_203_77567_20130307_183648_outLine +BABEL_OP1_203_78609_20130411_135436_inLine +BABEL_OP1_203_78609_20130411_135436_outLine +BABEL_OP1_203_78958_20130815_152142_inLine +BABEL_OP1_203_78958_20130815_152142_outLine +BABEL_OP1_203_78976_20130320_143441_inLine +BABEL_OP1_203_78976_20130320_143441_outLine +BABEL_OP1_203_79107_20130501_145558_inLine +BABEL_OP1_203_79107_20130501_145558_outLine +BABEL_OP1_203_79571_20130401_193207_inLine +BABEL_OP1_203_79571_20130401_193207_outLine +BABEL_OP1_203_79858_20130309_212924_inLine +BABEL_OP1_203_79858_20130309_212924_outLine +BABEL_OP1_203_80721_20130402_142121_inLine +BABEL_OP1_203_80721_20130402_142121_outLine +BABEL_OP1_203_80897_20130328_174210_inLine +BABEL_OP1_203_80897_20130328_174210_outLine +BABEL_OP1_203_81229_20130321_133228_inLine +BABEL_OP1_203_81229_20130321_133228_outLine +BABEL_OP1_203_81854_20130730_230009_inLine +BABEL_OP1_203_81854_20130730_230009_outLine +BABEL_OP1_203_82966_20130405_153412_inLine +BABEL_OP1_203_82966_20130405_153412_outLine +BABEL_OP1_203_83366_20130428_224139_inLine +BABEL_OP1_203_83366_20130428_224139_outLine +BABEL_OP1_203_83775_20130319_135705_inLine +BABEL_OP1_203_83775_20130319_135705_outLine +BABEL_OP1_203_84029_20130812_185834_inLine +BABEL_OP1_203_84029_20130812_185834_outLine +BABEL_OP1_203_84125_20130306_192759_inLine +BABEL_OP1_203_84125_20130306_192759_outLine +BABEL_OP1_203_84583_20130409_145116_inLine +BABEL_OP1_203_84583_20130409_145116_outLine +BABEL_OP1_203_85248_20130403_172428_inLine +BABEL_OP1_203_85248_20130403_172428_outLine +BABEL_OP1_203_85248_20130403_173731_inLine +BABEL_OP1_203_85248_20130403_173731_outLine +BABEL_OP1_203_86748_20130424_181510_inLine +BABEL_OP1_203_86748_20130424_181510_outLine +BABEL_OP1_203_87871_20130403_233602_inLine +BABEL_OP1_203_87871_20130403_233602_outLine +BABEL_OP1_203_88812_20130724_142719_inLine +BABEL_OP1_203_88812_20130724_142719_outLine 
+BABEL_OP1_203_89045_20130306_200546_inLine +BABEL_OP1_203_89045_20130306_200546_outLine +BABEL_OP1_203_90935_20130319_215413_inLine +BABEL_OP1_203_90935_20130319_215413_outLine +BABEL_OP1_203_91581_20130406_211109_inLine +BABEL_OP1_203_91581_20130406_211109_outLine +BABEL_OP1_203_91593_20130511_222420_inLine +BABEL_OP1_203_91593_20130511_222420_outLine +BABEL_OP1_203_91825_20130310_211043_inLine +BABEL_OP1_203_91825_20130310_211043_outLine +BABEL_OP1_203_91884_20130422_190145_inLine +BABEL_OP1_203_91884_20130422_190145_outLine +BABEL_OP1_203_92176_20130322_143345_inLine +BABEL_OP1_203_92176_20130322_143345_outLine +BABEL_OP1_203_92356_20130715_210447_inLine +BABEL_OP1_203_92356_20130715_210447_outLine +BABEL_OP1_203_92698_20130327_174701_inLine +BABEL_OP1_203_92698_20130327_174701_outLine +BABEL_OP1_203_92698_20130327_175923_inLine +BABEL_OP1_203_92698_20130327_175923_outLine +BABEL_OP1_203_92886_20130314_211354_inLine +BABEL_OP1_203_92886_20130314_211354_outLine +BABEL_OP1_203_93224_20130503_144751_inLine +BABEL_OP1_203_93224_20130503_144751_outLine +BABEL_OP1_203_93320_20130502_175919_inLine +BABEL_OP1_203_93320_20130502_175919_outLine +BABEL_OP1_203_93946_20130406_004722_inLine +BABEL_OP1_203_93946_20130406_004722_outLine +BABEL_OP1_203_94212_20130806_184552_inLine +BABEL_OP1_203_94212_20130806_184552_outLine +BABEL_OP1_203_95966_20130320_201310_inLine +BABEL_OP1_203_95966_20130320_201310_outLine +BABEL_OP1_203_96205_20130324_175526_inLine +BABEL_OP1_203_96205_20130324_175526_outLine +BABEL_OP1_203_96584_20130410_144453_inLine +BABEL_OP1_203_96584_20130410_144453_outLine +BABEL_OP1_203_96934_20130319_142928_inLine +BABEL_OP1_203_96934_20130319_142928_outLine +BABEL_OP1_203_96985_20130313_141845_inLine +BABEL_OP1_203_96985_20130313_141845_outLine +BABEL_OP1_203_97136_20130410_190244_inLine +BABEL_OP1_203_97136_20130410_190244_outLine +BABEL_OP1_203_98506_20130423_152625_inLine +BABEL_OP1_203_98506_20130423_152625_outLine +BABEL_OP1_203_98678_20130721_152255_inLine +BABEL_OP1_203_98678_20130721_152255_outLine +BABEL_OP1_203_99487_20130311_174358_inLine +BABEL_OP1_203_99487_20130311_174358_outLine +BABEL_OP1_203_99516_20130309_164733_inLine +BABEL_OP1_203_99516_20130309_164733_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/dev.list b/egs/babel/s5d/conf/lists/204-tamil/dev.list new file mode 100644 index 00000000000..f793b6bf7fa --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/dev.list @@ -0,0 +1,125 @@ +BABEL_OP1_204_13189_20130613_161247_inLine +BABEL_OP1_204_13189_20130613_161247_outLine +BABEL_OP1_204_17881_20130219_205442_inLine +BABEL_OP1_204_17881_20130219_205442_outLine +BABEL_OP1_204_18924_20130224_150538_inLine +BABEL_OP1_204_18924_20130224_150538_outLine +BABEL_OP1_204_20682_20130209_174057_inLine +BABEL_OP1_204_20682_20130209_174057_outLine +BABEL_OP1_204_22021_20130818_153135_inLine +BABEL_OP1_204_22021_20130818_153135_outLine +BABEL_OP1_204_22288_20130820_021043_inLine +BABEL_OP1_204_22288_20130820_021043_outLine +BABEL_OP1_204_22288_20130820_022958_inLine +BABEL_OP1_204_22288_20130820_022958_outLine +BABEL_OP1_204_22466_20121213_214935_inLine +BABEL_OP1_204_22466_20121213_214935_outLine +BABEL_OP1_204_23700_20130825_003724_inLine +BABEL_OP1_204_23700_20130825_003724_outLine +BABEL_OP1_204_23700_20130825_004922_inLine +BABEL_OP1_204_23700_20130825_004922_outLine +BABEL_OP1_204_24239_20130227_004742_inLine +BABEL_OP1_204_24239_20130227_004742_outLine +BABEL_OP1_204_24290_20130228_200830_inLine +BABEL_OP1_204_24290_20130228_200830_outLine 
+BABEL_OP1_204_24679_20130112_222528_inLine +BABEL_OP1_204_24679_20130112_222528_outLine +BABEL_OP1_204_25895_20130830_022140_inLine +BABEL_OP1_204_25895_20130830_022140_outLine +BABEL_OP1_204_26602_20130215_003413_inLine +BABEL_OP1_204_26602_20130215_003413_outLine +BABEL_OP1_204_27218_20130102_192252_inLine +BABEL_OP1_204_27218_20130102_192252_outLine +BABEL_OP1_204_28606_20130126_221856_inLine +BABEL_OP1_204_28606_20130126_221856_outLine +BABEL_OP1_204_28945_20130102_221003_inLine +BABEL_OP1_204_28945_20130102_221003_outLine +BABEL_OP1_204_29076_20130222_205943_inLine +BABEL_OP1_204_29076_20130222_205943_outLine +BABEL_OP1_204_31624_20130107_221428_inLine +BABEL_OP1_204_31624_20130107_221428_outLine +BABEL_OP1_204_32287_20130902_231135_inLine +BABEL_OP1_204_32287_20130902_231135_outLine +BABEL_OP1_204_33672_20130115_033234_inLine +BABEL_OP1_204_33672_20130115_033234_outLine +BABEL_OP1_204_37290_20130707_161547_inLine +BABEL_OP1_204_37290_20130707_161547_outLine +BABEL_OP1_204_37594_20130805_155303_inLine +BABEL_OP1_204_37594_20130805_155303_outLine +BABEL_OP1_204_38979_20130516_003257_inLine +BABEL_OP1_204_38979_20130516_003257_outLine +BABEL_OP1_204_42155_20130122_030534_inLine +BABEL_OP1_204_42155_20130122_030534_outLine +BABEL_OP1_204_43239_20130216_055950_inLine +BABEL_OP1_204_43239_20130216_055950_outLine +BABEL_OP1_204_44029_20130824_003907_inLine +BABEL_OP1_204_44029_20130824_003907_outLine +BABEL_OP1_204_44619_20130104_192431_inLine +BABEL_OP1_204_44619_20130104_192431_outLine +BABEL_OP1_204_44961_20130106_015828_inLine +BABEL_OP1_204_44961_20130106_015828_outLine +BABEL_OP1_204_46535_20130818_001009_inLine +BABEL_OP1_204_46535_20130818_001009_outLine +BABEL_OP1_204_47451_20130210_010011_inLine +BABEL_OP1_204_47451_20130210_010011_outLine +BABEL_OP1_204_48024_20130829_223102_inLine +BABEL_OP1_204_48024_20130829_223102_outLine +BABEL_OP1_204_50565_20121224_203735_inLine +BABEL_OP1_204_50565_20121224_203735_outLine +BABEL_OP1_204_51701_20130312_022556_inLine +BABEL_OP1_204_51701_20130312_022556_outLine +BABEL_OP1_204_54160_20121231_225532_inLine +BABEL_OP1_204_54160_20121231_225532_outLine +BABEL_OP1_204_55136_20130705_164312_inLine +BABEL_OP1_204_55136_20130705_164312_outLine +BABEL_OP1_204_57935_20130126_234131_inLine +BABEL_OP1_204_57935_20130126_234131_outLine +BABEL_OP1_204_58047_20130222_222259_inLine +BABEL_OP1_204_58047_20130222_222259_outLine +BABEL_OP1_204_59747_20121222_160946_inLine +BABEL_OP1_204_59747_20121222_160946_outLine +BABEL_OP1_204_61440_20130627_182754_inLine +BABEL_OP1_204_61440_20130627_182754_outLine +BABEL_OP1_204_62545_20130703_202255_inLine +BABEL_OP1_204_62545_20130703_202255_outLine +BABEL_OP1_204_63484_20130821_005511_inLine +BABEL_OP1_204_63484_20130821_005511_outLine +BABEL_OP1_204_64350_20130102_195330_inLine +BABEL_OP1_204_64350_20130102_195330_outLine +BABEL_OP1_204_64902_20130215_191500_inLine +BABEL_OP1_204_64902_20130215_191500_outLine +BABEL_OP1_204_68244_20130129_184054_inLine +BABEL_OP1_204_70639_20130704_165905_inLine +BABEL_OP1_204_70639_20130704_165905_outLine +BABEL_OP1_204_71121_20130522_213640_inLine +BABEL_OP1_204_71121_20130522_213640_outLine +BABEL_OP1_204_73990_20130521_162632_inLine +BABEL_OP1_204_73990_20130521_162632_outLine +BABEL_OP1_204_78161_20130521_152635_inLine +BABEL_OP1_204_78161_20130521_152635_outLine +BABEL_OP1_204_83238_20130121_201216_inLine +BABEL_OP1_204_83238_20130121_201216_outLine +BABEL_OP1_204_84177_20130901_213641_inLine +BABEL_OP1_204_84177_20130901_213641_outLine 
+BABEL_OP1_204_84815_20130209_040750_inLine +BABEL_OP1_204_84815_20130209_040750_outLine +BABEL_OP1_204_86557_20130103_183044_inLine +BABEL_OP1_204_86557_20130103_183044_outLine +BABEL_OP1_204_87074_20130107_181209_inLine +BABEL_OP1_204_87074_20130107_181209_outLine +BABEL_OP1_204_87298_20130114_172850_inLine +BABEL_OP1_204_87298_20130114_172850_outLine +BABEL_OP1_204_90937_20130516_224543_inLine +BABEL_OP1_204_90937_20130516_224543_outLine +BABEL_OP1_204_91808_20130603_193623_inLine +BABEL_OP1_204_91808_20130603_193623_outLine +BABEL_OP1_204_92509_20130107_011707_inLine +BABEL_OP1_204_92509_20130107_011707_outLine +BABEL_OP1_204_94465_20130212_212918_inLine +BABEL_OP1_204_94465_20130212_212918_outLine +BABEL_OP1_204_94923_20130608_143347_inLine +BABEL_OP1_204_94923_20130608_143347_outLine +BABEL_OP1_204_96059_20130225_212517_inLine +BABEL_OP1_204_96059_20130225_212517_outLine +BABEL_OP1_204_97286_20130520_145640_inLine +BABEL_OP1_204_97286_20130520_145640_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/eval.list b/egs/babel/s5d/conf/lists/204-tamil/eval.list new file mode 100644 index 00000000000..1887ca15694 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/eval.list @@ -0,0 +1,947 @@ +BABEL_OP1_204_10058_20130305_040021_inLine +BABEL_OP1_204_10058_20130305_040021_outLine +BABEL_OP1_204_10313_20130705_155607_inLine +BABEL_OP1_204_10313_20130705_155607_outLine +BABEL_OP1_204_10524_20130219_145437_inLine +BABEL_OP1_204_10524_20130219_145437_outLine +BABEL_OP1_204_10524_20130219_235944_inLine +BABEL_OP1_204_10524_20130219_235944_outLine +BABEL_OP1_204_10524_20130220_000643_inLine +BABEL_OP1_204_10524_20130220_000643_outLine +BABEL_OP1_204_10638_20130510_124441_inLine +BABEL_OP1_204_10638_20130510_124441_outLine +BABEL_OP1_204_11768_20130825_151244_inLine +BABEL_OP1_204_11768_20130825_151244_outLine +BABEL_OP1_204_12321_20130220_211618_inLine +BABEL_OP1_204_12321_20130220_211618_outLine +BABEL_OP1_204_12635_20130601_152113_inLine +BABEL_OP1_204_12635_20130601_152113_outLine +BABEL_OP1_204_12916_20130107_224212_inLine +BABEL_OP1_204_12916_20130107_224212_outLine +BABEL_OP1_204_13561_20130120_185547_inLine +BABEL_OP1_204_13561_20130120_185547_outLine +BABEL_OP1_204_13664_20121218_221847_inLine +BABEL_OP1_204_13664_20121218_221847_outLine +BABEL_OP1_204_13909_20130313_210114_inLine +BABEL_OP1_204_13909_20130313_210114_outLine +BABEL_OP1_204_13929_20130716_200759_inLine +BABEL_OP1_204_13929_20130716_200759_outLine +BABEL_OP1_204_14028_20130820_214748_inLine +BABEL_OP1_204_14028_20130820_214748_outLine +BABEL_OP1_204_14229_20130112_024917_inLine +BABEL_OP1_204_14229_20130112_024917_outLine +BABEL_OP1_204_14350_20130113_023333_inLine +BABEL_OP1_204_14350_20130113_023333_outLine +BABEL_OP1_204_14537_20130303_005043_inLine +BABEL_OP1_204_14537_20130303_005043_outLine +BABEL_OP1_204_14723_20130710_180819_inLine +BABEL_OP1_204_14723_20130710_180819_outLine +BABEL_OP1_204_14875_20130111_192622_inLine +BABEL_OP1_204_14875_20130111_192622_outLine +BABEL_OP1_204_14929_20130131_002309_inLine +BABEL_OP1_204_14929_20130131_002309_outLine +BABEL_OP1_204_15163_20130130_004303_inLine +BABEL_OP1_204_15163_20130130_004303_outLine +BABEL_OP1_204_15227_20130624_180548_inLine +BABEL_OP1_204_15227_20130624_180548_outLine +BABEL_OP1_204_15382_20130126_201407_inLine +BABEL_OP1_204_15382_20130126_201407_outLine +BABEL_OP1_204_15420_20130901_223125_inLine +BABEL_OP1_204_15420_20130901_223125_outLine +BABEL_OP1_204_15466_20130521_205553_inLine +BABEL_OP1_204_15466_20130521_205553_outLine 
+BABEL_OP1_204_15848_20121218_180011_inLine +BABEL_OP1_204_15848_20121218_180011_outLine +BABEL_OP1_204_16056_20130102_234300_inLine +BABEL_OP1_204_16056_20130102_234300_outLine +BABEL_OP1_204_16184_20121220_210106_inLine +BABEL_OP1_204_16184_20121220_210106_outLine +BABEL_OP1_204_16351_20130705_205024_inLine +BABEL_OP1_204_16351_20130705_205024_outLine +BABEL_OP1_204_16749_20130224_215355_inLine +BABEL_OP1_204_16749_20130224_215355_outLine +BABEL_OP1_204_16787_20130121_045651_inLine +BABEL_OP1_204_16787_20130121_045651_outLine +BABEL_OP1_204_17165_20130130_191341_inLine +BABEL_OP1_204_17165_20130130_191341_outLine +BABEL_OP1_204_17511_20130716_180322_inLine +BABEL_OP1_204_17511_20130716_180322_outLine +BABEL_OP1_204_17520_20130122_040744_inLine +BABEL_OP1_204_17520_20130122_040744_outLine +BABEL_OP1_204_17914_20130311_035117_inLine +BABEL_OP1_204_17914_20130311_035117_outLine +BABEL_OP1_204_17937_20130803_170049_inLine +BABEL_OP1_204_17937_20130803_170049_outLine +BABEL_OP1_204_18033_20130906_011555_inLine +BABEL_OP1_204_18033_20130906_011555_outLine +BABEL_OP1_204_18863_20130210_164314_inLine +BABEL_OP1_204_18863_20130210_164314_outLine +BABEL_OP1_204_19545_20130122_164148_inLine +BABEL_OP1_204_19545_20130122_164148_outLine +BABEL_OP1_204_19663_20130126_195459_inLine +BABEL_OP1_204_19663_20130126_195459_outLine +BABEL_OP1_204_19749_20130707_153432_inLine +BABEL_OP1_204_19749_20130707_153432_outLine +BABEL_OP1_204_19773_20130217_220127_inLine +BABEL_OP1_204_19773_20130217_220127_outLine +BABEL_OP1_204_19773_20130217_234204_inLine +BABEL_OP1_204_19773_20130217_234204_outLine +BABEL_OP1_204_19782_20130209_175552_inLine +BABEL_OP1_204_19782_20130209_175552_outLine +BABEL_OP1_204_20133_20121218_172017_inLine +BABEL_OP1_204_20133_20121218_172017_outLine +BABEL_OP1_204_20800_20130102_180915_inLine +BABEL_OP1_204_20800_20130102_180915_outLine +BABEL_OP1_204_20896_20130822_163553_inLine +BABEL_OP1_204_20896_20130822_163553_outLine +BABEL_OP1_204_21004_20130209_230509_inLine +BABEL_OP1_204_21004_20130209_230509_outLine +BABEL_OP1_204_21029_20130107_212248_inLine +BABEL_OP1_204_21029_20130107_212248_outLine +BABEL_OP1_204_21109_20130301_151421_inLine +BABEL_OP1_204_21109_20130301_151421_outLine +BABEL_OP1_204_21159_20130607_143737_inLine +BABEL_OP1_204_21159_20130607_143737_outLine +BABEL_OP1_204_21244_20130627_172541_inLine +BABEL_OP1_204_21244_20130627_172541_outLine +BABEL_OP1_204_21581_20130118_192005_inLine +BABEL_OP1_204_21581_20130118_192005_outLine +BABEL_OP1_204_21794_20130129_233131_inLine +BABEL_OP1_204_21794_20130129_233131_outLine +BABEL_OP1_204_22280_20130222_191834_inLine +BABEL_OP1_204_22280_20130222_191834_outLine +BABEL_OP1_204_22641_20121224_195014_inLine +BABEL_OP1_204_22641_20121224_195014_outLine +BABEL_OP1_204_23196_20130605_144617_inLine +BABEL_OP1_204_23196_20130605_144617_outLine +BABEL_OP1_204_23355_20130812_203203_inLine +BABEL_OP1_204_23355_20130812_203203_outLine +BABEL_OP1_204_23355_20130812_204058_inLine +BABEL_OP1_204_23355_20130812_204058_outLine +BABEL_OP1_204_23395_20130126_013244_inLine +BABEL_OP1_204_23395_20130126_013244_outLine +BABEL_OP1_204_23505_20130108_173244_inLine +BABEL_OP1_204_23505_20130108_173244_outLine +BABEL_OP1_204_23681_20130625_175654_inLine +BABEL_OP1_204_23681_20130625_175654_outLine +BABEL_OP1_204_23983_20130707_144157_inLine +BABEL_OP1_204_23983_20130707_144157_outLine +BABEL_OP1_204_23983_20130707_145156_inLine +BABEL_OP1_204_23983_20130707_145156_outLine +BABEL_OP1_204_23995_20130209_202505_inLine 
+BABEL_OP1_204_23995_20130209_202505_outLine +BABEL_OP1_204_24017_20130209_202828_inLine +BABEL_OP1_204_24017_20130209_202828_outLine +BABEL_OP1_204_24037_20130708_184129_inLine +BABEL_OP1_204_24037_20130708_184129_outLine +BABEL_OP1_204_24323_20130121_041043_inLine +BABEL_OP1_204_24323_20130121_041043_outLine +BABEL_OP1_204_24589_20130111_223930_inLine +BABEL_OP1_204_24589_20130111_223930_outLine +BABEL_OP1_204_24779_20130607_183107_inLine +BABEL_OP1_204_24779_20130607_183107_outLine +BABEL_OP1_204_25012_20130705_184756_inLine +BABEL_OP1_204_25012_20130705_184756_outLine +BABEL_OP1_204_25015_20130329_012535_inLine +BABEL_OP1_204_25015_20130329_012535_outLine +BABEL_OP1_204_25068_20130901_230020_inLine +BABEL_OP1_204_25068_20130901_230020_outLine +BABEL_OP1_204_25068_20130901_235001_inLine +BABEL_OP1_204_25068_20130901_235001_outLine +BABEL_OP1_204_25085_20130612_170506_inLine +BABEL_OP1_204_25085_20130612_170506_outLine +BABEL_OP1_204_25198_20130625_185430_inLine +BABEL_OP1_204_25198_20130625_185430_outLine +BABEL_OP1_204_25220_20130715_192051_inLine +BABEL_OP1_204_25220_20130715_192051_outLine +BABEL_OP1_204_25767_20130107_180931_inLine +BABEL_OP1_204_25767_20130107_180931_outLine +BABEL_OP1_204_26206_20130129_191521_inLine +BABEL_OP1_204_26206_20130129_191521_outLine +BABEL_OP1_204_26574_20130216_002354_inLine +BABEL_OP1_204_26574_20130216_002354_outLine +BABEL_OP1_204_26574_20130218_013612_inLine +BABEL_OP1_204_26574_20130218_013612_outLine +BABEL_OP1_204_26869_20130815_190057_inLine +BABEL_OP1_204_26869_20130815_190057_outLine +BABEL_OP1_204_27014_20130708_191739_inLine +BABEL_OP1_204_27014_20130708_191739_outLine +BABEL_OP1_204_27367_20130708_153816_inLine +BABEL_OP1_204_27367_20130708_153816_outLine +BABEL_OP1_204_28012_20130211_213147_inLine +BABEL_OP1_204_28012_20130211_213147_outLine +BABEL_OP1_204_28303_20130112_003656_inLine +BABEL_OP1_204_28303_20130112_003656_outLine +BABEL_OP1_204_28585_20130208_014141_inLine +BABEL_OP1_204_28585_20130208_014141_outLine +BABEL_OP1_204_28775_20130119_000600_inLine +BABEL_OP1_204_28775_20130119_000600_outLine +BABEL_OP1_204_28814_20130224_202343_inLine +BABEL_OP1_204_28814_20130224_202343_outLine +BABEL_OP1_204_28871_20121219_184300_inLine +BABEL_OP1_204_28871_20121219_184300_outLine +BABEL_OP1_204_29021_20130227_043427_inLine +BABEL_OP1_204_29021_20130227_043427_outLine +BABEL_OP1_204_29072_20130127_023330_inLine +BABEL_OP1_204_29072_20130127_023330_outLine +BABEL_OP1_204_29168_20130112_230634_inLine +BABEL_OP1_204_29168_20130112_230634_outLine +BABEL_OP1_204_29208_20130127_011057_inLine +BABEL_OP1_204_29208_20130127_011057_outLine +BABEL_OP1_204_29352_20130628_145610_inLine +BABEL_OP1_204_29352_20130628_145610_outLine +BABEL_OP1_204_29663_20130829_225524_inLine +BABEL_OP1_204_29663_20130829_225524_outLine +BABEL_OP1_204_29765_20130607_162026_inLine +BABEL_OP1_204_29765_20130607_162026_outLine +BABEL_OP1_204_29777_20130211_193239_inLine +BABEL_OP1_204_29777_20130211_193239_outLine +BABEL_OP1_204_30461_20130628_160212_inLine +BABEL_OP1_204_30461_20130628_160212_outLine +BABEL_OP1_204_30653_20130216_171325_inLine +BABEL_OP1_204_30653_20130216_171325_outLine +BABEL_OP1_204_30720_20130524_153314_inLine +BABEL_OP1_204_30720_20130524_153314_outLine +BABEL_OP1_204_30869_20130211_004250_inLine +BABEL_OP1_204_30869_20130211_004250_outLine +BABEL_OP1_204_30974_20130508_113119_inLine +BABEL_OP1_204_30974_20130508_113119_outLine +BABEL_OP1_204_31109_20130121_195304_inLine +BABEL_OP1_204_31109_20130121_195304_outLine 
+BABEL_OP1_204_31267_20130311_024343_inLine +BABEL_OP1_204_31267_20130311_024343_outLine +BABEL_OP1_204_31668_20130603_155703_inLine +BABEL_OP1_204_31668_20130603_155703_outLine +BABEL_OP1_204_31919_20130322_030728_inLine +BABEL_OP1_204_31919_20130322_030728_outLine +BABEL_OP1_204_32380_20130812_163206_inLine +BABEL_OP1_204_32380_20130812_163206_outLine +BABEL_OP1_204_32630_20130618_150743_inLine +BABEL_OP1_204_32630_20130618_150743_outLine +BABEL_OP1_204_32708_20130107_000057_inLine +BABEL_OP1_204_32708_20130107_000057_outLine +BABEL_OP1_204_32832_20130208_200126_inLine +BABEL_OP1_204_32832_20130208_200126_outLine +BABEL_OP1_204_32837_20130211_011900_inLine +BABEL_OP1_204_32837_20130211_011900_outLine +BABEL_OP1_204_32914_20130218_021836_inLine +BABEL_OP1_204_32914_20130218_021836_outLine +BABEL_OP1_204_32914_20130218_023337_inLine +BABEL_OP1_204_32914_20130218_023337_outLine +BABEL_OP1_204_32961_20130518_164254_inLine +BABEL_OP1_204_32961_20130518_164254_outLine +BABEL_OP1_204_33149_20130901_211119_inLine +BABEL_OP1_204_33149_20130901_211119_outLine +BABEL_OP1_204_33333_20130818_163046_inLine +BABEL_OP1_204_33333_20130818_163046_outLine +BABEL_OP1_204_33635_20130127_024601_inLine +BABEL_OP1_204_33635_20130127_024601_outLine +BABEL_OP1_204_33992_20130625_183028_inLine +BABEL_OP1_204_33992_20130625_183028_outLine +BABEL_OP1_204_34208_20130815_173402_inLine +BABEL_OP1_204_34208_20130815_173402_outLine +BABEL_OP1_204_34336_20130111_190838_inLine +BABEL_OP1_204_34336_20130111_190838_outLine +BABEL_OP1_204_34564_20130217_024252_inLine +BABEL_OP1_204_34564_20130217_024252_outLine +BABEL_OP1_204_34629_20130524_214401_inLine +BABEL_OP1_204_34629_20130524_214401_outLine +BABEL_OP1_204_35069_20130211_183408_inLine +BABEL_OP1_204_35069_20130211_183408_outLine +BABEL_OP1_204_35139_20130114_222544_inLine +BABEL_OP1_204_35139_20130114_222544_outLine +BABEL_OP1_204_35467_20121221_225338_inLine +BABEL_OP1_204_35467_20121221_225338_outLine +BABEL_OP1_204_35583_20130224_214957_inLine +BABEL_OP1_204_35583_20130224_214957_outLine +BABEL_OP1_204_35786_20130625_191629_inLine +BABEL_OP1_204_35786_20130625_191629_outLine +BABEL_OP1_204_35885_20130225_225544_inLine +BABEL_OP1_204_35885_20130225_225544_outLine +BABEL_OP1_204_36147_20130902_003850_inLine +BABEL_OP1_204_36147_20130902_003850_outLine +BABEL_OP1_204_36219_20130116_023001_inLine +BABEL_OP1_204_36219_20130116_023001_outLine +BABEL_OP1_204_36300_20130802_173230_inLine +BABEL_OP1_204_36300_20130802_173230_outLine +BABEL_OP1_204_36364_20130802_160044_inLine +BABEL_OP1_204_36364_20130802_160044_outLine +BABEL_OP1_204_36505_20130209_151150_inLine +BABEL_OP1_204_36505_20130209_151150_outLine +BABEL_OP1_204_36505_20130212_211726_inLine +BABEL_OP1_204_36505_20130212_211726_outLine +BABEL_OP1_204_36632_20130725_160202_inLine +BABEL_OP1_204_36632_20130725_160202_outLine +BABEL_OP1_204_36900_20130210_013355_inLine +BABEL_OP1_204_36900_20130210_013355_outLine +BABEL_OP1_204_37007_20130708_211216_inLine +BABEL_OP1_204_37007_20130708_211216_outLine +BABEL_OP1_204_37068_20130815_173112_inLine +BABEL_OP1_204_37068_20130815_173112_outLine +BABEL_OP1_204_37281_20130131_020847_inLine +BABEL_OP1_204_37281_20130131_020847_outLine +BABEL_OP1_204_37499_20130627_150627_inLine +BABEL_OP1_204_37499_20130627_150627_outLine +BABEL_OP1_204_37598_20130607_165958_inLine +BABEL_OP1_204_37598_20130607_165958_outLine +BABEL_OP1_204_38323_20130311_030447_inLine +BABEL_OP1_204_38323_20130311_030447_outLine +BABEL_OP1_204_38554_20121221_210925_inLine 
+BABEL_OP1_204_38554_20121221_210925_outLine +BABEL_OP1_204_38741_20130103_233022_inLine +BABEL_OP1_204_38741_20130103_233022_outLine +BABEL_OP1_204_38963_20130830_013927_inLine +BABEL_OP1_204_38963_20130830_013927_outLine +BABEL_OP1_204_39006_20130310_042623_inLine +BABEL_OP1_204_39006_20130310_042623_outLine +BABEL_OP1_204_39277_20130710_203344_inLine +BABEL_OP1_204_39277_20130710_203344_outLine +BABEL_OP1_204_39426_20130218_002812_inLine +BABEL_OP1_204_39426_20130218_002812_outLine +BABEL_OP1_204_39579_20130724_163251_inLine +BABEL_OP1_204_39579_20130724_163251_outLine +BABEL_OP1_204_41073_20130211_210606_inLine +BABEL_OP1_204_41073_20130211_210606_outLine +BABEL_OP1_204_41100_20130108_172156_inLine +BABEL_OP1_204_41100_20130108_172156_outLine +BABEL_OP1_204_41109_20130211_003851_inLine +BABEL_OP1_204_41109_20130211_003851_outLine +BABEL_OP1_204_41174_20130117_215826_inLine +BABEL_OP1_204_41174_20130117_215826_outLine +BABEL_OP1_204_41400_20130702_161025_inLine +BABEL_OP1_204_41400_20130702_161025_outLine +BABEL_OP1_204_41493_20121218_185431_inLine +BABEL_OP1_204_41493_20121218_185431_outLine +BABEL_OP1_204_41609_20130102_232356_inLine +BABEL_OP1_204_41609_20130102_232356_outLine +BABEL_OP1_204_41680_20121219_175709_inLine +BABEL_OP1_204_41680_20121219_175709_outLine +BABEL_OP1_204_41692_20130624_195718_inLine +BABEL_OP1_204_41692_20130624_195718_outLine +BABEL_OP1_204_42243_20121222_194916_inLine +BABEL_OP1_204_42243_20121222_194916_outLine +BABEL_OP1_204_42309_20130521_001029_inLine +BABEL_OP1_204_42309_20130521_001029_outLine +BABEL_OP1_204_42434_20130116_230135_inLine +BABEL_OP1_204_42434_20130116_230135_outLine +BABEL_OP1_204_42600_20130111_202254_inLine +BABEL_OP1_204_42600_20130111_202254_outLine +BABEL_OP1_204_42771_20130228_025042_inLine +BABEL_OP1_204_42771_20130228_025042_outLine +BABEL_OP1_204_42848_20130627_222753_inLine +BABEL_OP1_204_42848_20130627_222753_outLine +BABEL_OP1_204_42877_20130815_164740_inLine +BABEL_OP1_204_42877_20130815_164740_outLine +BABEL_OP1_204_42883_20130624_202703_inLine +BABEL_OP1_204_42883_20130624_202703_outLine +BABEL_OP1_204_43074_20130509_115450_inLine +BABEL_OP1_204_43074_20130509_115450_outLine +BABEL_OP1_204_43285_20130130_012851_inLine +BABEL_OP1_204_43285_20130130_012851_outLine +BABEL_OP1_204_43388_20130129_230503_inLine +BABEL_OP1_204_43388_20130129_230503_outLine +BABEL_OP1_204_43395_20130313_164710_inLine +BABEL_OP1_204_43395_20130313_164710_outLine +BABEL_OP1_204_43646_20121218_215728_inLine +BABEL_OP1_204_43646_20121218_215728_outLine +BABEL_OP1_204_43990_20130521_142553_inLine +BABEL_OP1_204_44255_20130225_230219_inLine +BABEL_OP1_204_44255_20130225_230219_outLine +BABEL_OP1_204_44681_20130830_000000_inLine +BABEL_OP1_204_44681_20130830_000000_outLine +BABEL_OP1_204_44847_20130126_212511_inLine +BABEL_OP1_204_44847_20130126_212511_outLine +BABEL_OP1_204_45106_20130325_003034_inLine +BABEL_OP1_204_45106_20130325_003034_outLine +BABEL_OP1_204_45106_20130325_004324_inLine +BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_45201_20130312_021424_inLine +BABEL_OP1_204_45201_20130312_021424_outLine +BABEL_OP1_204_45536_20130217_014642_inLine +BABEL_OP1_204_45536_20130217_014642_outLine +BABEL_OP1_204_45559_20130303_234142_inLine +BABEL_OP1_204_45559_20130303_234142_outLine +BABEL_OP1_204_45560_20130107_224441_inLine +BABEL_OP1_204_45560_20130107_224441_outLine +BABEL_OP1_204_45642_20130106_040244_inLine +BABEL_OP1_204_45642_20130106_040244_outLine +BABEL_OP1_204_45771_20130626_191013_inLine 
+BABEL_OP1_204_45771_20130626_191013_outLine +BABEL_OP1_204_45908_20130607_213719_inLine +BABEL_OP1_204_45908_20130607_213719_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46310_20130103_163932_inLine +BABEL_OP1_204_46310_20130103_163932_outLine +BABEL_OP1_204_46315_20130129_014152_inLine +BABEL_OP1_204_46315_20130129_014152_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_46712_20130111_175849_inLine +BABEL_OP1_204_46712_20130111_175849_outLine +BABEL_OP1_204_46763_20130216_235210_inLine +BABEL_OP1_204_46763_20130216_235210_outLine +BABEL_OP1_204_46770_20130224_204253_inLine +BABEL_OP1_204_46770_20130224_204253_outLine +BABEL_OP1_204_46881_20121222_190526_inLine +BABEL_OP1_204_46881_20121222_190526_outLine +BABEL_OP1_204_47309_20130705_182329_inLine +BABEL_OP1_204_47309_20130705_182329_outLine +BABEL_OP1_204_47405_20130829_233945_inLine +BABEL_OP1_204_47405_20130829_233945_outLine +BABEL_OP1_204_47799_20130516_193711_inLine +BABEL_OP1_204_47799_20130516_193711_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48200_20130209_211626_inLine +BABEL_OP1_204_48200_20130209_211626_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_48663_20130303_002413_inLine +BABEL_OP1_204_48663_20130303_002413_outLine +BABEL_OP1_204_48663_20130303_023530_inLine +BABEL_OP1_204_48663_20130303_023530_outLine +BABEL_OP1_204_49216_20130112_225803_inLine +BABEL_OP1_204_49216_20130112_225803_outLine +BABEL_OP1_204_49630_20130130_014200_inLine +BABEL_OP1_204_49630_20130130_014200_outLine +BABEL_OP1_204_49637_20130112_201836_inLine +BABEL_OP1_204_49637_20130112_201836_outLine +BABEL_OP1_204_49870_20130824_181019_inLine +BABEL_OP1_204_49870_20130824_181019_outLine +BABEL_OP1_204_49902_20130128_182704_inLine +BABEL_OP1_204_49902_20130128_182704_outLine +BABEL_OP1_204_50090_20130122_180653_inLine +BABEL_OP1_204_50090_20130122_180653_outLine +BABEL_OP1_204_50940_20130522_185117_inLine +BABEL_OP1_204_50940_20130522_185117_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_51414_20130531_173250_inLine +BABEL_OP1_204_51414_20130531_173250_outLine +BABEL_OP1_204_51417_20130212_002429_inLine +BABEL_OP1_204_51417_20130212_002429_outLine +BABEL_OP1_204_51484_20130209_174419_inLine +BABEL_OP1_204_51484_20130209_174419_outLine +BABEL_OP1_204_51540_20130228_021352_inLine +BABEL_OP1_204_51540_20130228_021352_outLine +BABEL_OP1_204_51611_20130608_155952_inLine +BABEL_OP1_204_51611_20130608_155952_outLine +BABEL_OP1_204_52058_20130710_173207_inLine +BABEL_OP1_204_52058_20130710_173207_outLine +BABEL_OP1_204_52070_20130607_210255_inLine +BABEL_OP1_204_52070_20130607_210255_outLine +BABEL_OP1_204_52222_20130524_171039_inLine +BABEL_OP1_204_52222_20130524_171039_outLine +BABEL_OP1_204_52246_20130122_172528_inLine +BABEL_OP1_204_52246_20130122_172528_outLine +BABEL_OP1_204_52265_20130516_202551_inLine +BABEL_OP1_204_52265_20130516_202551_outLine +BABEL_OP1_204_52272_20130112_213528_inLine +BABEL_OP1_204_52272_20130112_213528_outLine +BABEL_OP1_204_52438_20130103_172024_inLine +BABEL_OP1_204_52438_20130103_172024_outLine +BABEL_OP1_204_52447_20130624_184145_inLine +BABEL_OP1_204_52447_20130624_184145_outLine 
+BABEL_OP1_204_53010_20130825_210105_inLine +BABEL_OP1_204_53010_20130825_210105_outLine +BABEL_OP1_204_53072_20130605_225249_inLine +BABEL_OP1_204_53072_20130605_225249_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_53492_20130322_020510_inLine +BABEL_OP1_204_53492_20130322_020510_outLine +BABEL_OP1_204_53665_20130301_171513_inLine +BABEL_OP1_204_53665_20130301_171513_outLine +BABEL_OP1_204_53957_20130219_004311_inLine +BABEL_OP1_204_53957_20130219_004311_outLine +BABEL_OP1_204_53957_20130219_004930_inLine +BABEL_OP1_204_53957_20130219_004930_outLine +BABEL_OP1_204_54477_20130217_014421_inLine +BABEL_OP1_204_54477_20130217_014421_outLine +BABEL_OP1_204_54477_20130220_020436_inLine +BABEL_OP1_204_54477_20130220_020436_outLine +BABEL_OP1_204_54697_20130209_190625_inLine +BABEL_OP1_204_54697_20130209_190625_outLine +BABEL_OP1_204_54735_20130830_002922_inLine +BABEL_OP1_204_54735_20130830_002922_outLine +BABEL_OP1_204_54735_20130830_004018_inLine +BABEL_OP1_204_54735_20130830_004018_outLine +BABEL_OP1_204_55042_20130820_010539_inLine +BABEL_OP1_204_55042_20130820_010539_outLine +BABEL_OP1_204_55818_20130115_173558_inLine +BABEL_OP1_204_55818_20130115_173558_outLine +BABEL_OP1_204_55968_20121219_172146_inLine +BABEL_OP1_204_55968_20121219_172146_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_56076_20130223_224429_inLine +BABEL_OP1_204_56076_20130223_224429_outLine +BABEL_OP1_204_56331_20130318_211453_inLine +BABEL_OP1_204_56331_20130318_211453_outLine +BABEL_OP1_204_56345_20130524_143829_inLine +BABEL_OP1_204_56345_20130524_143829_outLine +BABEL_OP1_204_56465_20130312_022322_inLine +BABEL_OP1_204_56465_20130312_022322_outLine +BABEL_OP1_204_56468_20130606_150512_inLine +BABEL_OP1_204_56468_20130606_150512_outLine +BABEL_OP1_204_56674_20130725_164519_inLine +BABEL_OP1_204_56674_20130725_164519_outLine +BABEL_OP1_204_56826_20130215_030029_inLine +BABEL_OP1_204_56826_20130215_030029_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57566_20130304_024842_inLine +BABEL_OP1_204_57566_20130304_024842_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_57919_20130902_232635_inLine +BABEL_OP1_204_57919_20130902_232635_outLine +BABEL_OP1_204_58061_20130605_182326_inLine +BABEL_OP1_204_58061_20130605_182326_outLine +BABEL_OP1_204_58145_20130123_042048_inLine +BABEL_OP1_204_58145_20130123_042048_outLine +BABEL_OP1_204_58853_20130709_164717_inLine +BABEL_OP1_204_58853_20130709_164717_outLine +BABEL_OP1_204_58915_20130531_170755_inLine +BABEL_OP1_204_58915_20130531_170755_outLine +BABEL_OP1_204_59635_20130211_170439_inLine +BABEL_OP1_204_59635_20130211_170439_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_60458_20130618_144323_inLine +BABEL_OP1_204_60458_20130618_144323_outLine +BABEL_OP1_204_60498_20130624_192541_inLine +BABEL_OP1_204_60498_20130624_192541_outLine +BABEL_OP1_204_60650_20130709_185316_inLine +BABEL_OP1_204_60650_20130709_185316_outLine +BABEL_OP1_204_60836_20130116_024921_inLine +BABEL_OP1_204_60836_20130116_024921_outLine +BABEL_OP1_204_61219_20130114_220900_inLine +BABEL_OP1_204_61219_20130114_220900_outLine +BABEL_OP1_204_61357_20130124_034332_inLine 
+BABEL_OP1_204_61357_20130124_034332_outLine +BABEL_OP1_204_61678_20121220_171940_inLine +BABEL_OP1_204_61678_20121220_171940_outLine +BABEL_OP1_204_61684_20130523_114244_inLine +BABEL_OP1_204_61684_20130523_114244_outLine +BABEL_OP1_204_62047_20130211_213702_inLine +BABEL_OP1_204_62047_20130211_213702_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_62158_20130508_122027_inLine +BABEL_OP1_204_62158_20130508_122027_outLine +BABEL_OP1_204_62286_20130126_212818_inLine +BABEL_OP1_204_62286_20130126_212818_outLine +BABEL_OP1_204_62323_20130820_221917_inLine +BABEL_OP1_204_62323_20130820_221917_outLine +BABEL_OP1_204_62360_20130228_184057_inLine +BABEL_OP1_204_62360_20130228_184057_outLine +BABEL_OP1_204_62362_20130626_225421_inLine +BABEL_OP1_204_62362_20130626_225421_outLine +BABEL_OP1_204_62434_20130114_192752_inLine +BABEL_OP1_204_62434_20130114_192752_outLine +BABEL_OP1_204_62471_20130818_161031_inLine +BABEL_OP1_204_62471_20130818_161031_outLine +BABEL_OP1_204_62714_20130215_213205_inLine +BABEL_OP1_204_62714_20130215_213205_outLine +BABEL_OP1_204_63265_20130821_232031_inLine +BABEL_OP1_204_63265_20130821_232031_outLine +BABEL_OP1_204_63425_20130318_220104_inLine +BABEL_OP1_204_63425_20130318_220104_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63511_20130515_175657_inLine +BABEL_OP1_204_63511_20130515_175657_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_63757_20130222_193431_inLine +BABEL_OP1_204_63757_20130222_193431_outLine +BABEL_OP1_204_63757_20130222_194438_inLine +BABEL_OP1_204_63757_20130222_194438_outLine +BABEL_OP1_204_63906_20130322_030332_inLine +BABEL_OP1_204_63906_20130322_030332_outLine +BABEL_OP1_204_63938_20130212_150410_inLine +BABEL_OP1_204_63938_20130212_150410_outLine +BABEL_OP1_204_64014_20130707_165607_inLine +BABEL_OP1_204_64014_20130707_165607_outLine +BABEL_OP1_204_64635_20130730_203724_inLine +BABEL_OP1_204_64635_20130730_203724_outLine +BABEL_OP1_204_64638_20130314_012822_inLine +BABEL_OP1_204_64638_20130314_012822_outLine +BABEL_OP1_204_64768_20130112_061048_inLine +BABEL_OP1_204_64768_20130112_061048_outLine +BABEL_OP1_204_65077_20121219_175859_inLine +BABEL_OP1_204_65077_20121219_175859_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_65367_20130224_214222_inLine +BABEL_OP1_204_65367_20130224_214222_outLine +BABEL_OP1_204_65370_20130508_125401_inLine +BABEL_OP1_204_65370_20130508_125401_outLine +BABEL_OP1_204_65639_20130703_191008_inLine +BABEL_OP1_204_65639_20130703_191008_outLine +BABEL_OP1_204_65723_20130118_220849_inLine +BABEL_OP1_204_65723_20130118_220849_outLine +BABEL_OP1_204_66967_20130107_185021_inLine +BABEL_OP1_204_66967_20130107_185021_outLine +BABEL_OP1_204_67304_20130906_005328_inLine +BABEL_OP1_204_67304_20130906_005328_outLine +BABEL_OP1_204_67389_20130523_220733_inLine +BABEL_OP1_204_67389_20130523_220733_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_68908_20130805_144908_inLine +BABEL_OP1_204_68908_20130805_144908_outLine +BABEL_OP1_204_69107_20130217_222355_inLine +BABEL_OP1_204_69107_20130217_222355_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine 
+BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_70121_20130116_162838_inLine +BABEL_OP1_204_70121_20130116_162838_outLine +BABEL_OP1_204_70182_20130517_002241_inLine +BABEL_OP1_204_70182_20130517_002241_outLine +BABEL_OP1_204_70251_20130117_003349_inLine +BABEL_OP1_204_70251_20130117_003349_outLine +BABEL_OP1_204_70343_20130129_203836_inLine +BABEL_OP1_204_70343_20130129_203836_outLine +BABEL_OP1_204_70526_20130310_204157_inLine +BABEL_OP1_204_70526_20130310_204157_outLine +BABEL_OP1_204_70526_20130311_194113_inLine +BABEL_OP1_204_70526_20130311_194113_outLine +BABEL_OP1_204_70986_20130531_211537_inLine +BABEL_OP1_204_70986_20130531_211537_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71263_20130130_171712_inLine +BABEL_OP1_204_71263_20130130_171712_outLine +BABEL_OP1_204_71263_20130130_172902_inLine +BABEL_OP1_204_71263_20130130_172902_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71419_20130710_200227_inLine +BABEL_OP1_204_71419_20130710_200227_outLine +BABEL_OP1_204_71460_20130902_003219_inLine +BABEL_OP1_204_71460_20130902_003219_outLine +BABEL_OP1_204_71559_20130217_032759_inLine +BABEL_OP1_204_71559_20130217_032759_outLine +BABEL_OP1_204_71566_20130209_235200_inLine +BABEL_OP1_204_71566_20130209_235200_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_71780_20130104_180509_inLine +BABEL_OP1_204_71780_20130104_180509_outLine +BABEL_OP1_204_72844_20121221_181459_inLine +BABEL_OP1_204_72844_20121221_181459_outLine +BABEL_OP1_204_73072_20130107_173326_inLine +BABEL_OP1_204_73072_20130107_173326_outLine +BABEL_OP1_204_73301_20130116_023950_inLine +BABEL_OP1_204_73301_20130116_023950_outLine +BABEL_OP1_204_73518_20130225_195225_inLine +BABEL_OP1_204_73518_20130225_195225_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73814_20130122_170515_inLine +BABEL_OP1_204_73814_20130122_170515_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_73909_20130209_171219_inLine +BABEL_OP1_204_73909_20130209_171219_outLine +BABEL_OP1_204_74078_20130901_220513_inLine +BABEL_OP1_204_74078_20130901_220513_outLine +BABEL_OP1_204_75223_20130106_180539_inLine +BABEL_OP1_204_75223_20130106_180539_outLine +BABEL_OP1_204_75460_20130515_225130_inLine +BABEL_OP1_204_75460_20130515_225130_outLine +BABEL_OP1_204_75505_20121220_175919_inLine +BABEL_OP1_204_75505_20121220_175919_outLine +BABEL_OP1_204_75869_20130614_143452_inLine +BABEL_OP1_204_75869_20130614_143452_outLine +BABEL_OP1_204_75981_20130304_014705_inLine +BABEL_OP1_204_75981_20130304_014705_outLine +BABEL_OP1_204_76069_20130821_001213_inLine +BABEL_OP1_204_76069_20130821_001213_outLine +BABEL_OP1_204_76155_20130129_210554_inLine +BABEL_OP1_204_76155_20130129_210554_outLine +BABEL_OP1_204_76793_20130812_204256_inLine +BABEL_OP1_204_76793_20130812_204256_outLine +BABEL_OP1_204_76902_20130520_161816_inLine +BABEL_OP1_204_76902_20130520_161816_outLine +BABEL_OP1_204_77139_20130103_214953_inLine +BABEL_OP1_204_77139_20130103_214953_outLine +BABEL_OP1_204_77225_20130825_155026_inLine +BABEL_OP1_204_77225_20130825_155026_outLine +BABEL_OP1_204_77225_20130825_160328_inLine 
+BABEL_OP1_204_77225_20130825_160328_outLine +BABEL_OP1_204_77242_20130310_031438_inLine +BABEL_OP1_204_77242_20130310_031438_outLine +BABEL_OP1_204_77391_20130114_214011_inLine +BABEL_OP1_204_77391_20130114_214011_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_77974_20130305_023753_inLine +BABEL_OP1_204_77974_20130305_023753_outLine +BABEL_OP1_204_78360_20130227_174048_inLine +BABEL_OP1_204_78360_20130227_174048_outLine +BABEL_OP1_204_78482_20130208_181819_inLine +BABEL_OP1_204_78482_20130208_181819_outLine +BABEL_OP1_204_78749_20130607_175636_inLine +BABEL_OP1_204_78749_20130607_175636_outLine +BABEL_OP1_204_79028_20130818_170543_inLine +BABEL_OP1_204_79028_20130818_170543_outLine +BABEL_OP1_204_79107_20130311_033735_inLine +BABEL_OP1_204_79107_20130311_033735_outLine +BABEL_OP1_204_79429_20130522_180804_inLine +BABEL_OP1_204_79429_20130522_180804_outLine +BABEL_OP1_204_79723_20130815_161014_inLine +BABEL_OP1_204_79723_20130815_161014_outLine +BABEL_OP1_204_79858_20130108_175702_inLine +BABEL_OP1_204_79858_20130108_175702_outLine +BABEL_OP1_204_79898_20130607_173143_inLine +BABEL_OP1_204_79898_20130607_173143_outLine +BABEL_OP1_204_80577_20130310_051912_inLine +BABEL_OP1_204_80577_20130310_051912_outLine +BABEL_OP1_204_80622_20130325_141431_inLine +BABEL_OP1_204_80622_20130325_141431_outLine +BABEL_OP1_204_80897_20130130_194208_inLine +BABEL_OP1_204_80897_20130130_194208_outLine +BABEL_OP1_204_81392_20130129_021012_inLine +BABEL_OP1_204_81392_20130129_021012_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_81433_20130217_234814_inLine +BABEL_OP1_204_81433_20130217_234814_outLine +BABEL_OP1_204_81553_20130225_183924_inLine +BABEL_OP1_204_81553_20130225_183924_outLine +BABEL_OP1_204_81581_20130726_141606_inLine +BABEL_OP1_204_81581_20130726_141606_outLine +BABEL_OP1_204_81674_20130522_172505_inLine +BABEL_OP1_204_81674_20130522_172505_outLine +BABEL_OP1_204_81769_20130710_161840_inLine +BABEL_OP1_204_81769_20130710_161840_outLine +BABEL_OP1_204_82224_20130224_184149_inLine +BABEL_OP1_204_82224_20130224_184149_outLine +BABEL_OP1_204_83436_20130111_223716_inLine +BABEL_OP1_204_83436_20130111_223716_outLine +BABEL_OP1_204_83643_20130830_005334_inLine +BABEL_OP1_204_83643_20130830_005334_outLine +BABEL_OP1_204_83813_20130704_172117_inLine +BABEL_OP1_204_83813_20130704_172117_outLine +BABEL_OP1_204_83851_20130114_065704_inLine +BABEL_OP1_204_83851_20130114_065704_outLine +BABEL_OP1_204_83974_20130607_152537_inLine +BABEL_OP1_204_83974_20130607_152537_outLine +BABEL_OP1_204_84079_20130821_203040_inLine +BABEL_OP1_204_84079_20130821_203040_outLine +BABEL_OP1_204_84194_20130716_194041_inLine +BABEL_OP1_204_84194_20130716_194041_outLine +BABEL_OP1_204_84370_20130310_050228_inLine +BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84469_20130210_003435_inLine +BABEL_OP1_204_84469_20130210_003435_outLine +BABEL_OP1_204_84541_20130820_230752_inLine +BABEL_OP1_204_84541_20130820_230752_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84768_20130106_033700_inLine +BABEL_OP1_204_84768_20130106_033700_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_85179_20130209_014947_inLine +BABEL_OP1_204_85179_20130209_014947_outLine +BABEL_OP1_204_85246_20130516_211538_inLine +BABEL_OP1_204_85246_20130516_211538_outLine 
+BABEL_OP1_204_85254_20130312_035109_inLine +BABEL_OP1_204_85254_20130312_035109_outLine +BABEL_OP1_204_85322_20130107_192937_inLine +BABEL_OP1_204_85322_20130107_192937_outLine +BABEL_OP1_204_85340_20130111_212907_inLine +BABEL_OP1_204_85340_20130111_212907_outLine +BABEL_OP1_204_85519_20130301_161437_inLine +BABEL_OP1_204_85519_20130301_161437_outLine +BABEL_OP1_204_85651_20130216_204250_inLine +BABEL_OP1_204_85651_20130216_204250_outLine +BABEL_OP1_204_86597_20130310_031951_inLine +BABEL_OP1_204_86597_20130310_031951_outLine +BABEL_OP1_204_86722_20130114_025704_inLine +BABEL_OP1_204_86722_20130114_025704_outLine +BABEL_OP1_204_86826_20130627_190707_inLine +BABEL_OP1_204_86826_20130627_190707_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_86845_20130705_192829_inLine +BABEL_OP1_204_86845_20130705_192829_outLine +BABEL_OP1_204_86845_20130705_193447_inLine +BABEL_OP1_204_86845_20130705_193447_outLine +BABEL_OP1_204_86885_20130825_200228_inLine +BABEL_OP1_204_86885_20130825_200228_outLine +BABEL_OP1_204_86952_20121231_204819_inLine +BABEL_OP1_204_86952_20121231_204819_outLine +BABEL_OP1_204_87280_20130209_180508_inLine +BABEL_OP1_204_87280_20130209_180508_outLine +BABEL_OP1_204_87470_20130122_032958_inLine +BABEL_OP1_204_87470_20130122_032958_outLine +BABEL_OP1_204_87889_20130225_183607_inLine +BABEL_OP1_204_87889_20130225_183607_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89665_20130122_035608_inLine +BABEL_OP1_204_89665_20130122_035608_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine +BABEL_OP1_204_89794_20130321_181250_inLine +BABEL_OP1_204_89794_20130321_181250_outLine +BABEL_OP1_204_89888_20130115_181504_inLine +BABEL_OP1_204_89888_20130115_181504_outLine +BABEL_OP1_204_90440_20130509_133501_inLine +BABEL_OP1_204_90440_20130509_133501_outLine +BABEL_OP1_204_90740_20130605_163314_inLine +BABEL_OP1_204_90740_20130605_163314_outLine +BABEL_OP1_204_90832_20130310_045516_inLine +BABEL_OP1_204_90832_20130310_045516_outLine +BABEL_OP1_204_90930_20130901_200839_inLine +BABEL_OP1_204_90930_20130901_200839_outLine +BABEL_OP1_204_90935_20130116_220822_inLine +BABEL_OP1_204_90935_20130116_220822_outLine +BABEL_OP1_204_91125_20130112_215414_inLine +BABEL_OP1_204_91125_20130112_215414_outLine +BABEL_OP1_204_91189_20130516_000538_inLine +BABEL_OP1_204_91189_20130516_000538_outLine +BABEL_OP1_204_91252_20130821_000400_inLine +BABEL_OP1_204_91252_20130821_000400_outLine +BABEL_OP1_204_91411_20130710_193521_inLine +BABEL_OP1_204_91411_20130710_193521_outLine +BABEL_OP1_204_91463_20130120_184700_inLine +BABEL_OP1_204_91463_20130120_184700_outLine +BABEL_OP1_204_91581_20130210_013423_inLine +BABEL_OP1_204_91581_20130210_013423_outLine +BABEL_OP1_204_91825_20121224_185428_inLine +BABEL_OP1_204_91825_20121224_185428_outLine +BABEL_OP1_204_91825_20121224_191424_inLine +BABEL_OP1_204_91825_20121224_191424_outLine +BABEL_OP1_204_91884_20130215_205051_inLine 
+BABEL_OP1_204_91884_20130215_205051_outLine +BABEL_OP1_204_91971_20130818_152604_inLine +BABEL_OP1_204_91971_20130818_152604_outLine +BABEL_OP1_204_92077_20130614_165026_inLine +BABEL_OP1_204_92077_20130614_165026_outLine +BABEL_OP1_204_92176_20130120_165309_inLine +BABEL_OP1_204_92176_20130120_165309_outLine +BABEL_OP1_204_92252_20130812_220232_inLine +BABEL_OP1_204_92252_20130812_220232_outLine +BABEL_OP1_204_92941_20130120_230410_inLine +BABEL_OP1_204_92941_20130120_230410_outLine +BABEL_OP1_204_93320_20130311_022333_inLine +BABEL_OP1_204_93320_20130311_022333_outLine +BABEL_OP1_204_93320_20130311_023402_inLine +BABEL_OP1_204_93320_20130311_023402_outLine +BABEL_OP1_204_93443_20130803_153015_inLine +BABEL_OP1_204_93443_20130803_153015_outLine +BABEL_OP1_204_93858_20130311_005700_inLine +BABEL_OP1_204_93858_20130311_005700_outLine +BABEL_OP1_204_93937_20130313_172438_inLine +BABEL_OP1_204_93937_20130313_172438_outLine +BABEL_OP1_204_93946_20130210_172621_inLine +BABEL_OP1_204_93946_20130210_172621_outLine +BABEL_OP1_204_93946_20130210_175020_inLine +BABEL_OP1_204_93946_20130210_175020_outLine +BABEL_OP1_204_94035_20130704_185858_inLine +BABEL_OP1_204_94035_20130704_185858_outLine +BABEL_OP1_204_94166_20130212_185608_inLine +BABEL_OP1_204_94166_20130212_185608_outLine +BABEL_OP1_204_94212_20130709_195201_inLine +BABEL_OP1_204_94212_20130709_195201_outLine +BABEL_OP1_204_94237_20130227_204940_inLine +BABEL_OP1_204_94237_20130227_204940_outLine +BABEL_OP1_204_94262_20130307_222214_inLine +BABEL_OP1_204_94262_20130307_222214_outLine +BABEL_OP1_204_94409_20130130_012526_inLine +BABEL_OP1_204_94409_20130130_012526_outLine +BABEL_OP1_204_94713_20130710_173705_inLine +BABEL_OP1_204_94713_20130710_173705_outLine +BABEL_OP1_204_94803_20130524_125715_inLine +BABEL_OP1_204_94803_20130524_125715_outLine +BABEL_OP1_204_94891_20130520_200303_inLine +BABEL_OP1_204_94891_20130520_200303_outLine +BABEL_OP1_204_94969_20130516_174057_inLine +BABEL_OP1_204_94969_20130516_174057_outLine +BABEL_OP1_204_95124_20130521_171211_inLine +BABEL_OP1_204_95124_20130521_171211_outLine +BABEL_OP1_204_95269_20130121_040957_inLine +BABEL_OP1_204_95269_20130121_040957_outLine +BABEL_OP1_204_95338_20130617_183230_inLine +BABEL_OP1_204_95338_20130617_183230_outLine +BABEL_OP1_204_95467_20130310_041013_inLine +BABEL_OP1_204_95467_20130310_041013_outLine +BABEL_OP1_204_95571_20130605_173956_inLine +BABEL_OP1_204_95571_20130605_173956_outLine +BABEL_OP1_204_95583_20130107_233706_inLine +BABEL_OP1_204_95583_20130107_233706_outLine +BABEL_OP1_204_95598_20121218_225349_inLine +BABEL_OP1_204_95598_20121218_225349_outLine +BABEL_OP1_204_96504_20130111_012757_inLine +BABEL_OP1_204_96504_20130111_012757_outLine +BABEL_OP1_204_96820_20130120_015641_inLine +BABEL_OP1_204_96820_20130120_015641_outLine +BABEL_OP1_204_96842_20130614_172939_inLine +BABEL_OP1_204_96842_20130614_172939_outLine +BABEL_OP1_204_96934_20130119_033411_inLine +BABEL_OP1_204_96934_20130119_033411_outLine +BABEL_OP1_204_96940_20130520_190004_inLine +BABEL_OP1_204_96940_20130520_190004_outLine +BABEL_OP1_204_97345_20130705_170655_inLine +BABEL_OP1_204_97345_20130705_170655_outLine +BABEL_OP1_204_97448_20130830_013253_inLine +BABEL_OP1_204_97448_20130830_013253_outLine +BABEL_OP1_204_97588_20130106_172133_inLine +BABEL_OP1_204_97588_20130106_172133_outLine +BABEL_OP1_204_97604_20130224_214511_inLine +BABEL_OP1_204_97604_20130224_214511_outLine +BABEL_OP1_204_97911_20130701_170644_inLine +BABEL_OP1_204_97911_20130701_170644_outLine 
+BABEL_OP1_204_98255_20130716_204027_inLine +BABEL_OP1_204_98255_20130716_204027_outLine +BABEL_OP1_204_98311_20130114_061903_inLine +BABEL_OP1_204_98311_20130114_061903_outLine +BABEL_OP1_204_98390_20130114_195309_inLine +BABEL_OP1_204_98390_20130114_195309_outLine +BABEL_OP1_204_98580_20130130_233406_inLine +BABEL_OP1_204_98580_20130130_233406_outLine +BABEL_OP1_204_98678_20130215_215447_inLine +BABEL_OP1_204_98678_20130215_215447_outLine +BABEL_OP1_204_98888_20130130_200414_inLine +BABEL_OP1_204_98888_20130130_200414_outLine +BABEL_OP1_204_98909_20130118_224024_inLine +BABEL_OP1_204_98909_20130118_224024_outLine +BABEL_OP1_204_99264_20130211_183956_inLine +BABEL_OP1_204_99264_20130211_183956_outLine +BABEL_OP1_204_99344_20130705_180532_inLine +BABEL_OP1_204_99344_20130705_180532_outLine +BABEL_OP1_204_99487_20130111_175232_inLine +BABEL_OP1_204_99487_20130111_175232_outLine +BABEL_OP1_204_99516_20130103_172113_inLine +BABEL_OP1_204_99516_20130103_172113_outLine +BABEL_OP1_204_99718_20130114_221147_inLine +BABEL_OP1_204_99718_20130114_221147_outLine +BABEL_OP1_204_99975_20130812_220558_inLine +BABEL_OP1_204_99975_20130812_220558_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list new file mode 100644 index 00000000000..c5dbddb1867 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list @@ -0,0 +1,186 @@ +BABEL_OP1_204_10638_20130510_124441_inLine +BABEL_OP1_204_10638_20130510_124441_outLine +BABEL_OP1_204_12321_20130220_211618_inLine +BABEL_OP1_204_12321_20130220_211618_outLine +BABEL_OP1_204_12635_20130601_152113_inLine +BABEL_OP1_204_12635_20130601_152113_outLine +BABEL_OP1_204_14350_20130113_023333_inLine +BABEL_OP1_204_14350_20130113_023333_outLine +BABEL_OP1_204_14723_20130710_180819_inLine +BABEL_OP1_204_14723_20130710_180819_outLine +BABEL_OP1_204_14875_20130111_192622_inLine +BABEL_OP1_204_14875_20130111_192622_outLine +BABEL_OP1_204_15227_20130624_180548_inLine +BABEL_OP1_204_15227_20130624_180548_outLine +BABEL_OP1_204_15848_20121218_180011_inLine +BABEL_OP1_204_15848_20121218_180011_outLine +BABEL_OP1_204_16351_20130705_205024_inLine +BABEL_OP1_204_16351_20130705_205024_outLine +BABEL_OP1_204_17165_20130130_191341_inLine +BABEL_OP1_204_17165_20130130_191341_outLine +BABEL_OP1_204_18863_20130210_164314_inLine +BABEL_OP1_204_18863_20130210_164314_outLine +BABEL_OP1_204_19545_20130122_164148_inLine +BABEL_OP1_204_19545_20130122_164148_outLine +BABEL_OP1_204_21029_20130107_212248_inLine +BABEL_OP1_204_21029_20130107_212248_outLine +BABEL_OP1_204_21159_20130607_143737_inLine +BABEL_OP1_204_21159_20130607_143737_outLine +BABEL_OP1_204_21794_20130129_233131_inLine +BABEL_OP1_204_21794_20130129_233131_outLine +BABEL_OP1_204_22641_20121224_195014_inLine +BABEL_OP1_204_22641_20121224_195014_outLine +BABEL_OP1_204_23196_20130605_144617_inLine +BABEL_OP1_204_23196_20130605_144617_outLine +BABEL_OP1_204_23395_20130126_013244_inLine +BABEL_OP1_204_23395_20130126_013244_outLine +BABEL_OP1_204_25068_20130901_230020_inLine +BABEL_OP1_204_25068_20130901_230020_outLine +BABEL_OP1_204_25068_20130901_235001_inLine +BABEL_OP1_204_25068_20130901_235001_outLine +BABEL_OP1_204_28585_20130208_014141_inLine +BABEL_OP1_204_28585_20130208_014141_outLine +BABEL_OP1_204_28871_20121219_184300_inLine +BABEL_OP1_204_28871_20121219_184300_outLine +BABEL_OP1_204_29208_20130127_011057_inLine +BABEL_OP1_204_29208_20130127_011057_outLine +BABEL_OP1_204_29352_20130628_145610_inLine +BABEL_OP1_204_29352_20130628_145610_outLine 
+BABEL_OP1_204_29777_20130211_193239_inLine +BABEL_OP1_204_29777_20130211_193239_outLine +BABEL_OP1_204_32832_20130208_200126_inLine +BABEL_OP1_204_32832_20130208_200126_outLine +BABEL_OP1_204_32961_20130518_164254_inLine +BABEL_OP1_204_32961_20130518_164254_outLine +BABEL_OP1_204_33635_20130127_024601_inLine +BABEL_OP1_204_33635_20130127_024601_outLine +BABEL_OP1_204_37281_20130131_020847_inLine +BABEL_OP1_204_37281_20130131_020847_outLine +BABEL_OP1_204_39579_20130724_163251_inLine +BABEL_OP1_204_39579_20130724_163251_outLine +BABEL_OP1_204_41493_20121218_185431_inLine +BABEL_OP1_204_41493_20121218_185431_outLine +BABEL_OP1_204_44255_20130225_230219_inLine +BABEL_OP1_204_44255_20130225_230219_outLine +BABEL_OP1_204_44681_20130830_000000_inLine +BABEL_OP1_204_44681_20130830_000000_outLine +BABEL_OP1_204_45106_20130325_003034_inLine +BABEL_OP1_204_45106_20130325_003034_outLine +BABEL_OP1_204_45106_20130325_004324_inLine +BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine +BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_84370_20130310_050228_inLine 
+BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine +BABEL_OP1_204_89794_20130321_181250_inLine +BABEL_OP1_204_89794_20130321_181250_outLine +BABEL_OP1_204_90930_20130901_200839_inLine +BABEL_OP1_204_90930_20130901_200839_outLine +BABEL_OP1_204_90935_20130116_220822_inLine +BABEL_OP1_204_90935_20130116_220822_outLine +BABEL_OP1_204_91252_20130821_000400_inLine +BABEL_OP1_204_91252_20130821_000400_outLine +BABEL_OP1_204_91884_20130215_205051_inLine +BABEL_OP1_204_91884_20130215_205051_outLine +BABEL_OP1_204_91971_20130818_152604_inLine +BABEL_OP1_204_91971_20130818_152604_outLine +BABEL_OP1_204_92176_20130120_165309_inLine +BABEL_OP1_204_92176_20130120_165309_outLine +BABEL_OP1_204_92941_20130120_230410_inLine +BABEL_OP1_204_92941_20130120_230410_outLine +BABEL_OP1_204_94166_20130212_185608_inLine +BABEL_OP1_204_94166_20130212_185608_outLine +BABEL_OP1_204_94212_20130709_195201_inLine +BABEL_OP1_204_94212_20130709_195201_outLine +BABEL_OP1_204_95598_20121218_225349_inLine +BABEL_OP1_204_95598_20121218_225349_outLine +BABEL_OP1_204_96934_20130119_033411_inLine +BABEL_OP1_204_96934_20130119_033411_outLine +BABEL_OP1_204_97345_20130705_170655_inLine +BABEL_OP1_204_97345_20130705_170655_outLine +BABEL_OP1_204_97448_20130830_013253_inLine +BABEL_OP1_204_97448_20130830_013253_outLine +BABEL_OP1_204_98580_20130130_233406_inLine +BABEL_OP1_204_98580_20130130_233406_outLine +BABEL_OP1_204_98888_20130130_200414_inLine +BABEL_OP1_204_98888_20130130_200414_outLine +BABEL_OP1_204_99264_20130211_183956_inLine +BABEL_OP1_204_99264_20130211_183956_outLine +BABEL_OP1_204_99344_20130705_180532_inLine +BABEL_OP1_204_99344_20130705_180532_outLine +BABEL_OP1_204_99516_20130103_172113_inLine +BABEL_OP1_204_99516_20130103_172113_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list new file mode 100644 index 00000000000..84a8b1815a2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list @@ -0,0 +1,778 @@ +BABEL_OP1_204_10002_20130523_142107_inLine +BABEL_OP1_204_10002_20130523_142107_outLine +BABEL_OP1_204_10036_20130116_163652_inLine +BABEL_OP1_204_10036_20130116_163652_outLine +BABEL_OP1_204_10184_20130217_232154_inLine +BABEL_OP1_204_10184_20130217_232154_outLine +BABEL_OP1_204_10411_20130313_042405_inLine +BABEL_OP1_204_10411_20130313_042405_outLine +BABEL_OP1_204_10469_20130708_201653_inLine +BABEL_OP1_204_10469_20130708_201653_outLine +BABEL_OP1_204_10647_20130225_175457_inLine +BABEL_OP1_204_10647_20130225_175457_outLine +BABEL_OP1_204_10647_20130225_184106_inLine +BABEL_OP1_204_10647_20130225_184106_outLine 
+BABEL_OP1_204_10901_20130120_220533_inLine +BABEL_OP1_204_10901_20130120_220533_outLine +BABEL_OP1_204_10938_20130118_213056_inLine +BABEL_OP1_204_10938_20130118_213056_outLine +BABEL_OP1_204_10966_20130114_210156_inLine +BABEL_OP1_204_10966_20130114_210156_outLine +BABEL_OP1_204_11310_20130705_180254_inLine +BABEL_OP1_204_11310_20130705_180254_outLine +BABEL_OP1_204_11352_20130220_023807_inLine +BABEL_OP1_204_11352_20130220_023807_outLine +BABEL_OP1_204_11486_20130607_155406_inLine +BABEL_OP1_204_11486_20130607_155406_outLine +BABEL_OP1_204_11581_20130222_215500_inLine +BABEL_OP1_204_11581_20130222_215500_outLine +BABEL_OP1_204_11581_20130222_220101_inLine +BABEL_OP1_204_11581_20130222_220101_outLine +BABEL_OP1_204_11663_20130319_201815_inLine +BABEL_OP1_204_11663_20130319_201815_outLine +BABEL_OP1_204_11673_20121220_214236_inLine +BABEL_OP1_204_11673_20121220_214236_outLine +BABEL_OP1_204_11723_20130803_144247_inLine +BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine +BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine +BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine 
+BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine +BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_20437_20130523_235611_inLine +BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine +BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine 
+BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine +BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine +BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34713_20130516_164824_inLine +BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine +BABEL_OP1_204_39848_20130130_204605_inLine 
+BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine +BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine +BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine 
+BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine +BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_54066_20130514_211116_inLine +BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine 
+BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine +BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine +BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine +BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine 
+BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine +BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine +BABEL_OP1_204_68910_20130816_191414_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69096_20130303_195234_inLine +BABEL_OP1_204_69096_20130303_195234_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_69937_20130715_192435_inLine +BABEL_OP1_204_69937_20130715_192435_outLine +BABEL_OP1_204_69964_20130704_161248_inLine +BABEL_OP1_204_69964_20130704_161248_outLine +BABEL_OP1_204_69992_20130107_234311_inLine +BABEL_OP1_204_69992_20130107_234311_outLine +BABEL_OP1_204_70216_20130628_200952_inLine +BABEL_OP1_204_70216_20130628_200952_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_70293_20130902_214220_inLine +BABEL_OP1_204_70293_20130902_214220_outLine +BABEL_OP1_204_70601_20130122_030105_inLine +BABEL_OP1_204_70601_20130122_030105_outLine +BABEL_OP1_204_70794_20121220_222614_inLine +BABEL_OP1_204_70794_20121220_222614_outLine +BABEL_OP1_204_71067_20130319_205826_inLine +BABEL_OP1_204_71067_20130319_205826_outLine +BABEL_OP1_204_71189_20130215_200359_inLine +BABEL_OP1_204_71189_20130215_200359_outLine +BABEL_OP1_204_71976_20130730_180338_inLine +BABEL_OP1_204_71976_20130730_180338_outLine +BABEL_OP1_204_72073_20130823_001235_inLine +BABEL_OP1_204_72073_20130823_001235_outLine +BABEL_OP1_204_72110_20130208_235019_inLine +BABEL_OP1_204_72110_20130208_235019_outLine +BABEL_OP1_204_73549_20130701_155700_inLine +BABEL_OP1_204_73549_20130701_155700_outLine +BABEL_OP1_204_73696_20130310_022514_inLine +BABEL_OP1_204_73696_20130310_022514_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74121_20130129_170655_inLine +BABEL_OP1_204_74121_20130129_170655_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_74763_20130825_175903_inLine +BABEL_OP1_204_74763_20130825_175903_outLine +BABEL_OP1_204_75064_20130111_180636_inLine 
+BABEL_OP1_204_75064_20130111_180636_outLine +BABEL_OP1_204_75365_20130516_010147_inLine +BABEL_OP1_204_75365_20130516_010147_outLine +BABEL_OP1_204_75975_20130902_224807_inLine +BABEL_OP1_204_75975_20130902_224807_outLine +BABEL_OP1_204_76126_20130217_205227_outLine +BABEL_OP1_204_76238_20130205_022020_inLine +BABEL_OP1_204_76482_20130310_023337_inLine +BABEL_OP1_204_76482_20130310_023337_outLine +BABEL_OP1_204_76730_20130825_010524_inLine +BABEL_OP1_204_76730_20130825_010524_outLine +BABEL_OP1_204_77427_20130116_173650_inLine +BABEL_OP1_204_77427_20130116_173650_outLine +BABEL_OP1_204_77803_20121219_215121_inLine +BABEL_OP1_204_77803_20121219_215121_outLine +BABEL_OP1_204_78016_20130118_223813_inLine +BABEL_OP1_204_78016_20130118_223813_outLine +BABEL_OP1_204_78016_20130118_224939_inLine +BABEL_OP1_204_78016_20130118_224939_outLine +BABEL_OP1_204_78116_20130130_004511_inLine +BABEL_OP1_204_78116_20130130_004511_outLine +BABEL_OP1_204_78254_20130114_224850_inLine +BABEL_OP1_204_78254_20130114_224850_outLine +BABEL_OP1_204_78313_20130223_202010_inLine +BABEL_OP1_204_78313_20130223_202010_outLine +BABEL_OP1_204_78543_20130313_200956_inLine +BABEL_OP1_204_78743_20130210_214804_inLine +BABEL_OP1_204_78743_20130210_214804_outLine +BABEL_OP1_204_78829_20130724_210413_inLine +BABEL_OP1_204_78829_20130724_210413_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79080_20130224_194409_inLine +BABEL_OP1_204_79080_20130224_194409_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_79367_20130110_223433_inLine +BABEL_OP1_204_79367_20130110_223433_outLine +BABEL_OP1_204_79505_20130223_203535_inLine +BABEL_OP1_204_79505_20130223_203535_outLine +BABEL_OP1_204_80069_20130310_201210_inLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_80439_20130115_225051_inLine +BABEL_OP1_204_80439_20130115_225051_outLine +BABEL_OP1_204_81213_20130114_221437_inLine +BABEL_OP1_204_81213_20130114_221437_outLine +BABEL_OP1_204_81622_20130130_223905_inLine +BABEL_OP1_204_81622_20130130_223905_outLine +BABEL_OP1_204_81810_20130319_043547_inLine +BABEL_OP1_204_81810_20130319_043547_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_82425_20130108_181556_inLine +BABEL_OP1_204_82425_20130108_181556_outLine +BABEL_OP1_204_82935_20130208_135243_inLine +BABEL_OP1_204_82935_20130208_135243_outLine +BABEL_OP1_204_82979_20130103_191447_inLine +BABEL_OP1_204_82979_20130103_191447_outLine +BABEL_OP1_204_83394_20130313_005013_inLine +BABEL_OP1_204_83394_20130313_005013_outLine +BABEL_OP1_204_83430_20130603_202255_inLine +BABEL_OP1_204_83430_20130603_202255_outLine +BABEL_OP1_204_83455_20130119_213254_inLine +BABEL_OP1_204_83455_20130119_213254_outLine +BABEL_OP1_204_83625_20130531_181104_inLine +BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_83771_20130625_172000_inLine +BABEL_OP1_204_83771_20130625_172000_outLine +BABEL_OP1_204_84055_20130228_202242_inLine +BABEL_OP1_204_84055_20130228_202242_outLine +BABEL_OP1_204_84077_20130812_184211_inLine +BABEL_OP1_204_84077_20130812_184211_outLine +BABEL_OP1_204_84430_20130817_164608_inLine +BABEL_OP1_204_84430_20130817_164608_outLine +BABEL_OP1_204_84430_20130901_201534_inLine +BABEL_OP1_204_84430_20130901_201534_outLine +BABEL_OP1_204_84466_20130220_015953_inLine +BABEL_OP1_204_84466_20130220_015953_outLine 
+BABEL_OP1_204_84583_20130122_032028_outLine +BABEL_OP1_204_84715_20130225_194321_inLine +BABEL_OP1_204_84715_20130225_194321_outLine +BABEL_OP1_204_85010_20130531_160005_inLine +BABEL_OP1_204_85010_20130531_160005_outLine +BABEL_OP1_204_85028_20130301_204938_inLine +BABEL_OP1_204_85028_20130301_222343_inLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_85331_20130310_030345_inLine +BABEL_OP1_204_85331_20130310_030345_outLine +BABEL_OP1_204_85331_20130310_033244_inLine +BABEL_OP1_204_85331_20130310_033244_outLine +BABEL_OP1_204_85647_20130120_023041_inLine +BABEL_OP1_204_85647_20130120_023041_outLine +BABEL_OP1_204_86433_20130126_230445_inLine +BABEL_OP1_204_86433_20130126_230445_outLine +BABEL_OP1_204_86715_20130313_002453_inLine +BABEL_OP1_204_86715_20130313_002453_outLine +BABEL_OP1_204_86715_20130313_003416_inLine +BABEL_OP1_204_86715_20130313_003416_outLine +BABEL_OP1_204_86891_20130605_215220_inLine +BABEL_OP1_204_86891_20130605_215220_outLine +BABEL_OP1_204_87073_20121220_221057_inLine +BABEL_OP1_204_87073_20121220_221057_outLine +BABEL_OP1_204_87073_20121220_221600_inLine +BABEL_OP1_204_87073_20121220_221600_outLine +BABEL_OP1_204_87073_20121220_222957_inLine +BABEL_OP1_204_87073_20121220_222957_outLine +BABEL_OP1_204_87305_20130515_233922_inLine +BABEL_OP1_204_87305_20130515_233922_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_88445_20130129_191832_inLine +BABEL_OP1_204_88445_20130129_191832_outLine +BABEL_OP1_204_88673_20130705_173732_inLine +BABEL_OP1_204_88673_20130705_173732_outLine +BABEL_OP1_204_88865_20130707_151620_inLine +BABEL_OP1_204_88865_20130707_151620_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_89695_20130130_001218_inLine +BABEL_OP1_204_89695_20130130_001218_outLine +BABEL_OP1_204_89877_20130129_192538_inLine +BABEL_OP1_204_89877_20130129_192538_outLine +BABEL_OP1_204_90347_20130124_030740_inLine +BABEL_OP1_204_90347_20130124_030740_outLine +BABEL_OP1_204_90709_20130107_232337_inLine +BABEL_OP1_204_90709_20130107_232337_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_91760_20130618_160303_inLine +BABEL_OP1_204_91760_20130618_160303_outLine +BABEL_OP1_204_92605_20130518_145958_inLine +BABEL_OP1_204_92605_20130518_145958_outLine +BABEL_OP1_204_92809_20130116_171026_inLine +BABEL_OP1_204_92809_20130116_171026_outLine +BABEL_OP1_204_92942_20130127_233540_inLine +BABEL_OP1_204_92942_20130127_233540_outLine +BABEL_OP1_204_93222_20130604_000913_inLine +BABEL_OP1_204_93222_20130604_000913_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_93469_20130302_033019_inLine +BABEL_OP1_204_93469_20130302_033019_outLine +BABEL_OP1_204_93490_20130209_033837_inLine +BABEL_OP1_204_93490_20130209_033837_outLine +BABEL_OP1_204_93490_20130209_140440_inLine +BABEL_OP1_204_93490_20130209_140440_outLine +BABEL_OP1_204_93681_20130901_204636_inLine +BABEL_OP1_204_93681_20130901_204636_outLine +BABEL_OP1_204_94442_20130617_164306_inLine +BABEL_OP1_204_94442_20130617_164306_outLine 
+BABEL_OP1_204_95028_20130518_173442_inLine +BABEL_OP1_204_95028_20130518_173442_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_95446_20130225_185013_inLine +BABEL_OP1_204_95446_20130225_185013_outLine +BABEL_OP1_204_95663_20121221_214944_inLine +BABEL_OP1_204_95663_20121221_214944_outLine +BABEL_OP1_204_95942_20130215_204023_inLine +BABEL_OP1_204_95942_20130215_204023_outLine +BABEL_OP1_204_96158_20130721_235954_inLine +BABEL_OP1_204_96158_20130721_235954_outLine +BABEL_OP1_204_96190_20130116_041341_inLine +BABEL_OP1_204_96190_20130116_041341_outLine +BABEL_OP1_204_96247_20130319_165606_inLine +BABEL_OP1_204_96247_20130319_165606_outLine +BABEL_OP1_204_96690_20130129_191200_inLine +BABEL_OP1_204_96690_20130129_191200_outLine +BABEL_OP1_204_96730_20130225_193316_inLine +BABEL_OP1_204_96730_20130225_193316_outLine +BABEL_OP1_204_96808_20130617_185713_inLine +BABEL_OP1_204_96808_20130617_185713_outLine +BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97063_20130227_185803_inLine +BABEL_OP1_204_97063_20130227_185803_outLine +BABEL_OP1_204_97063_20130306_232138_inLine +BABEL_OP1_204_97063_20130306_232138_outLine +BABEL_OP1_204_97220_20130310_023745_inLine +BABEL_OP1_204_97220_20130310_023745_outLine +BABEL_OP1_204_97376_20130128_213930_inLine +BABEL_OP1_204_97376_20130128_213930_outLine +BABEL_OP1_204_97461_20130127_014703_inLine +BABEL_OP1_204_97461_20130127_014703_outLine +BABEL_OP1_204_97461_20130127_015849_inLine +BABEL_OP1_204_97461_20130127_015849_outLine +BABEL_OP1_204_97731_20130210_235215_inLine +BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97772_20121218_224525_inLine +BABEL_OP1_204_97772_20121218_224525_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98365_20130224_175209_inLine +BABEL_OP1_204_98365_20130224_175209_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine +BABEL_OP1_204_99289_20130215_210617_inLine +BABEL_OP1_204_99289_20130215_210617_outLine +BABEL_OP1_204_99401_20130108_180622_inLine +BABEL_OP1_204_99401_20130108_180622_outLine +BABEL_OP1_204_99594_20130126_192710_inLine +BABEL_OP1_204_99594_20130126_192710_outLine +BABEL_OP1_204_99887_20130210_212207_inLine +BABEL_OP1_204_99887_20130210_212207_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list new file mode 100644 index 00000000000..4c5afd85381 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list @@ -0,0 +1,125 @@ +BABEL_OP1_204_10184_20130217_232154_inLine +BABEL_OP1_204_10184_20130217_232154_outLine +BABEL_OP1_204_11723_20130803_144247_inLine +BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine 
+BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine +BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_34713_20130516_164824_inLine +BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_83625_20130531_181104_inLine 
+BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97731_20130210_235215_inLine +BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..09510717b52 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list @@ -0,0 +1,653 @@ +BABEL_OP1_204_10002_20130523_142107_inLine +BABEL_OP1_204_10002_20130523_142107_outLine +BABEL_OP1_204_10036_20130116_163652_inLine +BABEL_OP1_204_10036_20130116_163652_outLine +BABEL_OP1_204_10411_20130313_042405_inLine +BABEL_OP1_204_10411_20130313_042405_outLine +BABEL_OP1_204_10469_20130708_201653_inLine +BABEL_OP1_204_10469_20130708_201653_outLine +BABEL_OP1_204_10647_20130225_175457_inLine +BABEL_OP1_204_10647_20130225_175457_outLine +BABEL_OP1_204_10647_20130225_184106_inLine +BABEL_OP1_204_10647_20130225_184106_outLine +BABEL_OP1_204_10901_20130120_220533_inLine +BABEL_OP1_204_10901_20130120_220533_outLine +BABEL_OP1_204_10938_20130118_213056_inLine +BABEL_OP1_204_10938_20130118_213056_outLine +BABEL_OP1_204_10966_20130114_210156_inLine +BABEL_OP1_204_10966_20130114_210156_outLine +BABEL_OP1_204_11310_20130705_180254_inLine +BABEL_OP1_204_11310_20130705_180254_outLine +BABEL_OP1_204_11352_20130220_023807_inLine +BABEL_OP1_204_11352_20130220_023807_outLine +BABEL_OP1_204_11486_20130607_155406_inLine +BABEL_OP1_204_11486_20130607_155406_outLine +BABEL_OP1_204_11581_20130222_215500_inLine +BABEL_OP1_204_11581_20130222_215500_outLine +BABEL_OP1_204_11581_20130222_220101_inLine +BABEL_OP1_204_11581_20130222_220101_outLine +BABEL_OP1_204_11663_20130319_201815_inLine +BABEL_OP1_204_11663_20130319_201815_outLine +BABEL_OP1_204_11673_20121220_214236_inLine +BABEL_OP1_204_11673_20121220_214236_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine 
+BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine +BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine +BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20437_20130523_235611_inLine +BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine 
+BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine +BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine +BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine 
+BABEL_OP1_204_39848_20130130_204605_inLine +BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine +BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine +BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine +BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine 
+BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_54066_20130514_211116_inLine +BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine +BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine +BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine 
+BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine +BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine +BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine +BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine 
+BABEL_OP1_204_68910_20130816_191414_outLine
+BABEL_OP1_204_69096_20130303_195234_inLine
+BABEL_OP1_204_69096_20130303_195234_outLine
+BABEL_OP1_204_69937_20130715_192435_inLine
+BABEL_OP1_204_69937_20130715_192435_outLine
+BABEL_OP1_204_69964_20130704_161248_inLine
+BABEL_OP1_204_69964_20130704_161248_outLine
+BABEL_OP1_204_69992_20130107_234311_inLine
+BABEL_OP1_204_69992_20130107_234311_outLine
+BABEL_OP1_204_70216_20130628_200952_inLine
+BABEL_OP1_204_70216_20130628_200952_outLine
+BABEL_OP1_204_70293_20130902_214220_inLine
+BABEL_OP1_204_70293_20130902_214220_outLine
+BABEL_OP1_204_70601_20130122_030105_inLine
+BABEL_OP1_204_70601_20130122_030105_outLine
+BABEL_OP1_204_70794_20121220_222614_inLine
+BABEL_OP1_204_70794_20121220_222614_outLine
+BABEL_OP1_204_71067_20130319_205826_inLine
+BABEL_OP1_204_71067_20130319_205826_outLine
+BABEL_OP1_204_71189_20130215_200359_inLine
+BABEL_OP1_204_71189_20130215_200359_outLine
+BABEL_OP1_204_71976_20130730_180338_inLine
+BABEL_OP1_204_71976_20130730_180338_outLine
+BABEL_OP1_204_72073_20130823_001235_inLine
+BABEL_OP1_204_72073_20130823_001235_outLine
+BABEL_OP1_204_72110_20130208_235019_inLine
+BABEL_OP1_204_72110_20130208_235019_outLine
+BABEL_OP1_204_73549_20130701_155700_inLine
+BABEL_OP1_204_73549_20130701_155700_outLine
+BABEL_OP1_204_73696_20130310_022514_inLine
+BABEL_OP1_204_73696_20130310_022514_outLine
+BABEL_OP1_204_74121_20130129_170655_inLine
+BABEL_OP1_204_74121_20130129_170655_outLine
+BABEL_OP1_204_74763_20130825_175903_inLine
+BABEL_OP1_204_74763_20130825_175903_outLine
+BABEL_OP1_204_75064_20130111_180636_inLine
+BABEL_OP1_204_75064_20130111_180636_outLine
+BABEL_OP1_204_75365_20130516_010147_inLine
+BABEL_OP1_204_75365_20130516_010147_outLine
+BABEL_OP1_204_75975_20130902_224807_inLine
+BABEL_OP1_204_75975_20130902_224807_outLine
+BABEL_OP1_204_76126_20130217_205227_outLine
+BABEL_OP1_204_76238_20130205_022020_inLine
+BABEL_OP1_204_76482_20130310_023337_inLine
+BABEL_OP1_204_76482_20130310_023337_outLine
+BABEL_OP1_204_76730_20130825_010524_inLine
+BABEL_OP1_204_76730_20130825_010524_outLine
+BABEL_OP1_204_77427_20130116_173650_inLine
+BABEL_OP1_204_77427_20130116_173650_outLine
+BABEL_OP1_204_77803_20121219_215121_inLine
+BABEL_OP1_204_77803_20121219_215121_outLine
+BABEL_OP1_204_78016_20130118_223813_inLine
+BABEL_OP1_204_78016_20130118_223813_outLine
+BABEL_OP1_204_78016_20130118_224939_inLine
+BABEL_OP1_204_78016_20130118_224939_outLine
+BABEL_OP1_204_78116_20130130_004511_inLine
+BABEL_OP1_204_78116_20130130_004511_outLine
+BABEL_OP1_204_78254_20130114_224850_inLine
+BABEL_OP1_204_78254_20130114_224850_outLine
+BABEL_OP1_204_78313_20130223_202010_inLine
+BABEL_OP1_204_78313_20130223_202010_outLine
+BABEL_OP1_204_78543_20130313_200956_inLine
+BABEL_OP1_204_78743_20130210_214804_inLine
+BABEL_OP1_204_78743_20130210_214804_outLine
+BABEL_OP1_204_78829_20130724_210413_inLine
+BABEL_OP1_204_78829_20130724_210413_outLine
+BABEL_OP1_204_79080_20130224_194409_inLine
+BABEL_OP1_204_79080_20130224_194409_outLine
+BABEL_OP1_204_79367_20130110_223433_inLine
+BABEL_OP1_204_79367_20130110_223433_outLine
+BABEL_OP1_204_79505_20130223_203535_inLine
+BABEL_OP1_204_79505_20130223_203535_outLine
+BABEL_OP1_204_80069_20130310_201210_inLine
+BABEL_OP1_204_80439_20130115_225051_inLine
+BABEL_OP1_204_80439_20130115_225051_outLine
+BABEL_OP1_204_81213_20130114_221437_inLine
+BABEL_OP1_204_81213_20130114_221437_outLine
+BABEL_OP1_204_81622_20130130_223905_inLine
+BABEL_OP1_204_81622_20130130_223905_outLine
+BABEL_OP1_204_81810_20130319_043547_inLine
+BABEL_OP1_204_81810_20130319_043547_outLine
+BABEL_OP1_204_82425_20130108_181556_inLine
+BABEL_OP1_204_82425_20130108_181556_outLine
+BABEL_OP1_204_82935_20130208_135243_inLine
+BABEL_OP1_204_82935_20130208_135243_outLine
+BABEL_OP1_204_82979_20130103_191447_inLine
+BABEL_OP1_204_82979_20130103_191447_outLine
+BABEL_OP1_204_83394_20130313_005013_inLine
+BABEL_OP1_204_83394_20130313_005013_outLine
+BABEL_OP1_204_83430_20130603_202255_inLine
+BABEL_OP1_204_83430_20130603_202255_outLine
+BABEL_OP1_204_83455_20130119_213254_inLine
+BABEL_OP1_204_83455_20130119_213254_outLine
+BABEL_OP1_204_83771_20130625_172000_inLine
+BABEL_OP1_204_83771_20130625_172000_outLine
+BABEL_OP1_204_84055_20130228_202242_inLine
+BABEL_OP1_204_84055_20130228_202242_outLine
+BABEL_OP1_204_84077_20130812_184211_inLine
+BABEL_OP1_204_84077_20130812_184211_outLine
+BABEL_OP1_204_84430_20130817_164608_inLine
+BABEL_OP1_204_84430_20130817_164608_outLine
+BABEL_OP1_204_84430_20130901_201534_inLine
+BABEL_OP1_204_84430_20130901_201534_outLine
+BABEL_OP1_204_84466_20130220_015953_inLine
+BABEL_OP1_204_84466_20130220_015953_outLine
+BABEL_OP1_204_84583_20130122_032028_outLine
+BABEL_OP1_204_84715_20130225_194321_inLine
+BABEL_OP1_204_84715_20130225_194321_outLine
+BABEL_OP1_204_85010_20130531_160005_inLine
+BABEL_OP1_204_85010_20130531_160005_outLine
+BABEL_OP1_204_85028_20130301_204938_inLine
+BABEL_OP1_204_85028_20130301_222343_inLine
+BABEL_OP1_204_85331_20130310_030345_inLine
+BABEL_OP1_204_85331_20130310_030345_outLine
+BABEL_OP1_204_85331_20130310_033244_inLine
+BABEL_OP1_204_85331_20130310_033244_outLine
+BABEL_OP1_204_85647_20130120_023041_inLine
+BABEL_OP1_204_85647_20130120_023041_outLine
+BABEL_OP1_204_86433_20130126_230445_inLine
+BABEL_OP1_204_86433_20130126_230445_outLine
+BABEL_OP1_204_86715_20130313_002453_inLine
+BABEL_OP1_204_86715_20130313_002453_outLine
+BABEL_OP1_204_86715_20130313_003416_inLine
+BABEL_OP1_204_86715_20130313_003416_outLine
+BABEL_OP1_204_86891_20130605_215220_inLine
+BABEL_OP1_204_86891_20130605_215220_outLine
+BABEL_OP1_204_87073_20121220_221057_inLine
+BABEL_OP1_204_87073_20121220_221057_outLine
+BABEL_OP1_204_87073_20121220_221600_inLine
+BABEL_OP1_204_87073_20121220_221600_outLine
+BABEL_OP1_204_87073_20121220_222957_inLine
+BABEL_OP1_204_87073_20121220_222957_outLine
+BABEL_OP1_204_87305_20130515_233922_inLine
+BABEL_OP1_204_87305_20130515_233922_outLine
+BABEL_OP1_204_88445_20130129_191832_inLine
+BABEL_OP1_204_88445_20130129_191832_outLine
+BABEL_OP1_204_88673_20130705_173732_inLine
+BABEL_OP1_204_88673_20130705_173732_outLine
+BABEL_OP1_204_88865_20130707_151620_inLine
+BABEL_OP1_204_88865_20130707_151620_outLine
+BABEL_OP1_204_89695_20130130_001218_inLine
+BABEL_OP1_204_89695_20130130_001218_outLine
+BABEL_OP1_204_89877_20130129_192538_inLine
+BABEL_OP1_204_89877_20130129_192538_outLine
+BABEL_OP1_204_90347_20130124_030740_inLine
+BABEL_OP1_204_90347_20130124_030740_outLine
+BABEL_OP1_204_90709_20130107_232337_inLine
+BABEL_OP1_204_90709_20130107_232337_outLine
+BABEL_OP1_204_91760_20130618_160303_inLine
+BABEL_OP1_204_91760_20130618_160303_outLine
+BABEL_OP1_204_92605_20130518_145958_inLine
+BABEL_OP1_204_92605_20130518_145958_outLine
+BABEL_OP1_204_92809_20130116_171026_inLine
+BABEL_OP1_204_92809_20130116_171026_outLine
+BABEL_OP1_204_92942_20130127_233540_inLine
+BABEL_OP1_204_92942_20130127_233540_outLine
+BABEL_OP1_204_93222_20130604_000913_inLine
+BABEL_OP1_204_93222_20130604_000913_outLine
+BABEL_OP1_204_93469_20130302_033019_inLine
+BABEL_OP1_204_93469_20130302_033019_outLine
+BABEL_OP1_204_93490_20130209_033837_inLine
+BABEL_OP1_204_93490_20130209_033837_outLine
+BABEL_OP1_204_93490_20130209_140440_inLine
+BABEL_OP1_204_93490_20130209_140440_outLine
+BABEL_OP1_204_93681_20130901_204636_inLine
+BABEL_OP1_204_93681_20130901_204636_outLine
+BABEL_OP1_204_94442_20130617_164306_inLine
+BABEL_OP1_204_94442_20130617_164306_outLine
+BABEL_OP1_204_95028_20130518_173442_inLine
+BABEL_OP1_204_95028_20130518_173442_outLine
+BABEL_OP1_204_95446_20130225_185013_inLine
+BABEL_OP1_204_95446_20130225_185013_outLine
+BABEL_OP1_204_95663_20121221_214944_inLine
+BABEL_OP1_204_95663_20121221_214944_outLine
+BABEL_OP1_204_95942_20130215_204023_inLine
+BABEL_OP1_204_95942_20130215_204023_outLine
+BABEL_OP1_204_96158_20130721_235954_inLine
+BABEL_OP1_204_96158_20130721_235954_outLine
+BABEL_OP1_204_96190_20130116_041341_inLine
+BABEL_OP1_204_96190_20130116_041341_outLine
+BABEL_OP1_204_96247_20130319_165606_inLine
+BABEL_OP1_204_96247_20130319_165606_outLine
+BABEL_OP1_204_96690_20130129_191200_inLine
+BABEL_OP1_204_96690_20130129_191200_outLine
+BABEL_OP1_204_96730_20130225_193316_inLine
+BABEL_OP1_204_96730_20130225_193316_outLine
+BABEL_OP1_204_96808_20130617_185713_inLine
+BABEL_OP1_204_96808_20130617_185713_outLine
+BABEL_OP1_204_97063_20130227_185803_inLine
+BABEL_OP1_204_97063_20130227_185803_outLine
+BABEL_OP1_204_97063_20130306_232138_inLine
+BABEL_OP1_204_97063_20130306_232138_outLine
+BABEL_OP1_204_97220_20130310_023745_inLine
+BABEL_OP1_204_97220_20130310_023745_outLine
+BABEL_OP1_204_97376_20130128_213930_inLine
+BABEL_OP1_204_97376_20130128_213930_outLine
+BABEL_OP1_204_97461_20130127_014703_inLine
+BABEL_OP1_204_97461_20130127_014703_outLine
+BABEL_OP1_204_97461_20130127_015849_inLine
+BABEL_OP1_204_97461_20130127_015849_outLine
+BABEL_OP1_204_97772_20121218_224525_inLine
+BABEL_OP1_204_97772_20121218_224525_outLine
+BABEL_OP1_204_98365_20130224_175209_inLine
+BABEL_OP1_204_98365_20130224_175209_outLine
+BABEL_OP1_204_99289_20130215_210617_inLine
+BABEL_OP1_204_99289_20130215_210617_outLine
+BABEL_OP1_204_99401_20130108_180622_inLine
+BABEL_OP1_204_99401_20130108_180622_outLine
+BABEL_OP1_204_99594_20130126_192710_inLine
+BABEL_OP1_204_99594_20130126_192710_outLine
+BABEL_OP1_204_99887_20130210_212207_inLine
+BABEL_OP1_204_99887_20130210_212207_outLine
diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list
new file mode 100644
index 00000000000..cacb28a9b83
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list
@@ -0,0 +1,269 @@
+BABEL_OP1_204_10416_20130129_214039_outLine
+BABEL_OP1_204_10464_20130816_191819_inLine
+BABEL_OP1_204_10464_20130816_191819_outLine
+BABEL_OP1_204_11528_20130611_211620_inLine
+BABEL_OP1_204_11528_20130611_211620_outLine
+BABEL_OP1_204_11859_20130313_032533_outLine
+BABEL_OP1_204_12767_20130116_025609_outLine
+BABEL_OP1_204_13126_20130217_010703_inLine
+BABEL_OP1_204_13126_20130217_010703_outLine
+BABEL_OP1_204_13178_20130325_020355_outLine
+BABEL_OP1_204_13547_20130726_181100_inLine
+BABEL_OP1_204_14097_20130815_163903_inLine
+BABEL_OP1_204_14097_20130815_163903_outLine
+BABEL_OP1_204_14137_20130111_210406_inLine
+BABEL_OP1_204_14137_20130111_210406_outLine
+BABEL_OP1_204_14560_20130325_002021_outLine
+BABEL_OP1_204_14814_20130109_222610_outLine
+BABEL_OP1_204_15024_20130312_175432_outLine
+BABEL_OP1_204_15024_20130312_180805_outLine
+BABEL_OP1_204_15216_20130212_014230_outLine
+BABEL_OP1_204_15322_20130301_005753_outLine
+BABEL_OP1_204_15869_20130818_163437_inLine
+BABEL_OP1_204_15869_20130818_163437_outLine
+BABEL_OP1_204_16149_20130116_033842_outLine
+BABEL_OP1_204_16475_20130119_031738_outLine
+BABEL_OP1_204_16839_20130215_160016_outLine
+BABEL_OP1_204_16886_20130121_034643_outLine
+BABEL_OP1_204_16938_20130121_204111_outLine
+BABEL_OP1_204_17573_20130210_015840_outLine
+BABEL_OP1_204_17751_20130313_054734_inLine
+BABEL_OP1_204_18297_20130302_224344_inLine
+BABEL_OP1_204_18297_20130302_224344_outLine
+BABEL_OP1_204_18490_20130729_180159_inLine
+BABEL_OP1_204_19120_20130216_232255_inLine
+BABEL_OP1_204_19444_20130726_202328_inLine
+BABEL_OP1_204_19767_20130227_011601_outLine
+BABEL_OP1_204_20922_20130207_205901_inLine
+BABEL_OP1_204_20972_20130426_122452_outLine
+BABEL_OP1_204_21315_20130310_055422_inLine
+BABEL_OP1_204_21624_20130219_005819_outLine
+BABEL_OP1_204_22591_20130817_190345_inLine
+BABEL_OP1_204_22591_20130817_190345_outLine
+BABEL_OP1_204_22612_20130209_232523_outLine
+BABEL_OP1_204_22918_20130228_021314_outLine
+BABEL_OP1_204_23893_20130223_170306_inLine
+BABEL_OP1_204_24209_20130814_213938_inLine
+BABEL_OP1_204_24209_20130814_213938_outLine
+BABEL_OP1_204_24501_20130217_012457_inLine
+BABEL_OP1_204_24532_20121227_175136_outLine
+BABEL_OP1_204_24586_20130217_014206_inLine
+BABEL_OP1_204_24924_20130311_043001_outLine
+BABEL_OP1_204_24982_20121228_191618_outLine
+BABEL_OP1_204_25719_20130209_012505_outLine
+BABEL_OP1_204_25961_20130107_180739_outLine
+BABEL_OP1_204_26072_20130227_193336_inLine
+BABEL_OP1_204_26836_20121228_170007_outLine
+BABEL_OP1_204_28190_20130209_194352_inLine
+BABEL_OP1_204_28190_20130225_194934_inLine
+BABEL_OP1_204_28600_20130209_182228_outLine
+BABEL_OP1_204_28644_20130724_180414_inLine
+BABEL_OP1_204_29230_20130311_030639_outLine
+BABEL_OP1_204_29563_20130724_172019_inLine
+BABEL_OP1_204_29563_20130724_172019_outLine
+BABEL_OP1_204_30253_20130216_045613_outLine
+BABEL_OP1_204_31184_20130124_204831_outLine
+BABEL_OP1_204_31346_20130216_053626_outLine
+BABEL_OP1_204_32148_20130217_164600_inLine
+BABEL_OP1_204_32148_20130217_164600_outLine
+BABEL_OP1_204_32301_20130129_184613_outLine
+BABEL_OP1_204_32861_20130227_173658_outLine
+BABEL_OP1_204_32959_20130209_030319_outLine
+BABEL_OP1_204_33704_20130226_220031_outLine
+BABEL_OP1_204_33933_20130829_222537_inLine
+BABEL_OP1_204_33933_20130829_222537_outLine
+BABEL_OP1_204_34410_20130611_194642_inLine
+BABEL_OP1_204_34410_20130611_194642_outLine
+BABEL_OP1_204_34477_20130120_004221_inLine
+BABEL_OP1_204_34477_20130120_010034_inLine
+BABEL_OP1_204_34826_20130226_192804_inLine
+BABEL_OP1_204_34899_20130311_034756_outLine
+BABEL_OP1_204_35838_20130725_195132_inLine
+BABEL_OP1_204_36341_20130107_025830_outLine
+BABEL_OP1_204_36642_20130617_150620_outLine
+BABEL_OP1_204_37064_20121227_220816_outLine
+BABEL_OP1_204_37285_20130129_031728_inLine
+BABEL_OP1_204_37285_20130129_031728_outLine
+BABEL_OP1_204_38431_20130205_201344_outLine
+BABEL_OP1_204_38878_20130124_001536_inLine
+BABEL_OP1_204_40092_20130813_200028_inLine
+BABEL_OP1_204_40092_20130813_200028_outLine
+BABEL_OP1_204_40740_20130216_225837_outLine
+BABEL_OP1_204_41542_20130225_183730_inLine
+BABEL_OP1_204_41745_20130116_005714_outLine
+BABEL_OP1_204_43115_20130302_023125_inLine
+BABEL_OP1_204_43784_20121227_190820_outLine
+BABEL_OP1_204_44446_20130523_123230_inLine
+BABEL_OP1_204_44446_20130523_123230_outLine
+BABEL_OP1_204_44477_20130121_193451_outLine
+BABEL_OP1_204_44709_20130319_032435_inLine
+BABEL_OP1_204_44709_20130319_032435_outLine
+BABEL_OP1_204_45851_20130227_013648_inLine
+BABEL_OP1_204_46389_20130814_160916_inLine
+BABEL_OP1_204_46389_20130814_160916_outLine
+BABEL_OP1_204_46389_20130814_161827_inLine
+BABEL_OP1_204_46389_20130814_161827_outLine
+BABEL_OP1_204_46808_20130829_232458_inLine
+BABEL_OP1_204_46808_20130829_232458_outLine
+BABEL_OP1_204_46974_20130129_181636_outLine
+BABEL_OP1_204_47110_20130815_155025_inLine
+BABEL_OP1_204_47110_20130815_155025_outLine
+BABEL_OP1_204_48907_20130228_232925_outLine
+BABEL_OP1_204_49001_20121228_172935_outLine
+BABEL_OP1_204_49330_20130805_162032_inLine
+BABEL_OP1_204_49330_20130805_162032_outLine
+BABEL_OP1_204_49739_20130726_173931_inLine
+BABEL_OP1_204_50175_20121222_205817_inLine
+BABEL_OP1_204_50175_20121222_205817_outLine
+BABEL_OP1_204_51015_20130130_013728_outLine
+BABEL_OP1_204_51858_20130521_175757_inLine
+BABEL_OP1_204_52381_20130224_210437_inLine
+BABEL_OP1_204_52381_20130224_210437_outLine
+BABEL_OP1_204_52404_20130119_200928_outLine
+BABEL_OP1_204_52442_20130120_233503_inLine
+BABEL_OP1_204_52442_20130120_233503_outLine
+BABEL_OP1_204_52499_20130825_162347_inLine
+BABEL_OP1_204_52499_20130825_162347_outLine
+BABEL_OP1_204_53842_20130122_230928_outLine
+BABEL_OP1_204_54046_20130209_030752_inLine
+BABEL_OP1_204_54530_20130217_020357_inLine
+BABEL_OP1_204_54953_20130224_201532_inLine
+BABEL_OP1_204_55013_20130301_025827_inLine
+BABEL_OP1_204_55013_20130301_025827_outLine
+BABEL_OP1_204_55902_20130520_170810_inLine
+BABEL_OP1_204_56370_20130107_193415_outLine
+BABEL_OP1_204_56523_20130120_185614_inLine
+BABEL_OP1_204_56523_20130120_185614_outLine
+BABEL_OP1_204_56523_20130120_190444_inLine
+BABEL_OP1_204_56523_20130120_190444_outLine
+BABEL_OP1_204_56523_20130126_235544_inLine
+BABEL_OP1_204_56523_20130126_235544_outLine
+BABEL_OP1_204_56684_20130208_181923_inLine
+BABEL_OP1_204_57141_20130216_214557_inLine
+BABEL_OP1_204_57650_20130518_200728_outLine
+BABEL_OP1_204_58585_20130225_194900_inLine
+BABEL_OP1_204_58585_20130225_194900_outLine
+BABEL_OP1_204_59163_20130829_230137_inLine
+BABEL_OP1_204_59163_20130829_230137_outLine
+BABEL_OP1_204_59549_20130116_224253_outLine
+BABEL_OP1_204_59645_20130122_004956_inLine
+BABEL_OP1_204_60436_20130303_010341_inLine
+BABEL_OP1_204_60436_20130303_010341_outLine
+BABEL_OP1_204_60474_20130111_204951_inLine
+BABEL_OP1_204_60538_20130107_001630_outLine
+BABEL_OP1_204_60626_20130104_165746_outLine
+BABEL_OP1_204_61040_20130226_221158_outLine
+BABEL_OP1_204_61873_20130128_002947_outLine
+BABEL_OP1_204_62200_20130121_031957_inLine
+BABEL_OP1_204_62200_20130121_031957_outLine
+BABEL_OP1_204_62430_20130221_003525_inLine
+BABEL_OP1_204_63604_20121231_023648_inLine
+BABEL_OP1_204_63604_20121231_024400_inLine
+BABEL_OP1_204_63787_20130115_003402_inLine
+BABEL_OP1_204_63787_20130115_003402_outLine
+BABEL_OP1_204_64494_20130118_033516_outLine
+BABEL_OP1_204_64796_20130101_031431_inLine
+BABEL_OP1_204_65477_20130121_031004_inLine
+BABEL_OP1_204_66001_20130110_231018_outLine
+BABEL_OP1_204_66001_20130110_232622_outLine
+BABEL_OP1_204_66959_20130224_165508_inLine
+BABEL_OP1_204_67622_20121224_014023_inLine
+BABEL_OP1_204_69153_20130304_135528_inLine
+BABEL_OP1_204_70452_20130111_164540_inLine
+BABEL_OP1_204_70726_20130825_192242_inLine
+BABEL_OP1_204_70726_20130825_192242_outLine
+BABEL_OP1_204_71047_20130226_192147_inLine
+BABEL_OP1_204_71047_20130226_192147_outLine
+BABEL_OP1_204_72007_20130130_020438_outLine
+BABEL_OP1_204_73258_20130129_221752_inLine
+BABEL_OP1_204_73485_20130226_020310_inLine
+BABEL_OP1_204_73485_20130226_020310_outLine
+BABEL_OP1_204_75764_20130227_174139_inLine
+BABEL_OP1_204_75764_20130227_174139_outLine
+BABEL_OP1_204_76218_20130131_023737_outLine
+BABEL_OP1_204_76444_20130613_172917_inLine
+BABEL_OP1_204_76499_20130401_153504_inLine
+BABEL_OP1_204_76499_20130401_153504_outLine
+BABEL_OP1_204_77112_20130103_183710_outLine
+BABEL_OP1_204_77126_20130110_175103_outLine
+BABEL_OP1_204_77146_20121224_223748_outLine
+BABEL_OP1_204_77567_20130103_223440_outLine
+BABEL_OP1_204_78398_20130103_204208_inLine
+BABEL_OP1_204_78398_20130103_204208_outLine
+BABEL_OP1_204_78544_20130119_181147_outLine
+BABEL_OP1_204_78630_20130116_034919_inLine
+BABEL_OP1_204_78833_20130726_170037_inLine
+BABEL_OP1_204_78943_20130109_215659_outLine
+BABEL_OP1_204_79139_20130130_193601_inLine
+BABEL_OP1_204_79190_20130127_012553_outLine
+BABEL_OP1_204_79451_20121227_183417_outLine
+BABEL_OP1_204_79820_20130104_184214_inLine
+BABEL_OP1_204_79820_20130104_184214_outLine
+BABEL_OP1_204_79973_20130729_211226_inLine
+BABEL_OP1_204_79973_20130729_211226_outLine
+BABEL_OP1_204_80559_20130103_205745_outLine
+BABEL_OP1_204_81671_20130217_195401_outLine
+BABEL_OP1_204_81971_20121225_005045_outLine
+BABEL_OP1_204_82035_20130120_205546_inLine
+BABEL_OP1_204_82138_20130328_213639_inLine
+BABEL_OP1_204_82140_20130328_220209_inLine
+BABEL_OP1_204_82140_20130328_220209_outLine
+BABEL_OP1_204_82622_20121230_013735_inLine
+BABEL_OP1_204_82622_20121230_013735_outLine
+BABEL_OP1_204_82966_20130217_024614_outLine
+BABEL_OP1_204_83609_20130716_211644_inLine
+BABEL_OP1_204_83609_20130716_211644_outLine
+BABEL_OP1_204_84605_20130114_234516_inLine
+BABEL_OP1_204_84609_20130726_193719_inLine
+BABEL_OP1_204_84737_20130614_151624_inLine
+BABEL_OP1_204_85047_20130328_012807_outLine
+BABEL_OP1_204_85260_20130822_000133_inLine
+BABEL_OP1_204_85260_20130822_000133_outLine
+BABEL_OP1_204_86467_20121224_182636_inLine
+BABEL_OP1_204_86467_20121224_182636_outLine
+BABEL_OP1_204_86628_20130516_235050_inLine
+BABEL_OP1_204_87629_20130122_042941_inLine
+BABEL_OP1_204_87629_20130122_042941_outLine
+BABEL_OP1_204_87629_20130124_021257_inLine
+BABEL_OP1_204_87629_20130124_021257_outLine
+BABEL_OP1_204_88674_20130729_204202_inLine
+BABEL_OP1_204_89059_20130224_201925_inLine
+BABEL_OP1_204_89226_20130825_175510_inLine
+BABEL_OP1_204_89226_20130825_175510_outLine
+BABEL_OP1_204_89560_20130222_171412_outLine
+BABEL_OP1_204_89560_20130222_172629_outLine
+BABEL_OP1_204_89575_20130227_020958_outLine
+BABEL_OP1_204_90318_20130825_173403_inLine
+BABEL_OP1_204_90318_20130825_173403_outLine
+BABEL_OP1_204_90572_20130221_011543_inLine
+BABEL_OP1_204_90572_20130221_011543_outLine
+BABEL_OP1_204_91372_20130311_005543_inLine
+BABEL_OP1_204_91478_20130531_193258_inLine
+BABEL_OP1_204_91478_20130531_193258_outLine
+BABEL_OP1_204_92527_20130119_222341_outLine
+BABEL_OP1_204_92792_20130225_210332_inLine
+BABEL_OP1_204_92792_20130225_210332_outLine
+BABEL_OP1_204_93007_20130628_153139_inLine
+BABEL_OP1_204_93007_20130628_153139_outLine
+BABEL_OP1_204_93153_20130108_171639_outLine
+BABEL_OP1_204_93861_20130120_204242_inLine
+BABEL_OP1_204_93861_20130120_210020_inLine
+BABEL_OP1_204_94253_20130116_032205_inLine
+BABEL_OP1_204_94333_20130110_220709_outLine
+BABEL_OP1_204_94449_20130226_025646_outLine
+BABEL_OP1_204_94666_20130122_132253_inLine
+BABEL_OP1_204_95490_20130112_211544_outLine
+BABEL_OP1_204_95677_20130818_153821_inLine
+BABEL_OP1_204_95677_20130818_153821_outLine
+BABEL_OP1_204_95750_20130830_003827_inLine
+BABEL_OP1_204_95750_20130830_003827_outLine
+BABEL_OP1_204_95966_20130131_013244_outLine
+BABEL_OP1_204_96376_20130311_011036_inLine
+BABEL_OP1_204_96405_20130104_164913_inLine
+BABEL_OP1_204_96405_20130104_164913_outLine
+BABEL_OP1_204_96985_20121231_002917_inLine
+BABEL_OP1_204_97097_20130322_004237_inLine
+BABEL_OP1_204_97264_20130216_205659_outLine
+BABEL_OP1_204_97460_20130126_211058_outLine
+BABEL_OP1_204_97557_20130123_172926_inLine
+BABEL_OP1_204_99920_20130102_191548_outLine
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/dev.list b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list
new file mode 100644
index 00000000000..168081362fa
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list
@@ -0,0 +1,132 @@
+BABEL_OP2_205_10019_20130330_212743_inLine
+BABEL_OP2_205_10019_20130330_212743_outLine
+BABEL_OP2_205_10319_20130304_201724_inLine
+BABEL_OP2_205_10319_20130304_201724_outLine
+BABEL_OP2_205_11096_20130410_000324_inLine
+BABEL_OP2_205_11096_20130410_000324_outLine
+BABEL_OP2_205_12036_20130315_061649_inLine
+BABEL_OP2_205_12036_20130315_061649_outLine
+BABEL_OP2_205_13792_20130307_054343_inLine
+BABEL_OP2_205_13792_20130307_054343_outLine
+BABEL_OP2_205_14229_20130325_212616_inLine
+BABEL_OP2_205_14229_20130325_212616_outLine
+BABEL_OP2_205_14440_20130327_213643_inLine
+BABEL_OP2_205_14440_20130327_213643_outLine
+BABEL_OP2_205_15216_20130406_215019_inLine
+BABEL_OP2_205_15216_20130406_215019_outLine
+BABEL_OP2_205_15216_20130406_215856_inLine
+BABEL_OP2_205_15216_20130406_215856_outLine
+BABEL_OP2_205_15638_20130331_200208_inLine
+BABEL_OP2_205_15638_20130331_200208_outLine
+BABEL_OP2_205_15730_20130303_011735_inLine
+BABEL_OP2_205_15730_20130303_011735_outLine
+BABEL_OP2_205_15848_20130228_192452_inLine
+BABEL_OP2_205_15848_20130228_192452_outLine
+BABEL_OP2_205_16056_20130323_010902_inLine
+BABEL_OP2_205_16056_20130323_010902_outLine
+BABEL_OP2_205_16787_20130323_072114_inLine
+BABEL_OP2_205_16787_20130323_072114_outLine
+BABEL_OP2_205_17127_20130407_044210_inLine
+BABEL_OP2_205_17127_20130407_044210_outLine
+BABEL_OP2_205_19663_20130320_062434_inLine
+BABEL_OP2_205_19663_20130320_062434_outLine
+BABEL_OP2_205_20454_20140125_002855_inLine
+BABEL_OP2_205_20454_20140125_002855_outLine
+BABEL_OP2_205_21029_20130313_025506_inLine
+BABEL_OP2_205_21029_20130313_025506_outLine
+BABEL_OP2_205_22288_20131228_021559_inLine
+BABEL_OP2_205_22965_20130318_011526_inLine
+BABEL_OP2_205_22965_20130318_011526_outLine
+BABEL_OP2_205_23151_20130415_001434_inLine
+BABEL_OP2_205_23151_20130415_001434_outLine
+BABEL_OP2_205_23151_20130415_002727_inLine
+BABEL_OP2_205_23151_20130415_002727_outLine
+BABEL_OP2_205_23260_20130412_034843_inLine
+BABEL_OP2_205_23260_20130412_034843_outLine
+BABEL_OP2_205_24589_20130327_211515_inLine
+BABEL_OP2_205_24589_20130327_211515_outLine
+BABEL_OP2_205_26206_20130507_004626_inLine
+BABEL_OP2_205_26206_20130507_004626_outLine
+BABEL_OP2_205_26999_20130414_220838_inLine
+BABEL_OP2_205_26999_20130414_220838_outLine
+BABEL_OP2_205_28190_20130409_034344_inLine
+BABEL_OP2_205_28190_20130409_034344_outLine
+BABEL_OP2_205_28775_20130314_052506_inLine
+BABEL_OP2_205_28775_20130314_052506_outLine
+BABEL_OP2_205_28871_20130226_041104_inLine
+BABEL_OP2_205_28871_20130226_041104_outLine
+BABEL_OP2_205_28945_20130315_053607_inLine
+BABEL_OP2_205_28945_20130315_053607_outLine
+BABEL_OP2_205_29039_20130401_012825_inLine
+BABEL_OP2_205_29039_20130401_012825_outLine
+BABEL_OP2_205_29135_20130303_025305_inLine
+BABEL_OP2_205_29135_20130303_025305_outLine
+BABEL_OP2_205_29633_20130413_192214_inLine
+BABEL_OP2_205_29633_20130413_192214_outLine
+BABEL_OP2_205_29643_20130408_040750_inLine
+BABEL_OP2_205_29643_20130408_040750_outLine
+BABEL_OP2_205_29777_20130409_004437_inLine
+BABEL_OP2_205_29777_20130409_004437_outLine
+BABEL_OP2_205_30653_20130505_220845_inLine
+BABEL_OP2_205_30653_20130505_220845_outLine
+BABEL_OP2_205_31919_20130413_172911_inLine
+BABEL_OP2_205_31919_20130413_172911_outLine
+BABEL_OP2_205_33251_20130331_025243_inLine
+BABEL_OP2_205_33251_20130331_025243_outLine
+BABEL_OP2_205_34336_20130325_005404_inLine
+BABEL_OP2_205_34336_20130325_005404_outLine
+BABEL_OP2_205_35069_20130407_022433_inLine
+BABEL_OP2_205_35069_20130407_022433_outLine
+BABEL_OP2_205_35069_20130407_023338_inLine
+BABEL_OP2_205_35069_20130407_023338_outLine
+BABEL_OP2_205_35583_20130408_183143_inLine
+BABEL_OP2_205_35583_20130408_183143_outLine
+BABEL_OP2_205_35788_20131231_021724_inLine
+BABEL_OP2_205_35788_20131231_021724_outLine
+BABEL_OP2_205_36219_20130324_013816_inLine
+BABEL_OP2_205_36219_20130324_013816_outLine
+BABEL_OP2_205_36219_20130324_015535_inLine
+BABEL_OP2_205_36219_20130324_015535_outLine
+BABEL_OP2_205_36293_20130302_213235_inLine
+BABEL_OP2_205_36293_20130302_213235_outLine
+BABEL_OP2_205_41097_20130406_012211_inLine
+BABEL_OP2_205_41097_20130406_012211_outLine
+BABEL_OP2_205_44868_20130330_223802_inLine
+BABEL_OP2_205_44868_20130330_223802_outLine
+BABEL_OP2_205_45699_20140126_003136_inLine
+BABEL_OP2_205_45699_20140126_003136_outLine
+BABEL_OP2_205_46535_20140108_201338_inLine
+BABEL_OP2_205_46535_20140108_201338_outLine
+BABEL_OP2_205_50565_20130304_002644_inLine
+BABEL_OP2_205_50565_20130304_002644_outLine
+BABEL_OP2_205_51540_20130407_040411_inLine
+BABEL_OP2_205_51540_20130407_040411_outLine
+BABEL_OP2_205_51540_20130407_042258_inLine
+BABEL_OP2_205_51540_20130407_042258_outLine
+BABEL_OP2_205_54046_20130409_011916_inLine
+BABEL_OP2_205_54046_20130409_011916_outLine
+BABEL_OP2_205_54735_20131228_012336_inLine
+BABEL_OP2_205_54735_20131228_012336_outLine
+BABEL_OP2_205_60830_20131223_005744_inLine
+BABEL_OP2_205_72903_20131225_002056_inLine
+BABEL_OP2_205_72903_20131225_002056_outLine
+BABEL_OP2_205_77225_20140106_235541_inLine
+BABEL_OP2_205_77225_20140106_235541_outLine
+BABEL_OP2_205_78360_20140123_011434_inLine
+BABEL_OP2_205_78360_20140123_011434_outLine
+BABEL_OP2_205_79139_20130621_004019_inLine
+BABEL_OP2_205_79139_20130621_004019_outLine
+BABEL_OP2_205_86830_20130413_224330_inLine
+BABEL_OP2_205_86830_20130413_224330_outLine
+BABEL_OP2_205_86830_20130413_225657_inLine
+BABEL_OP2_205_86830_20130413_225657_outLine
+BABEL_OP2_205_92060_20130413_223434_inLine
+BABEL_OP2_205_92060_20130413_223434_outLine
+BABEL_OP2_205_92643_20130413_053627_inLine
+BABEL_OP2_205_92643_20130413_053627_outLine
+BABEL_OP2_205_95399_20131222_015121_inLine
+BABEL_OP2_205_95399_20131222_015121_outLine
+BABEL_OP2_205_96808_20130412_211621_inLine
+BABEL_OP2_205_96808_20130412_211621_outLine
+BABEL_OP2_205_97136_20130525_003505_inLine
+BABEL_OP2_205_97136_20130525_003505_outLine
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/eval.list b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list
new file mode 100644
index 00000000000..e0ceeb8f70d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list
@@ -0,0 +1,193 @@
+BABEL_OP2_205_10188_20130301_060141_inLine
+BABEL_OP2_205_10188_20130301_060141_outLine
+BABEL_OP2_205_10416_20130623_000709_inLine
+BABEL_OP2_205_10416_20130623_000709_outLine
+BABEL_OP2_205_11419_20140124_203146_inLine
+BABEL_OP2_205_11419_20140124_203146_outLine
+BABEL_OP2_205_13040_20130312_094024_inLine
+BABEL_OP2_205_13040_20130312_094024_outLine
+BABEL_OP2_205_13427_20130315_071728_inLine
+BABEL_OP2_205_13427_20130315_071728_outLine
+BABEL_OP2_205_13427_20130315_075858_inLine
+BABEL_OP2_205_13427_20130315_075858_outLine
+BABEL_OP2_205_14179_20130401_220334_inLine
+BABEL_OP2_205_14179_20130401_220334_outLine
+BABEL_OP2_205_14537_20130413_045331_inLine
+BABEL_OP2_205_14537_20130413_045331_outLine
+BABEL_OP2_205_14560_20130408_183055_inLine
+BABEL_OP2_205_14560_20130408_183055_outLine
+BABEL_OP2_205_15702_20130331_230832_inLine
+BABEL_OP2_205_15702_20130331_230832_outLine
+BABEL_OP2_205_16184_20130227_050048_inLine
+BABEL_OP2_205_16184_20130227_050048_outLine
+BABEL_OP2_205_16249_20140124_210751_inLine
+BABEL_OP2_205_16249_20140124_210751_outLine
+BABEL_OP2_205_16407_20140124_214655_inLine
+BABEL_OP2_205_16407_20140124_214655_outLine
+BABEL_OP2_205_16601_20130415_195023_inLine
+BABEL_OP2_205_16601_20130415_195023_outLine
+BABEL_OP2_205_17165_20130620_234702_inLine
+BABEL_OP2_205_17165_20130620_234702_outLine
+BABEL_OP2_205_17573_20130408_175948_inLine
+BABEL_OP2_205_17573_20130408_175948_outLine
+BABEL_OP2_205_17890_20130507_001713_inLine
+BABEL_OP2_205_17890_20130507_001713_outLine
+BABEL_OP2_205_18033_20140124_221028_inLine
+BABEL_OP2_205_18033_20140124_221028_outLine
+BABEL_OP2_205_18370_20140124_223813_inLine
+BABEL_OP2_205_18370_20140124_223813_outLine
+BABEL_OP2_205_18863_20130412_202349_inLine
+BABEL_OP2_205_18863_20130412_202349_outLine
+BABEL_OP2_205_19120_20130506_071138_inLine
+BABEL_OP2_205_19120_20130506_071138_outLine
+BABEL_OP2_205_19832_20130621_222438_inLine
+BABEL_OP2_205_19832_20130621_222438_outLine
+BABEL_OP2_205_20330_20130413_042945_inLine
+BABEL_OP2_205_20330_20130413_042945_outLine
+BABEL_OP2_205_22170_20131101_103425_inLine
+BABEL_OP2_205_22170_20131101_103425_outLine
+BABEL_OP2_205_22466_20130225_225235_inLine
+BABEL_OP2_205_22466_20130225_225235_outLine
+BABEL_OP2_205_22466_20130225_225943_inLine
+BABEL_OP2_205_22466_20130225_225943_outLine
+BABEL_OP2_205_22641_20130304_041448_inLine
+BABEL_OP2_205_22641_20130304_041448_outLine
+BABEL_OP2_205_23395_20130324_223525_inLine
+BABEL_OP2_205_23395_20130324_223525_outLine
+BABEL_OP2_205_23628_20130326_051335_inLine
+BABEL_OP2_205_23628_20130326_051335_outLine
+BABEL_OP2_205_24033_20130406_195331_inLine
+BABEL_OP2_205_24033_20130406_195331_outLine
+BABEL_OP2_205_24209_20140125_012503_inLine
+BABEL_OP2_205_24209_20140125_012503_outLine
+BABEL_OP2_205_24924_20130612_193640_inLine
+BABEL_OP2_205_24924_20130612_193640_outLine
+BABEL_OP2_205_25767_20130316_235631_inLine
+BABEL_OP2_205_25767_20130316_235631_outLine
+BABEL_OP2_205_26869_20140107_231859_inLine
+BABEL_OP2_205_26869_20140107_231859_outLine
+BABEL_OP2_205_28585_20130406_222735_inLine
+BABEL_OP2_205_28585_20130406_222735_outLine
+BABEL_OP2_205_29076_20130318_205813_inLine
+BABEL_OP2_205_29076_20130318_205813_outLine
+BABEL_OP2_205_29482_20140123_203957_inLine
+BABEL_OP2_205_29482_20140123_203957_outLine
+BABEL_OP2_205_30250_20130303_023602_inLine
+BABEL_OP2_205_30250_20130303_023602_outLine
+BABEL_OP2_205_30497_20130412_045747_inLine
+BABEL_OP2_205_30497_20130412_045747_outLine
+BABEL_OP2_205_31484_20130331_231345_inLine
+BABEL_OP2_205_31484_20130331_231345_outLine
+BABEL_OP2_205_31979_20130319_081826_inLine
+BABEL_OP2_205_31979_20130319_081826_outLine
+BABEL_OP2_205_32727_20130413_214408_inLine
+BABEL_OP2_205_32727_20130413_214408_outLine
+BABEL_OP2_205_33800_20140125_192240_inLine
+BABEL_OP2_205_33800_20140125_192240_outLine
+BABEL_OP2_205_33992_20130519_062650_inLine
+BABEL_OP2_205_33992_20130519_062650_outLine
+BABEL_OP2_205_34486_20130518_044858_inLine
+BABEL_OP2_205_34486_20130518_044858_outLine
+BABEL_OP2_205_34899_20130619_000929_inLine
+BABEL_OP2_205_34899_20130619_000929_outLine
+BABEL_OP2_205_35786_20130522_020532_inLine
+BABEL_OP2_205_35786_20130522_020532_outLine
+BABEL_OP2_205_36147_20140125_211617_inLine
+BABEL_OP2_205_36147_20140125_211617_outLine
+BABEL_OP2_205_37064_20130318_004959_inLine
+BABEL_OP2_205_37064_20130318_004959_outLine
+BABEL_OP2_205_38139_20130622_053934_inLine
+BABEL_OP2_205_38139_20130622_053934_outLine
+BABEL_OP2_205_38139_20130622_055315_inLine
+BABEL_OP2_205_38139_20130622_055315_outLine
+BABEL_OP2_205_38750_20130413_172545_inLine
+BABEL_OP2_205_38750_20130413_172545_outLine
+BABEL_OP2_205_38750_20130413_173308_inLine
+BABEL_OP2_205_38750_20130413_173308_outLine
+BABEL_OP2_205_39744_20130301_230818_inLine
+BABEL_OP2_205_39744_20130301_230818_outLine
+BABEL_OP2_205_41400_20140122_205716_inLine
+BABEL_OP2_205_41400_20140122_205716_outLine
+BABEL_OP2_205_41682_20140125_234229_inLine
+BABEL_OP2_205_41682_20140125_234229_outLine
+BABEL_OP2_205_42231_20130415_192437_inLine
+BABEL_OP2_205_42231_20130415_192437_outLine
+BABEL_OP2_205_42600_20130324_234058_inLine
+BABEL_OP2_205_42600_20130324_234058_outLine
+BABEL_OP2_205_43074_20130622_063932_inLine
+BABEL_OP2_205_43074_20130622_063932_outLine
+BABEL_OP2_205_43646_20130227_205147_inLine
+BABEL_OP2_205_43646_20130227_205147_outLine
+BABEL_OP2_205_44420_20130328_013519_inLine
+BABEL_OP2_205_44420_20130328_013519_outLine
+BABEL_OP2_205_45106_20130330_013041_inLine
+BABEL_OP2_205_45106_20130330_013041_outLine
+BABEL_OP2_205_45771_20130518_054435_inLine
+BABEL_OP2_205_45771_20130518_054435_outLine
+BABEL_OP2_205_45777_20130325_205405_inLine
+BABEL_OP2_205_45777_20130325_205405_outLine
+BABEL_OP2_205_45843_20130330_060240_inLine
+BABEL_OP2_205_45843_20130330_060240_outLine
+BABEL_OP2_205_45843_20130330_061029_inLine
+BABEL_OP2_205_45843_20130330_061029_outLine
+BABEL_OP2_205_46712_20130326_222120_inLine
+BABEL_OP2_205_46712_20130326_222120_outLine
+BABEL_OP2_205_46974_20130506_235400_inLine
+BABEL_OP2_205_46974_20130506_235400_outLine
+BABEL_OP2_205_46974_20130507_000125_inLine
+BABEL_OP2_205_46974_20130507_000125_outLine
+BABEL_OP2_205_47959_20130322_204503_inLine
+BABEL_OP2_205_47959_20130322_204503_outLine
+BABEL_OP2_205_50958_20130318_044644_inLine
+BABEL_OP2_205_50958_20130318_044644_outLine
+BABEL_OP2_205_50962_20130321_021704_inLine
+BABEL_OP2_205_50962_20130321_021704_outLine
+BABEL_OP2_205_51417_20130407_001304_inLine
+BABEL_OP2_205_51417_20130407_001304_outLine
+BABEL_OP2_205_56213_20130508_055436_inLine
+BABEL_OP2_205_56213_20130508_055436_outLine
+BABEL_OP2_205_56213_20130508_060404_inLine
+BABEL_OP2_205_56213_20130508_060404_outLine
+BABEL_OP2_205_57067_20130407_183303_inLine
+BABEL_OP2_205_57067_20130407_183303_outLine
+BABEL_OP2_205_57922_20130331_195052_inLine
+BABEL_OP2_205_57922_20130331_195052_outLine
+BABEL_OP2_205_60115_20130330_212943_inLine
+BABEL_OP2_205_60115_20130330_212943_outLine
+BABEL_OP2_205_62362_20130517_212752_inLine
+BABEL_OP2_205_62362_20130517_212752_outLine
+BABEL_OP2_205_63265_20131226_003348_inLine
+BABEL_OP2_205_63265_20131226_003348_outLine
+BABEL_OP2_205_63511_20131224_213929_inLine
+BABEL_OP2_205_63511_20131224_213929_outLine
+BABEL_OP2_205_65252_20130413_190417_inLine
+BABEL_OP2_205_65252_20130413_190417_outLine
+BABEL_OP2_205_65339_20131225_232144_inLine
+BABEL_OP2_205_65339_20131225_232144_outLine
+BABEL_OP2_205_70726_20140112_003521_inLine
+BABEL_OP2_205_70726_20140112_003521_outLine
+BABEL_OP2_205_76902_20140123_211702_inLine
+BABEL_OP2_205_76902_20140123_211702_outLine
+BABEL_OP2_205_78161_20140124_012828_inLine
+BABEL_OP2_205_78161_20140124_012828_outLine
+BABEL_OP2_205_78958_20140105_000039_inLine
+BABEL_OP2_205_78958_20140105_000039_outLine
+BABEL_OP2_205_81229_20130316_035102_inLine
+BABEL_OP2_205_81229_20130316_035102_outLine
+BABEL_OP2_205_85439_20130413_172716_inLine
+BABEL_OP2_205_85439_20130413_172716_outLine
+BABEL_OP2_205_90440_20140123_225611_inLine
+BABEL_OP2_205_91930_20130413_193923_inLine
+BABEL_OP2_205_91930_20130413_193923_outLine
+BABEL_OP2_205_92698_20130622_032618_inLine
+BABEL_OP2_205_92698_20130622_032618_outLine
+BABEL_OP2_205_94141_20140118_223253_inLine
+BABEL_OP2_205_94141_20140118_223253_outLine
+BABEL_OP2_205_95966_20131224_023420_inLine
+BABEL_OP2_205_96584_20130408_014557_inLine
+BABEL_OP2_205_96584_20130408_014557_outLine
+BABEL_OP2_205_96940_20140123_220447_inLine
+BABEL_OP2_205_96940_20140123_220447_outLine
+BABEL_OP2_205_97988_20130414_061145_inLine
+BABEL_OP2_205_97988_20130414_061145_outLine
+BABEL_OP2_205_98580_20131223_014628_inLine
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list
new file mode 100644
index 00000000000..ff7234650d1
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list
@@ -0,0 +1,63 @@
+BABEL_OP2_205_13040_20130312_094024_inLine
+BABEL_OP2_205_13040_20130312_094024_outLine
+BABEL_OP2_205_13427_20130315_071728_inLine
+BABEL_OP2_205_13427_20130315_071728_outLine
+BABEL_OP2_205_13427_20130315_075858_inLine
+BABEL_OP2_205_13427_20130315_075858_outLine
+BABEL_OP2_205_16184_20130227_050048_inLine
+BABEL_OP2_205_16184_20130227_050048_outLine
+BABEL_OP2_205_17165_20130620_234702_inLine
+BABEL_OP2_205_17165_20130620_234702_outLine
+BABEL_OP2_205_17573_20130408_175948_inLine
+BABEL_OP2_205_17573_20130408_175948_outLine
+BABEL_OP2_205_18863_20130412_202349_inLine
+BABEL_OP2_205_18863_20130412_202349_outLine
+BABEL_OP2_205_19120_20130506_071138_inLine
+BABEL_OP2_205_19120_20130506_071138_outLine
+BABEL_OP2_205_23628_20130326_051335_inLine
+BABEL_OP2_205_23628_20130326_051335_outLine
+BABEL_OP2_205_24033_20130406_195331_inLine
+BABEL_OP2_205_24033_20130406_195331_outLine
+BABEL_OP2_205_24209_20140125_012503_inLine
+BABEL_OP2_205_24209_20140125_012503_outLine
+BABEL_OP2_205_24924_20130612_193640_inLine
+BABEL_OP2_205_24924_20130612_193640_outLine
+BABEL_OP2_205_28585_20130406_222735_inLine
+BABEL_OP2_205_28585_20130406_222735_outLine
+BABEL_OP2_205_30250_20130303_023602_inLine
+BABEL_OP2_205_30250_20130303_023602_outLine
+BABEL_OP2_205_34899_20130619_000929_inLine
+BABEL_OP2_205_34899_20130619_000929_outLine
+BABEL_OP2_205_37064_20130318_004959_inLine
+BABEL_OP2_205_37064_20130318_004959_outLine
+BABEL_OP2_205_38750_20130413_172545_inLine
+BABEL_OP2_205_38750_20130413_172545_outLine
+BABEL_OP2_205_38750_20130413_173308_inLine
+BABEL_OP2_205_38750_20130413_173308_outLine
+BABEL_OP2_205_45106_20130330_013041_inLine
+BABEL_OP2_205_45106_20130330_013041_outLine
+BABEL_OP2_205_45777_20130325_205405_inLine
+BABEL_OP2_205_45777_20130325_205405_outLine
+BABEL_OP2_205_47959_20130322_204503_inLine
+BABEL_OP2_205_47959_20130322_204503_outLine
+BABEL_OP2_205_50958_20130318_044644_inLine
+BABEL_OP2_205_50958_20130318_044644_outLine
+BABEL_OP2_205_57067_20130407_183303_inLine
+BABEL_OP2_205_57067_20130407_183303_outLine
+BABEL_OP2_205_57922_20130331_195052_inLine
+BABEL_OP2_205_57922_20130331_195052_outLine
+BABEL_OP2_205_63511_20131224_213929_inLine
+BABEL_OP2_205_63511_20131224_213929_outLine
+BABEL_OP2_205_65339_20131225_232144_inLine
+BABEL_OP2_205_65339_20131225_232144_outLine
+BABEL_OP2_205_81229_20130316_035102_inLine
+BABEL_OP2_205_81229_20130316_035102_outLine
+BABEL_OP2_205_85439_20130413_172716_inLine
+BABEL_OP2_205_85439_20130413_172716_outLine
+BABEL_OP2_205_91930_20130413_193923_inLine
+BABEL_OP2_205_91930_20130413_193923_outLine
+BABEL_OP2_205_92698_20130622_032618_inLine
+BABEL_OP2_205_92698_20130622_032618_outLine
+BABEL_OP2_205_94141_20140118_223253_inLine
+BABEL_OP2_205_94141_20140118_223253_outLine
+BABEL_OP2_205_98580_20131223_014628_inLine
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list
new file mode 100644
index 00000000000..022ddf05869
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list
@@ -0,0 +1,133 @@
+BABEL_OP2_205_10184_20130315_054426_inLine
+BABEL_OP2_205_10184_20130315_054426_outLine
+BABEL_OP2_205_10647_20130413_190550_inLine
+BABEL_OP2_205_10647_20130413_190550_outLine
+BABEL_OP2_205_12220_20130323_002310_inLine
+BABEL_OP2_205_12220_20130323_002310_outLine
+BABEL_OP2_205_14807_20130326_065101_inLine
+BABEL_OP2_205_14807_20130326_065101_outLine
+BABEL_OP2_205_14807_20130326_070339_inLine
+BABEL_OP2_205_14807_20130326_070339_outLine
+BABEL_OP2_205_14875_20130319_211742_inLine
+BABEL_OP2_205_14875_20130319_211742_outLine
+BABEL_OP2_205_14875_20130319_213338_inLine
+BABEL_OP2_205_14875_20130319_213338_outLine
+BABEL_OP2_205_14929_20131223_022753_inLine
+BABEL_OP2_205_15535_20130506_195619_inLine
+BABEL_OP2_205_15535_20130506_195619_outLine
+BABEL_OP2_205_17881_20130413_190631_inLine
+BABEL_OP2_205_17881_20130413_190631_outLine
+BABEL_OP2_205_17881_20130413_191638_inLine
+BABEL_OP2_205_17881_20130413_191638_outLine
+BABEL_OP2_205_17914_20130407_235720_inLine
+BABEL_OP2_205_17914_20130407_235720_outLine
+BABEL_OP2_205_18766_20130413_033911_inLine
+BABEL_OP2_205_18766_20130413_033911_outLine
+BABEL_OP2_205_19134_20130331_195936_inLine
+BABEL_OP2_205_19134_20130331_195936_outLine
+BABEL_OP2_205_19749_20130406_231234_inLine
+BABEL_OP2_205_19749_20130406_231234_outLine
+BABEL_OP2_205_20800_20130408_015430_inLine
+BABEL_OP2_205_20800_20130408_015430_outLine
+BABEL_OP2_205_20916_20130228_200116_inLine
+BABEL_OP2_205_20916_20130228_200116_outLine
+BABEL_OP2_205_21206_20130312_205638_inLine
+BABEL_OP2_205_21206_20130312_205638_outLine
+BABEL_OP2_205_22321_20130308_042214_inLine
+BABEL_OP2_205_22321_20130308_042214_outLine
+BABEL_OP2_205_23092_20130413_181637_inLine
+BABEL_OP2_205_23092_20130413_181637_outLine
+BABEL_OP2_205_23893_20140123_003759_inLine
+BABEL_OP2_205_23893_20140123_003759_outLine
+BABEL_OP2_205_24239_20130415_171824_inLine
+BABEL_OP2_205_24239_20130415_171824_outLine
+BABEL_OP2_205_24290_20130414_221432_inLine
+BABEL_OP2_205_24290_20130414_221432_outLine
+BABEL_OP2_205_24323_20130326_051101_inLine
+BABEL_OP2_205_24323_20130326_051101_outLine
+BABEL_OP2_205_24605_20130311_012103_inLine
+BABEL_OP2_205_24605_20130311_012103_outLine
+BABEL_OP2_205_25085_20130612_023620_inLine
+BABEL_OP2_205_25085_20130612_023620_outLine
+BABEL_OP2_205_26574_20130509_203057_inLine
+BABEL_OP2_205_26574_20130509_203057_outLine
+BABEL_OP2_205_26602_20130412_235831_inLine
+BABEL_OP2_205_26602_20130412_235831_outLine
+BABEL_OP2_205_28477_20130412_234819_inLine
+BABEL_OP2_205_28477_20130412_234819_outLine
+BABEL_OP2_205_28522_20130401_211215_inLine
+BABEL_OP2_205_28522_20130401_211215_outLine
+BABEL_OP2_205_31039_20140125_023755_inLine
+BABEL_OP2_205_31039_20140125_023755_outLine
+BABEL_OP2_205_32630_20130412_054815_inLine
+BABEL_OP2_205_32630_20130412_054815_outLine
+BABEL_OP2_205_33355_20130311_214515_inLine
+BABEL_OP2_205_33355_20130311_214515_outLine
+BABEL_OP2_205_33840_20130507_012940_inLine
+BABEL_OP2_205_33840_20130507_012940_outLine
+BABEL_OP2_205_34106_20130301_221919_inLine
+BABEL_OP2_205_34106_20130301_221919_outLine
+BABEL_OP2_205_34197_20130302_231101_inLine
+BABEL_OP2_205_34197_20130302_231101_outLine
+BABEL_OP2_205_34647_20140125_205318_inLine
+BABEL_OP2_205_34647_20140125_205318_outLine
+BABEL_OP2_205_37853_20130413_005407_inLine
+BABEL_OP2_205_37853_20130413_005407_outLine
+BABEL_OP2_205_38554_20130301_085606_inLine
+BABEL_OP2_205_38554_20130301_085606_outLine
+BABEL_OP2_205_38664_20130325_030156_inLine
+BABEL_OP2_205_38664_20130325_030156_outLine
+BABEL_OP2_205_38963_20131227_202341_inLine
+BABEL_OP2_205_38963_20131227_202341_outLine
+BABEL_OP2_205_39059_20130414_033146_inLine
+BABEL_OP2_205_39059_20130414_033146_outLine
+BABEL_OP2_205_39059_20130414_034411_inLine
+BABEL_OP2_205_39059_20130414_034411_outLine
+BABEL_OP2_205_40196_20140125_222906_inLine
+BABEL_OP2_205_40196_20140125_222906_outLine
+BABEL_OP2_205_41618_20130312_214004_inLine
+BABEL_OP2_205_41618_20130312_214004_outLine
+BABEL_OP2_205_41741_20130326_004056_inLine
+BABEL_OP2_205_41741_20130326_004056_outLine
+BABEL_OP2_205_42619_20130325_002736_inLine
+BABEL_OP2_205_42619_20130325_002736_outLine
+BABEL_OP2_205_43368_20130329_211826_inLine
+BABEL_OP2_205_43368_20130329_211826_outLine
+BABEL_OP2_205_43368_20130329_212612_inLine
+BABEL_OP2_205_43368_20130329_212612_outLine
+BABEL_OP2_205_45121_20130412_035841_inLine
+BABEL_OP2_205_45121_20130412_035841_outLine
+BABEL_OP2_205_46315_20130506_231421_inLine
+BABEL_OP2_205_46315_20130506_231421_outLine
+BABEL_OP2_205_49118_20130412_210858_inLine
+BABEL_OP2_205_49118_20130412_210858_outLine
+BABEL_OP2_205_49118_20130412_211622_inLine
+BABEL_OP2_205_49118_20130412_211622_outLine
+BABEL_OP2_205_50745_20130505_195625_inLine
+BABEL_OP2_205_50745_20130505_195625_outLine
+BABEL_OP2_205_53415_20131216_223652_inLine
+BABEL_OP2_205_53415_20131216_223652_outLine
+BABEL_OP2_205_58026_20131219_010750_inLine
+BABEL_OP2_205_58026_20131219_010750_outLine
+BABEL_OP2_205_58821_20130415_190958_inLine
+BABEL_OP2_205_58821_20130415_190958_outLine
+BABEL_OP2_205_59645_20130619_190548_inLine
+BABEL_OP2_205_59645_20130619_190548_outLine
+BABEL_OP2_205_62200_20130405_021524_inLine
+BABEL_OP2_205_62200_20130405_021524_outLine
+BABEL_OP2_205_62289_20140122_214709_inLine
+BABEL_OP2_205_62289_20140122_214709_outLine
+BABEL_OP2_205_70716_20130413_193114_inLine
+BABEL_OP2_205_70716_20130413_193114_outLine
+BABEL_OP2_205_77242_20130616_015950_inLine
+BABEL_OP2_205_77242_20130616_015950_outLine
+BABEL_OP2_205_84605_20130319_203823_inLine
+BABEL_OP2_205_84605_20130319_203823_outLine
+BABEL_OP2_205_84737_20130407_054058_inLine
+BABEL_OP2_205_84737_20130407_054058_outLine
+BABEL_OP2_205_84936_20130405_063301_inLine
+BABEL_OP2_205_84936_20130405_063301_outLine
+BABEL_OP2_205_86826_20130411_224207_inLine
+BABEL_OP2_205_86826_20130411_224207_outLine
+BABEL_OP2_205_90760_20130612_022556_inLine
+BABEL_OP2_205_90760_20130612_022556_outLine
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list
new file mode 100644
index 00000000000..89ee0b28779
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list
@@ -0,0 +1,399 @@
+BABEL_OP2_205_10036_20130325_212656_inLine
+BABEL_OP2_205_10036_20130325_212656_outLine
+BABEL_OP2_205_10482_20130330_232812_inLine
+BABEL_OP2_205_10482_20130330_232812_outLine
+BABEL_OP2_205_10638_20140122_201207_inLine
+BABEL_OP2_205_10638_20140122_201207_outLine
+BABEL_OP2_205_10938_20130402_021742_inLine
+BABEL_OP2_205_10938_20130402_021742_outLine
+BABEL_OP2_205_10966_20130324_203837_inLine
+BABEL_OP2_205_10966_20130324_203837_outLine
+BABEL_OP2_205_11352_20130505_190427_inLine
+BABEL_OP2_205_11352_20130505_190427_outLine
+BABEL_OP2_205_11581_20130317_071927_inLine
+BABEL_OP2_205_11581_20130317_071927_outLine
+BABEL_OP2_205_11663_20130402_031747_inLine
+BABEL_OP2_205_11663_20130402_031747_outLine
+BABEL_OP2_205_11797_20130307_233702_inLine
+BABEL_OP2_205_11797_20130307_233702_outLine
+BABEL_OP2_205_11797_20130307_235053_inLine
+BABEL_OP2_205_11797_20130307_235053_outLine
+BABEL_OP2_205_12635_20130406_230527_inLine
+BABEL_OP2_205_12635_20130406_230527_outLine
+BABEL_OP2_205_13030_20130330_234019_inLine
+BABEL_OP2_205_13030_20130330_234019_outLine
+BABEL_OP2_205_13189_20130413_230649_inLine
+BABEL_OP2_205_13189_20130413_230649_outLine
+BABEL_OP2_205_13324_20130318_043359_inLine
+BABEL_OP2_205_13324_20130318_043359_outLine
+BABEL_OP2_205_13744_20130302_055938_inLine
+BABEL_OP2_205_13744_20130302_055938_outLine
+BABEL_OP2_205_14137_20130326_212737_inLine
+BABEL_OP2_205_14137_20130326_212737_outLine
+BABEL_OP2_205_14539_20130413_020822_inLine
+BABEL_OP2_205_14539_20130413_020822_outLine
+BABEL_OP2_205_14729_20130526_024319_inLine
+BABEL_OP2_205_14729_20130526_024319_outLine
+BABEL_OP2_205_14814_20130326_062123_inLine
+BABEL_OP2_205_14814_20130326_062123_outLine
+BABEL_OP2_205_14899_20130303_062436_inLine
+BABEL_OP2_205_14899_20130303_062436_outLine
+BABEL_OP2_205_14972_20130312_213702_inLine
+BABEL_OP2_205_14972_20130312_213702_outLine
+BABEL_OP2_205_15024_20131222_033424_inLine
+BABEL_OP2_205_15024_20131222_033424_outLine
+BABEL_OP2_205_15227_20130412_005202_inLine
+BABEL_OP2_205_15227_20130412_005202_outLine
+BABEL_OP2_205_15382_20130325_075405_inLine
+BABEL_OP2_205_15382_20130325_075405_outLine
+BABEL_OP2_205_16839_20130407_052530_inLine
+BABEL_OP2_205_16839_20130407_052530_outLine
+BABEL_OP2_205_16886_20130326_054927_inLine
+BABEL_OP2_205_16886_20130326_054927_outLine
+BABEL_OP2_205_16924_20130331_232254_inLine
+BABEL_OP2_205_16924_20130331_232254_outLine
+BABEL_OP2_205_17320_20130413_054847_inLine
+BABEL_OP2_205_17320_20130413_054847_outLine
+BABEL_OP2_205_17440_20130413_195207_inLine
+BABEL_OP2_205_17440_20130413_195207_outLine
+BABEL_OP2_205_17472_20130508_013928_inLine
+BABEL_OP2_205_17472_20130508_013928_outLine
+BABEL_OP2_205_17496_20130414_215325_inLine
+BABEL_OP2_205_17496_20130414_215325_outLine
+BABEL_OP2_205_17520_20130312_074120_inLine
+BABEL_OP2_205_17520_20130312_074120_outLine
+BABEL_OP2_205_17615_20130407_234405_inLine
+BABEL_OP2_205_17615_20130407_234405_outLine
+BABEL_OP2_205_18242_20130408_005657_inLine
+BABEL_OP2_205_18242_20130408_005657_outLine
+BABEL_OP2_205_18291_20130618_004811_inLine
+BABEL_OP2_205_18291_20130618_004811_outLine
+BABEL_OP2_205_18566_20130505_173829_inLine
+BABEL_OP2_205_18566_20130505_173829_outLine
+BABEL_OP2_205_19589_20130413_203154_inLine
+BABEL_OP2_205_19589_20130413_203154_outLine
+BABEL_OP2_205_19703_20130325_042858_inLine
+BABEL_OP2_205_19703_20130325_042858_outLine
+BABEL_OP2_205_19722_20130306_045231_inLine
+BABEL_OP2_205_19722_20130306_045231_outLine
+BABEL_OP2_205_20133_20130228_055409_inLine
+BABEL_OP2_205_20133_20130228_055409_outLine
+BABEL_OP2_205_20922_20130406_225439_inLine
+BABEL_OP2_205_20922_20130406_225439_outLine
+BABEL_OP2_205_20985_20130401_025757_inLine
+BABEL_OP2_205_20985_20130401_025757_outLine
+BABEL_OP2_205_21004_20130408_222653_inLine
+BABEL_OP2_205_21004_20130408_222653_outLine
+BABEL_OP2_205_21435_20130414_044944_inLine
+BABEL_OP2_205_21435_20130414_044944_outLine
+BABEL_OP2_205_21543_20140125_004741_inLine
+BABEL_OP2_205_21543_20140125_004741_outLine
+BABEL_OP2_205_21807_20130324_054526_inLine
+BABEL_OP2_205_21892_20130507_023354_inLine
+BABEL_OP2_205_21892_20130507_023354_outLine
+BABEL_OP2_205_22446_20130309_073946_outLine
+BABEL_OP2_205_22494_20130331_230611_inLine
+BABEL_OP2_205_22494_20130331_230611_outLine
+BABEL_OP2_205_22624_20130331_012106_inLine
+BABEL_OP2_205_22624_20130331_012106_outLine
+BABEL_OP2_205_22629_20131231_223232_inLine
+BABEL_OP2_205_22629_20131231_223232_outLine
+BABEL_OP2_205_22918_20130413_043023_inLine
+BABEL_OP2_205_22918_20130413_043023_outLine
+BABEL_OP2_205_22918_20130413_044543_inLine
+BABEL_OP2_205_22918_20130413_044543_outLine
+BABEL_OP2_205_23006_20130322_202429_inLine
+BABEL_OP2_205_23006_20130322_202429_outLine
+BABEL_OP2_205_23046_20130327_010653_inLine
+BABEL_OP2_205_23046_20130327_010653_outLine
+BABEL_OP2_205_23190_20130323_014750_inLine
+BABEL_OP2_205_23190_20130323_014750_outLine
+BABEL_OP2_205_23239_20130331_034518_inLine
+BABEL_OP2_205_23239_20130331_034518_outLine
+BABEL_OP2_205_23752_20140123_024924_inLine
+BABEL_OP2_205_23752_20140123_024924_outLine
+BABEL_OP2_205_24253_20130505_214600_inLine
+BABEL_OP2_205_24253_20130505_214600_outLine
+BABEL_OP2_205_24270_20130406_070358_inLine
+BABEL_OP2_205_24270_20130406_070358_outLine
+BABEL_OP2_205_24470_20130406_021646_inLine
+BABEL_OP2_205_24470_20130406_021646_outLine
+BABEL_OP2_205_24532_20130227_052040_inLine
+BABEL_OP2_205_24532_20130227_052040_outLine
+BABEL_OP2_205_24569_20130508_235213_inLine
+BABEL_OP2_205_24569_20130508_235213_outLine
+BABEL_OP2_205_24679_20130303_043753_inLine
+BABEL_OP2_205_24679_20130303_043753_outLine
+BABEL_OP2_205_25719_20130406_231631_inLine
+BABEL_OP2_205_25719_20130406_231631_outLine
+BABEL_OP2_205_25719_20130406_232555_inLine
+BABEL_OP2_205_25719_20130406_232555_outLine
+BABEL_OP2_205_25719_20130406_233313_inLine
+BABEL_OP2_205_25719_20130406_233313_outLine
+BABEL_OP2_205_25961_20130305_063202_inLine
+BABEL_OP2_205_25961_20130305_063202_outLine
+BABEL_OP2_205_26381_20140125_015707_inLine
+BABEL_OP2_205_26381_20140125_015707_outLine
+BABEL_OP2_205_26388_20130330_021001_inLine
+BABEL_OP2_205_26388_20130330_021001_outLine
+BABEL_OP2_205_26507_20131101_103425_inLine
+BABEL_OP2_205_26507_20131101_103425_outLine
+BABEL_OP2_205_27125_20130227_061700_inLine
+BABEL_OP2_205_27125_20130227_061700_outLine
+BABEL_OP2_205_27189_20140104_001032_inLine
+BABEL_OP2_205_27189_20140104_001032_outLine
+BABEL_OP2_205_27203_20130331_021946_inLine
+BABEL_OP2_205_27203_20130331_021946_outLine
+BABEL_OP2_205_27590_20130506_201921_inLine
+BABEL_OP2_205_27590_20130506_201921_outLine
+BABEL_OP2_205_27841_20130414_222155_inLine
+BABEL_OP2_205_27841_20130414_222155_outLine
+BABEL_OP2_205_28012_20130507_054019_inLine
+BABEL_OP2_205_28012_20130507_054019_outLine
+BABEL_OP2_205_28419_20130320_202136_inLine
+BABEL_OP2_205_28419_20130320_202136_outLine
+BABEL_OP2_205_29023_20130314_060343_inLine
+BABEL_OP2_205_29023_20130314_060343_outLine
+BABEL_OP2_205_29323_20130414_230355_inLine
+BABEL_OP2_205_29323_20130414_230355_outLine
+BABEL_OP2_205_29404_20130414_214714_inLine
+BABEL_OP2_205_29404_20130414_214714_outLine
+BABEL_OP2_205_29439_20130413_182356_inLine
+BABEL_OP2_205_29439_20130413_182356_outLine
+BABEL_OP2_205_30013_20130401_005939_inLine
+BABEL_OP2_205_30013_20130401_005939_outLine
+BABEL_OP2_205_30180_20130323_005331_inLine
+BABEL_OP2_205_30180_20130323_005331_outLine
+BABEL_OP2_205_30395_20130316_060814_inLine
+BABEL_OP2_205_30395_20130316_060814_outLine
+BABEL_OP2_205_30432_20130330_200303_inLine
+BABEL_OP2_205_30432_20130330_200303_outLine
+BABEL_OP2_205_30869_20130412_202311_inLine
+BABEL_OP2_205_30869_20130412_202311_outLine
+BABEL_OP2_205_31109_20130619_181905_inLine
+BABEL_OP2_205_31109_20130619_181905_outLine
+BABEL_OP2_205_31346_20130507_204621_inLine
+BABEL_OP2_205_31346_20130507_204621_outLine
+BABEL_OP2_205_32097_20130301_034527_inLine
+BABEL_OP2_205_32097_20130301_034527_outLine
+BABEL_OP2_205_32122_20130321_004623_inLine
+BABEL_OP2_205_32122_20130321_004623_outLine
+BABEL_OP2_205_32122_20130321_010341_inLine
+BABEL_OP2_205_32122_20130321_010341_outLine
+BABEL_OP2_205_32244_20130412_190534_inLine
+BABEL_OP2_205_32244_20130412_190534_outLine
+BABEL_OP2_205_32837_20130507_011223_inLine
+BABEL_OP2_205_32837_20130507_011223_outLine
+BABEL_OP2_205_33229_20130414_213157_inLine
+BABEL_OP2_205_33229_20130414_213157_outLine
+BABEL_OP2_205_33273_20130320_040141_inLine
+BABEL_OP2_205_33273_20130320_040141_outLine
+BABEL_OP2_205_33424_20130412_193538_inLine
+BABEL_OP2_205_33424_20130412_193538_outLine
+BABEL_OP2_205_33476_20130405_051711_inLine
+BABEL_OP2_205_33476_20130405_051711_outLine
+BABEL_OP2_205_33497_20130619_220728_inLine
+BABEL_OP2_205_33497_20130619_220728_outLine
+BABEL_OP2_205_33913_20130414_052534_inLine
+BABEL_OP2_205_33913_20130414_052534_outLine
+BABEL_OP2_205_33951_20130619_212409_inLine
+BABEL_OP2_205_33951_20130619_212409_outLine
+BABEL_OP2_205_34586_20140125_203417_inLine
+BABEL_OP2_205_34586_20140125_203417_outLine
+BABEL_OP2_205_34903_20130406_055051_inLine
+BABEL_OP2_205_34903_20130406_055051_outLine
+BABEL_OP2_205_35139_20130312_070415_inLine
+BABEL_OP2_205_35139_20130312_070415_outLine
+BABEL_OP2_205_35143_20130414_203900_inLine
+BABEL_OP2_205_35143_20130414_203900_outLine
+BABEL_OP2_205_35181_20130413_201739_inLine
+BABEL_OP2_205_35181_20130413_201739_outLine
+BABEL_OP2_205_36642_20130413_013238_inLine
+BABEL_OP2_205_36642_20130413_013238_outLine
+BABEL_OP2_205_37228_20130407_205807_inLine
+BABEL_OP2_205_37228_20130407_205807_outLine
+BABEL_OP2_205_37271_20130507_231712_inLine
+BABEL_OP2_205_37271_20130507_231712_outLine
+BABEL_OP2_205_37285_20130401_061737_inLine
+BABEL_OP2_205_37285_20130401_061737_outLine
+BABEL_OP2_205_37290_20130405_070403_inLine
+BABEL_OP2_205_37290_20130405_070403_outLine
+BABEL_OP2_205_37598_20130405_034853_inLine
+BABEL_OP2_205_37598_20130405_034853_outLine
+BABEL_OP2_205_37682_20130325_022952_inLine
+BABEL_OP2_205_37682_20130325_022952_outLine
+BABEL_OP2_205_37776_20140125_220835_inLine
+BABEL_OP2_205_37776_20140125_220835_outLine
+BABEL_OP2_205_38340_20130315_063442_inLine
+BABEL_OP2_205_38340_20130315_063442_outLine
+BABEL_OP2_205_38689_20130414_233704_inLine
+BABEL_OP2_205_38689_20130414_233704_outLine
+BABEL_OP2_205_38741_20130315_071146_inLine
+BABEL_OP2_205_38741_20130315_071146_outLine
+BABEL_OP2_205_38878_20130406_202135_inLine
+BABEL_OP2_205_38878_20130406_202135_outLine
+BABEL_OP2_205_39555_20130507_025010_inLine
+BABEL_OP2_205_39555_20130507_025010_outLine
+BABEL_OP2_205_40557_20130413_185709_inLine
+BABEL_OP2_205_40557_20130413_185709_outLine
+BABEL_OP2_205_40557_20130413_190849_inLine
+BABEL_OP2_205_40557_20130413_190849_outLine
+BABEL_OP2_205_40565_20130401_015506_inLine
+BABEL_OP2_205_40565_20130401_015506_outLine
+BABEL_OP2_205_40939_20140125_231452_inLine
+BABEL_OP2_205_40939_20140125_231452_outLine
+BABEL_OP2_205_41038_20130405_060002_inLine
+BABEL_OP2_205_41038_20130405_060002_outLine
+BABEL_OP2_205_41174_20130318_033313_inLine
+BABEL_OP2_205_41174_20130318_033313_outLine
+BABEL_OP2_205_42834_20130414_202256_inLine
+BABEL_OP2_205_42834_20130414_202256_outLine
+BABEL_OP2_205_42991_20130401_024013_inLine
+BABEL_OP2_205_42991_20130401_024013_outLine
+BABEL_OP2_205_42991_20130401_025044_inLine
+BABEL_OP2_205_42991_20130401_025044_outLine
+BABEL_OP2_205_43286_20130304_044510_inLine
+BABEL_OP2_205_43286_20130304_044510_outLine
+BABEL_OP2_205_43788_20130331_024429_inLine
+BABEL_OP2_205_43788_20130331_024429_outLine
+BABEL_OP2_205_43788_20130331_030508_inLine
+BABEL_OP2_205_43788_20130331_030508_outLine
+BABEL_OP2_205_44847_20130325_055635_inLine
+BABEL_OP2_205_44847_20130325_055635_outLine
+BABEL_OP2_205_45235_20130509_011826_inLine
+BABEL_OP2_205_45235_20130509_011826_outLine
+BABEL_OP2_205_45374_20140126_000904_inLine
+BABEL_OP2_205_45374_20140126_000904_outLine
+BABEL_OP2_205_46041_20130507_202255_inLine
+BABEL_OP2_205_46041_20130507_202255_outLine
+BABEL_OP2_205_46589_20130331_014535_inLine
+BABEL_OP2_205_46589_20130331_014535_outLine
+BABEL_OP2_205_46757_20130401_191649_inLine
+BABEL_OP2_205_46757_20130401_191649_outLine
+BABEL_OP2_205_46976_20131104_051409_inLine
+BABEL_OP2_205_46976_20131104_051409_outLine
+BABEL_OP2_205_47110_20140126_005953_inLine
+BABEL_OP2_205_47110_20140126_005953_outLine
+BABEL_OP2_205_47451_20130408_195325_inLine
+BABEL_OP2_205_47451_20130408_195325_outLine
+BABEL_OP2_205_47487_20130328_060026_inLine
+BABEL_OP2_205_47487_20130328_060026_outLine
+BABEL_OP2_205_47823_20130330_204952_inLine
+BABEL_OP2_205_47823_20130330_204952_outLine
+BABEL_OP2_205_47878_20130319_211057_inLine
+BABEL_OP2_205_47878_20130319_211057_outLine
+BABEL_OP2_205_48422_20130407_020759_inLine
+BABEL_OP2_205_48422_20130407_020759_outLine
+BABEL_OP2_205_49287_20130327_053930_inLine
+BABEL_OP2_205_49287_20130327_053930_outLine
+BABEL_OP2_205_49630_20130401_013908_inLine
+BABEL_OP2_205_49630_20130401_013908_outLine
+BABEL_OP2_205_49768_20130330_025558_inLine
+BABEL_OP2_205_49768_20130330_025558_outLine
+BABEL_OP2_205_50186_20140126_012415_inLine
+BABEL_OP2_205_50186_20140126_012415_outLine
+BABEL_OP2_205_50779_20130320_043549_inLine
+BABEL_OP2_205_50779_20130320_043549_outLine
+BABEL_OP2_205_50779_20130320_044244_inLine
+BABEL_OP2_205_50779_20130320_044244_outLine
+BABEL_OP2_205_51015_20130401_202255_inLine
+BABEL_OP2_205_51015_20130401_202255_outLine
+BABEL_OP2_205_52246_20130323_232916_inLine
+BABEL_OP2_205_52246_20130323_232916_outLine
+BABEL_OP2_205_52490_20130326_051608_inLine
+BABEL_OP2_205_52490_20130326_051608_outLine
+BABEL_OP2_205_53063_20130508_051415_inLine
+BABEL_OP2_205_53063_20130508_051415_outLine
+BABEL_OP2_205_53441_20140126_015538_inLine
+BABEL_OP2_205_53441_20140126_015538_outLine
+BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_59078_20130406_075721_inLine +BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine +BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine 
+BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/training.list b/egs/babel/s5d/conf/lists/205-kurmanji/training.list new file mode 100644 index 00000000000..6f50b091eff --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/training.list @@ -0,0 +1,532 @@ +BABEL_OP2_205_10036_20130325_212656_inLine +BABEL_OP2_205_10036_20130325_212656_outLine +BABEL_OP2_205_10184_20130315_054426_inLine +BABEL_OP2_205_10184_20130315_054426_outLine +BABEL_OP2_205_10482_20130330_232812_inLine +BABEL_OP2_205_10482_20130330_232812_outLine +BABEL_OP2_205_10638_20140122_201207_inLine +BABEL_OP2_205_10638_20140122_201207_outLine +BABEL_OP2_205_10647_20130413_190550_inLine +BABEL_OP2_205_10647_20130413_190550_outLine +BABEL_OP2_205_10938_20130402_021742_inLine +BABEL_OP2_205_10938_20130402_021742_outLine +BABEL_OP2_205_10966_20130324_203837_inLine +BABEL_OP2_205_10966_20130324_203837_outLine +BABEL_OP2_205_11352_20130505_190427_inLine +BABEL_OP2_205_11352_20130505_190427_outLine +BABEL_OP2_205_11581_20130317_071927_inLine +BABEL_OP2_205_11581_20130317_071927_outLine +BABEL_OP2_205_11663_20130402_031747_inLine +BABEL_OP2_205_11663_20130402_031747_outLine +BABEL_OP2_205_11797_20130307_233702_inLine +BABEL_OP2_205_11797_20130307_233702_outLine +BABEL_OP2_205_11797_20130307_235053_inLine +BABEL_OP2_205_11797_20130307_235053_outLine +BABEL_OP2_205_12220_20130323_002310_inLine +BABEL_OP2_205_12220_20130323_002310_outLine +BABEL_OP2_205_12635_20130406_230527_inLine +BABEL_OP2_205_12635_20130406_230527_outLine +BABEL_OP2_205_13030_20130330_234019_inLine +BABEL_OP2_205_13030_20130330_234019_outLine +BABEL_OP2_205_13189_20130413_230649_inLine +BABEL_OP2_205_13189_20130413_230649_outLine +BABEL_OP2_205_13324_20130318_043359_inLine +BABEL_OP2_205_13324_20130318_043359_outLine +BABEL_OP2_205_13744_20130302_055938_inLine +BABEL_OP2_205_13744_20130302_055938_outLine +BABEL_OP2_205_14137_20130326_212737_inLine +BABEL_OP2_205_14137_20130326_212737_outLine +BABEL_OP2_205_14539_20130413_020822_inLine +BABEL_OP2_205_14539_20130413_020822_outLine +BABEL_OP2_205_14729_20130526_024319_inLine +BABEL_OP2_205_14729_20130526_024319_outLine +BABEL_OP2_205_14807_20130326_065101_inLine +BABEL_OP2_205_14807_20130326_065101_outLine +BABEL_OP2_205_14807_20130326_070339_inLine +BABEL_OP2_205_14807_20130326_070339_outLine +BABEL_OP2_205_14814_20130326_062123_inLine +BABEL_OP2_205_14814_20130326_062123_outLine +BABEL_OP2_205_14875_20130319_211742_inLine +BABEL_OP2_205_14875_20130319_211742_outLine +BABEL_OP2_205_14875_20130319_213338_inLine +BABEL_OP2_205_14875_20130319_213338_outLine +BABEL_OP2_205_14899_20130303_062436_inLine +BABEL_OP2_205_14899_20130303_062436_outLine +BABEL_OP2_205_14929_20131223_022753_inLine +BABEL_OP2_205_14972_20130312_213702_inLine +BABEL_OP2_205_14972_20130312_213702_outLine +BABEL_OP2_205_15024_20131222_033424_inLine +BABEL_OP2_205_15024_20131222_033424_outLine +BABEL_OP2_205_15227_20130412_005202_inLine +BABEL_OP2_205_15227_20130412_005202_outLine +BABEL_OP2_205_15382_20130325_075405_inLine 
+BABEL_OP2_205_15382_20130325_075405_outLine +BABEL_OP2_205_15535_20130506_195619_inLine +BABEL_OP2_205_15535_20130506_195619_outLine +BABEL_OP2_205_16839_20130407_052530_inLine +BABEL_OP2_205_16839_20130407_052530_outLine +BABEL_OP2_205_16886_20130326_054927_inLine +BABEL_OP2_205_16886_20130326_054927_outLine +BABEL_OP2_205_16924_20130331_232254_inLine +BABEL_OP2_205_16924_20130331_232254_outLine +BABEL_OP2_205_17320_20130413_054847_inLine +BABEL_OP2_205_17320_20130413_054847_outLine +BABEL_OP2_205_17440_20130413_195207_inLine +BABEL_OP2_205_17440_20130413_195207_outLine +BABEL_OP2_205_17472_20130508_013928_inLine +BABEL_OP2_205_17472_20130508_013928_outLine +BABEL_OP2_205_17496_20130414_215325_inLine +BABEL_OP2_205_17496_20130414_215325_outLine +BABEL_OP2_205_17520_20130312_074120_inLine +BABEL_OP2_205_17520_20130312_074120_outLine +BABEL_OP2_205_17615_20130407_234405_inLine +BABEL_OP2_205_17615_20130407_234405_outLine +BABEL_OP2_205_17881_20130413_190631_inLine +BABEL_OP2_205_17881_20130413_190631_outLine +BABEL_OP2_205_17881_20130413_191638_inLine +BABEL_OP2_205_17881_20130413_191638_outLine +BABEL_OP2_205_17914_20130407_235720_inLine +BABEL_OP2_205_17914_20130407_235720_outLine +BABEL_OP2_205_18242_20130408_005657_inLine +BABEL_OP2_205_18242_20130408_005657_outLine +BABEL_OP2_205_18291_20130618_004811_inLine +BABEL_OP2_205_18291_20130618_004811_outLine +BABEL_OP2_205_18566_20130505_173829_inLine +BABEL_OP2_205_18566_20130505_173829_outLine +BABEL_OP2_205_18766_20130413_033911_inLine +BABEL_OP2_205_18766_20130413_033911_outLine +BABEL_OP2_205_19134_20130331_195936_inLine +BABEL_OP2_205_19134_20130331_195936_outLine +BABEL_OP2_205_19589_20130413_203154_inLine +BABEL_OP2_205_19589_20130413_203154_outLine +BABEL_OP2_205_19703_20130325_042858_inLine +BABEL_OP2_205_19703_20130325_042858_outLine +BABEL_OP2_205_19722_20130306_045231_inLine +BABEL_OP2_205_19722_20130306_045231_outLine +BABEL_OP2_205_19749_20130406_231234_inLine +BABEL_OP2_205_19749_20130406_231234_outLine +BABEL_OP2_205_20133_20130228_055409_inLine +BABEL_OP2_205_20133_20130228_055409_outLine +BABEL_OP2_205_20800_20130408_015430_inLine +BABEL_OP2_205_20800_20130408_015430_outLine +BABEL_OP2_205_20916_20130228_200116_inLine +BABEL_OP2_205_20916_20130228_200116_outLine +BABEL_OP2_205_20922_20130406_225439_inLine +BABEL_OP2_205_20922_20130406_225439_outLine +BABEL_OP2_205_20985_20130401_025757_inLine +BABEL_OP2_205_20985_20130401_025757_outLine +BABEL_OP2_205_21004_20130408_222653_inLine +BABEL_OP2_205_21004_20130408_222653_outLine +BABEL_OP2_205_21206_20130312_205638_inLine +BABEL_OP2_205_21206_20130312_205638_outLine +BABEL_OP2_205_21435_20130414_044944_inLine +BABEL_OP2_205_21435_20130414_044944_outLine +BABEL_OP2_205_21543_20140125_004741_inLine +BABEL_OP2_205_21543_20140125_004741_outLine +BABEL_OP2_205_21807_20130324_054526_inLine +BABEL_OP2_205_21892_20130507_023354_inLine +BABEL_OP2_205_21892_20130507_023354_outLine +BABEL_OP2_205_22321_20130308_042214_inLine +BABEL_OP2_205_22321_20130308_042214_outLine +BABEL_OP2_205_22446_20130309_073946_outLine +BABEL_OP2_205_22494_20130331_230611_inLine +BABEL_OP2_205_22494_20130331_230611_outLine +BABEL_OP2_205_22624_20130331_012106_inLine +BABEL_OP2_205_22624_20130331_012106_outLine +BABEL_OP2_205_22629_20131231_223232_inLine +BABEL_OP2_205_22629_20131231_223232_outLine +BABEL_OP2_205_22918_20130413_043023_inLine +BABEL_OP2_205_22918_20130413_043023_outLine +BABEL_OP2_205_22918_20130413_044543_inLine +BABEL_OP2_205_22918_20130413_044543_outLine 
+BABEL_OP2_205_23006_20130322_202429_inLine +BABEL_OP2_205_23006_20130322_202429_outLine +BABEL_OP2_205_23046_20130327_010653_inLine +BABEL_OP2_205_23046_20130327_010653_outLine +BABEL_OP2_205_23092_20130413_181637_inLine +BABEL_OP2_205_23092_20130413_181637_outLine +BABEL_OP2_205_23190_20130323_014750_inLine +BABEL_OP2_205_23190_20130323_014750_outLine +BABEL_OP2_205_23239_20130331_034518_inLine +BABEL_OP2_205_23239_20130331_034518_outLine +BABEL_OP2_205_23752_20140123_024924_inLine +BABEL_OP2_205_23752_20140123_024924_outLine +BABEL_OP2_205_23893_20140123_003759_inLine +BABEL_OP2_205_23893_20140123_003759_outLine +BABEL_OP2_205_24239_20130415_171824_inLine +BABEL_OP2_205_24239_20130415_171824_outLine +BABEL_OP2_205_24253_20130505_214600_inLine +BABEL_OP2_205_24253_20130505_214600_outLine +BABEL_OP2_205_24270_20130406_070358_inLine +BABEL_OP2_205_24270_20130406_070358_outLine +BABEL_OP2_205_24290_20130414_221432_inLine +BABEL_OP2_205_24290_20130414_221432_outLine +BABEL_OP2_205_24323_20130326_051101_inLine +BABEL_OP2_205_24323_20130326_051101_outLine +BABEL_OP2_205_24470_20130406_021646_inLine +BABEL_OP2_205_24470_20130406_021646_outLine +BABEL_OP2_205_24532_20130227_052040_inLine +BABEL_OP2_205_24532_20130227_052040_outLine +BABEL_OP2_205_24569_20130508_235213_inLine +BABEL_OP2_205_24569_20130508_235213_outLine +BABEL_OP2_205_24605_20130311_012103_inLine +BABEL_OP2_205_24605_20130311_012103_outLine +BABEL_OP2_205_24679_20130303_043753_inLine +BABEL_OP2_205_24679_20130303_043753_outLine +BABEL_OP2_205_25085_20130612_023620_inLine +BABEL_OP2_205_25085_20130612_023620_outLine +BABEL_OP2_205_25719_20130406_231631_inLine +BABEL_OP2_205_25719_20130406_231631_outLine +BABEL_OP2_205_25719_20130406_232555_inLine +BABEL_OP2_205_25719_20130406_232555_outLine +BABEL_OP2_205_25719_20130406_233313_inLine +BABEL_OP2_205_25719_20130406_233313_outLine +BABEL_OP2_205_25961_20130305_063202_inLine +BABEL_OP2_205_25961_20130305_063202_outLine +BABEL_OP2_205_26381_20140125_015707_inLine +BABEL_OP2_205_26381_20140125_015707_outLine +BABEL_OP2_205_26388_20130330_021001_inLine +BABEL_OP2_205_26388_20130330_021001_outLine +BABEL_OP2_205_26507_20131101_103425_inLine +BABEL_OP2_205_26507_20131101_103425_outLine +BABEL_OP2_205_26574_20130509_203057_inLine +BABEL_OP2_205_26574_20130509_203057_outLine +BABEL_OP2_205_26602_20130412_235831_inLine +BABEL_OP2_205_26602_20130412_235831_outLine +BABEL_OP2_205_27125_20130227_061700_inLine +BABEL_OP2_205_27125_20130227_061700_outLine +BABEL_OP2_205_27189_20140104_001032_inLine +BABEL_OP2_205_27189_20140104_001032_outLine +BABEL_OP2_205_27203_20130331_021946_inLine +BABEL_OP2_205_27203_20130331_021946_outLine +BABEL_OP2_205_27590_20130506_201921_inLine +BABEL_OP2_205_27590_20130506_201921_outLine +BABEL_OP2_205_27841_20130414_222155_inLine +BABEL_OP2_205_27841_20130414_222155_outLine +BABEL_OP2_205_28012_20130507_054019_inLine +BABEL_OP2_205_28012_20130507_054019_outLine +BABEL_OP2_205_28419_20130320_202136_inLine +BABEL_OP2_205_28419_20130320_202136_outLine +BABEL_OP2_205_28477_20130412_234819_inLine +BABEL_OP2_205_28477_20130412_234819_outLine +BABEL_OP2_205_28522_20130401_211215_inLine +BABEL_OP2_205_28522_20130401_211215_outLine +BABEL_OP2_205_29023_20130314_060343_inLine +BABEL_OP2_205_29023_20130314_060343_outLine +BABEL_OP2_205_29323_20130414_230355_inLine +BABEL_OP2_205_29323_20130414_230355_outLine +BABEL_OP2_205_29404_20130414_214714_inLine +BABEL_OP2_205_29404_20130414_214714_outLine +BABEL_OP2_205_29439_20130413_182356_inLine 
+BABEL_OP2_205_29439_20130413_182356_outLine +BABEL_OP2_205_30013_20130401_005939_inLine +BABEL_OP2_205_30013_20130401_005939_outLine +BABEL_OP2_205_30180_20130323_005331_inLine +BABEL_OP2_205_30180_20130323_005331_outLine +BABEL_OP2_205_30395_20130316_060814_inLine +BABEL_OP2_205_30395_20130316_060814_outLine +BABEL_OP2_205_30432_20130330_200303_inLine +BABEL_OP2_205_30432_20130330_200303_outLine +BABEL_OP2_205_30869_20130412_202311_inLine +BABEL_OP2_205_30869_20130412_202311_outLine +BABEL_OP2_205_31039_20140125_023755_inLine +BABEL_OP2_205_31039_20140125_023755_outLine +BABEL_OP2_205_31109_20130619_181905_inLine +BABEL_OP2_205_31109_20130619_181905_outLine +BABEL_OP2_205_31346_20130507_204621_inLine +BABEL_OP2_205_31346_20130507_204621_outLine +BABEL_OP2_205_32097_20130301_034527_inLine +BABEL_OP2_205_32097_20130301_034527_outLine +BABEL_OP2_205_32122_20130321_004623_inLine +BABEL_OP2_205_32122_20130321_004623_outLine +BABEL_OP2_205_32122_20130321_010341_inLine +BABEL_OP2_205_32122_20130321_010341_outLine +BABEL_OP2_205_32244_20130412_190534_inLine +BABEL_OP2_205_32244_20130412_190534_outLine +BABEL_OP2_205_32630_20130412_054815_inLine +BABEL_OP2_205_32630_20130412_054815_outLine +BABEL_OP2_205_32837_20130507_011223_inLine +BABEL_OP2_205_32837_20130507_011223_outLine +BABEL_OP2_205_33229_20130414_213157_inLine +BABEL_OP2_205_33229_20130414_213157_outLine +BABEL_OP2_205_33273_20130320_040141_inLine +BABEL_OP2_205_33273_20130320_040141_outLine +BABEL_OP2_205_33355_20130311_214515_inLine +BABEL_OP2_205_33355_20130311_214515_outLine +BABEL_OP2_205_33424_20130412_193538_inLine +BABEL_OP2_205_33424_20130412_193538_outLine +BABEL_OP2_205_33476_20130405_051711_inLine +BABEL_OP2_205_33476_20130405_051711_outLine +BABEL_OP2_205_33497_20130619_220728_inLine +BABEL_OP2_205_33497_20130619_220728_outLine +BABEL_OP2_205_33840_20130507_012940_inLine +BABEL_OP2_205_33840_20130507_012940_outLine +BABEL_OP2_205_33913_20130414_052534_inLine +BABEL_OP2_205_33913_20130414_052534_outLine +BABEL_OP2_205_33951_20130619_212409_inLine +BABEL_OP2_205_33951_20130619_212409_outLine +BABEL_OP2_205_34106_20130301_221919_inLine +BABEL_OP2_205_34106_20130301_221919_outLine +BABEL_OP2_205_34197_20130302_231101_inLine +BABEL_OP2_205_34197_20130302_231101_outLine +BABEL_OP2_205_34586_20140125_203417_inLine +BABEL_OP2_205_34586_20140125_203417_outLine +BABEL_OP2_205_34647_20140125_205318_inLine +BABEL_OP2_205_34647_20140125_205318_outLine +BABEL_OP2_205_34903_20130406_055051_inLine +BABEL_OP2_205_34903_20130406_055051_outLine +BABEL_OP2_205_35139_20130312_070415_inLine +BABEL_OP2_205_35139_20130312_070415_outLine +BABEL_OP2_205_35143_20130414_203900_inLine +BABEL_OP2_205_35143_20130414_203900_outLine +BABEL_OP2_205_35181_20130413_201739_inLine +BABEL_OP2_205_35181_20130413_201739_outLine +BABEL_OP2_205_36642_20130413_013238_inLine +BABEL_OP2_205_36642_20130413_013238_outLine +BABEL_OP2_205_37228_20130407_205807_inLine +BABEL_OP2_205_37228_20130407_205807_outLine +BABEL_OP2_205_37271_20130507_231712_inLine +BABEL_OP2_205_37271_20130507_231712_outLine +BABEL_OP2_205_37285_20130401_061737_inLine +BABEL_OP2_205_37285_20130401_061737_outLine +BABEL_OP2_205_37290_20130405_070403_inLine +BABEL_OP2_205_37290_20130405_070403_outLine +BABEL_OP2_205_37598_20130405_034853_inLine +BABEL_OP2_205_37598_20130405_034853_outLine +BABEL_OP2_205_37682_20130325_022952_inLine +BABEL_OP2_205_37682_20130325_022952_outLine +BABEL_OP2_205_37776_20140125_220835_inLine +BABEL_OP2_205_37776_20140125_220835_outLine 
+BABEL_OP2_205_37853_20130413_005407_inLine +BABEL_OP2_205_37853_20130413_005407_outLine +BABEL_OP2_205_38340_20130315_063442_inLine +BABEL_OP2_205_38340_20130315_063442_outLine +BABEL_OP2_205_38554_20130301_085606_inLine +BABEL_OP2_205_38554_20130301_085606_outLine +BABEL_OP2_205_38664_20130325_030156_inLine +BABEL_OP2_205_38664_20130325_030156_outLine +BABEL_OP2_205_38689_20130414_233704_inLine +BABEL_OP2_205_38689_20130414_233704_outLine +BABEL_OP2_205_38741_20130315_071146_inLine +BABEL_OP2_205_38741_20130315_071146_outLine +BABEL_OP2_205_38878_20130406_202135_inLine +BABEL_OP2_205_38878_20130406_202135_outLine +BABEL_OP2_205_38963_20131227_202341_inLine +BABEL_OP2_205_38963_20131227_202341_outLine +BABEL_OP2_205_39059_20130414_033146_inLine +BABEL_OP2_205_39059_20130414_033146_outLine +BABEL_OP2_205_39059_20130414_034411_inLine +BABEL_OP2_205_39059_20130414_034411_outLine +BABEL_OP2_205_39555_20130507_025010_inLine +BABEL_OP2_205_39555_20130507_025010_outLine +BABEL_OP2_205_40196_20140125_222906_inLine +BABEL_OP2_205_40196_20140125_222906_outLine +BABEL_OP2_205_40557_20130413_185709_inLine +BABEL_OP2_205_40557_20130413_185709_outLine +BABEL_OP2_205_40557_20130413_190849_inLine +BABEL_OP2_205_40557_20130413_190849_outLine +BABEL_OP2_205_40565_20130401_015506_inLine +BABEL_OP2_205_40565_20130401_015506_outLine +BABEL_OP2_205_40939_20140125_231452_inLine +BABEL_OP2_205_40939_20140125_231452_outLine +BABEL_OP2_205_41038_20130405_060002_inLine +BABEL_OP2_205_41038_20130405_060002_outLine +BABEL_OP2_205_41174_20130318_033313_inLine +BABEL_OP2_205_41174_20130318_033313_outLine +BABEL_OP2_205_41618_20130312_214004_inLine +BABEL_OP2_205_41618_20130312_214004_outLine +BABEL_OP2_205_41741_20130326_004056_inLine +BABEL_OP2_205_41741_20130326_004056_outLine +BABEL_OP2_205_42619_20130325_002736_inLine +BABEL_OP2_205_42619_20130325_002736_outLine +BABEL_OP2_205_42834_20130414_202256_inLine +BABEL_OP2_205_42834_20130414_202256_outLine +BABEL_OP2_205_42991_20130401_024013_inLine +BABEL_OP2_205_42991_20130401_024013_outLine +BABEL_OP2_205_42991_20130401_025044_inLine +BABEL_OP2_205_42991_20130401_025044_outLine +BABEL_OP2_205_43286_20130304_044510_inLine +BABEL_OP2_205_43286_20130304_044510_outLine +BABEL_OP2_205_43368_20130329_211826_inLine +BABEL_OP2_205_43368_20130329_211826_outLine +BABEL_OP2_205_43368_20130329_212612_inLine +BABEL_OP2_205_43368_20130329_212612_outLine +BABEL_OP2_205_43788_20130331_024429_inLine +BABEL_OP2_205_43788_20130331_024429_outLine +BABEL_OP2_205_43788_20130331_030508_inLine +BABEL_OP2_205_43788_20130331_030508_outLine +BABEL_OP2_205_44847_20130325_055635_inLine +BABEL_OP2_205_44847_20130325_055635_outLine +BABEL_OP2_205_45121_20130412_035841_inLine +BABEL_OP2_205_45121_20130412_035841_outLine +BABEL_OP2_205_45235_20130509_011826_inLine +BABEL_OP2_205_45235_20130509_011826_outLine +BABEL_OP2_205_45374_20140126_000904_inLine +BABEL_OP2_205_45374_20140126_000904_outLine +BABEL_OP2_205_46041_20130507_202255_inLine +BABEL_OP2_205_46041_20130507_202255_outLine +BABEL_OP2_205_46315_20130506_231421_inLine +BABEL_OP2_205_46315_20130506_231421_outLine +BABEL_OP2_205_46589_20130331_014535_inLine +BABEL_OP2_205_46589_20130331_014535_outLine +BABEL_OP2_205_46757_20130401_191649_inLine +BABEL_OP2_205_46757_20130401_191649_outLine +BABEL_OP2_205_46976_20131104_051409_inLine +BABEL_OP2_205_46976_20131104_051409_outLine +BABEL_OP2_205_47110_20140126_005953_inLine +BABEL_OP2_205_47110_20140126_005953_outLine +BABEL_OP2_205_47451_20130408_195325_inLine 
+BABEL_OP2_205_47451_20130408_195325_outLine +BABEL_OP2_205_47487_20130328_060026_inLine +BABEL_OP2_205_47487_20130328_060026_outLine +BABEL_OP2_205_47823_20130330_204952_inLine +BABEL_OP2_205_47823_20130330_204952_outLine +BABEL_OP2_205_47878_20130319_211057_inLine +BABEL_OP2_205_47878_20130319_211057_outLine +BABEL_OP2_205_48422_20130407_020759_inLine +BABEL_OP2_205_48422_20130407_020759_outLine +BABEL_OP2_205_49118_20130412_210858_inLine +BABEL_OP2_205_49118_20130412_210858_outLine +BABEL_OP2_205_49118_20130412_211622_inLine +BABEL_OP2_205_49118_20130412_211622_outLine +BABEL_OP2_205_49287_20130327_053930_inLine +BABEL_OP2_205_49287_20130327_053930_outLine +BABEL_OP2_205_49630_20130401_013908_inLine +BABEL_OP2_205_49630_20130401_013908_outLine +BABEL_OP2_205_49768_20130330_025558_inLine +BABEL_OP2_205_49768_20130330_025558_outLine +BABEL_OP2_205_50186_20140126_012415_inLine +BABEL_OP2_205_50186_20140126_012415_outLine +BABEL_OP2_205_50745_20130505_195625_inLine +BABEL_OP2_205_50745_20130505_195625_outLine +BABEL_OP2_205_50779_20130320_043549_inLine +BABEL_OP2_205_50779_20130320_043549_outLine +BABEL_OP2_205_50779_20130320_044244_inLine +BABEL_OP2_205_50779_20130320_044244_outLine +BABEL_OP2_205_51015_20130401_202255_inLine +BABEL_OP2_205_51015_20130401_202255_outLine +BABEL_OP2_205_52246_20130323_232916_inLine +BABEL_OP2_205_52246_20130323_232916_outLine +BABEL_OP2_205_52490_20130326_051608_inLine +BABEL_OP2_205_52490_20130326_051608_outLine +BABEL_OP2_205_53063_20130508_051415_inLine +BABEL_OP2_205_53063_20130508_051415_outLine +BABEL_OP2_205_53415_20131216_223652_inLine +BABEL_OP2_205_53415_20131216_223652_outLine +BABEL_OP2_205_53441_20140126_015538_inLine +BABEL_OP2_205_53441_20140126_015538_outLine +BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_58026_20131219_010750_inLine +BABEL_OP2_205_58026_20131219_010750_outLine +BABEL_OP2_205_58821_20130415_190958_inLine +BABEL_OP2_205_58821_20130415_190958_outLine +BABEL_OP2_205_59078_20130406_075721_inLine +BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_59645_20130619_190548_inLine +BABEL_OP2_205_59645_20130619_190548_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine 
+BABEL_OP2_205_62200_20130405_021524_inLine +BABEL_OP2_205_62200_20130405_021524_outLine +BABEL_OP2_205_62289_20140122_214709_inLine +BABEL_OP2_205_62289_20140122_214709_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine +BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_70716_20130413_193114_inLine +BABEL_OP2_205_70716_20130413_193114_outLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_77242_20130616_015950_inLine +BABEL_OP2_205_77242_20130616_015950_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_84605_20130319_203823_inLine +BABEL_OP2_205_84605_20130319_203823_outLine +BABEL_OP2_205_84737_20130407_054058_inLine +BABEL_OP2_205_84737_20130407_054058_outLine +BABEL_OP2_205_84936_20130405_063301_inLine +BABEL_OP2_205_84936_20130405_063301_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_86826_20130411_224207_inLine +BABEL_OP2_205_86826_20130411_224207_outLine +BABEL_OP2_205_90760_20130612_022556_inLine +BABEL_OP2_205_90760_20130612_022556_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine +BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list new file mode 100644 index 00000000000..0239610b1a7 --- /dev/null +++ 
b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list @@ -0,0 +1,521 @@ +BABEL_OP2_205_12321_20131101_103424_inLine +BABEL_OP2_205_12321_20131101_103424_outLine +BABEL_OP2_205_14350_20130311_065704_inLine +BABEL_OP2_205_14350_20130311_065704_outLine +BABEL_OP2_205_15262_20130310_220350_inLine +BABEL_OP2_205_15262_20130310_220350_outLine +BABEL_OP2_205_15902_20130309_042954_inLine +BABEL_OP2_205_15902_20130309_042954_outLine +BABEL_OP2_205_16475_20130318_071049_inLine +BABEL_OP2_205_16475_20130318_071049_outLine +BABEL_OP2_205_17582_20130612_045820_inLine +BABEL_OP2_205_17582_20130612_045820_outLine +BABEL_OP2_205_17923_20130310_071855_inLine +BABEL_OP2_205_17923_20130310_071855_outLine +BABEL_OP2_205_18992_20131227_214303_inLine +BABEL_OP2_205_19545_20131103_054936_inLine +BABEL_OP2_205_19545_20131103_054936_outLine +BABEL_OP2_205_20724_20131226_220301_inLine +BABEL_OP2_205_20738_20131219_000457_inLine +BABEL_OP2_205_20738_20131219_000457_outLine +BABEL_OP2_205_20768_20131104_020043_inLine +BABEL_OP2_205_20768_20131104_020043_outLine +BABEL_OP2_205_21109_20131110_203639_inLine +BABEL_OP2_205_21109_20131110_203639_outLine +BABEL_OP2_205_21244_20130411_220542_inLine +BABEL_OP2_205_21244_20130411_220542_outLine +BABEL_OP2_205_23681_20130519_052009_outLine +BABEL_OP2_205_23731_20131104_055621_outLine +BABEL_OP2_205_26074_20130318_092803_inLine +BABEL_OP2_205_26074_20130318_092803_outLine +BABEL_OP2_205_26074_20130318_093434_inLine +BABEL_OP2_205_26074_20130318_093434_outLine +BABEL_OP2_205_26398_20130414_210257_outLine +BABEL_OP2_205_28538_20131222_010921_inLine +BABEL_OP2_205_29230_20130619_045232_inLine +BABEL_OP2_205_31182_20131104_010653_inLine +BABEL_OP2_205_31182_20131104_010653_outLine +BABEL_OP2_205_32301_20130401_015605_outLine +BABEL_OP2_205_36039_20130525_013805_inLine +BABEL_OP2_205_36039_20130525_013805_outLine +BABEL_OP2_205_36059_20131108_200854_inLine +BABEL_OP2_205_36059_20131108_200854_outLine +BABEL_OP2_205_36059_20131108_201758_inLine +BABEL_OP2_205_36059_20131108_201758_outLine +BABEL_OP2_205_36059_20131108_202426_inLine +BABEL_OP2_205_36059_20131108_202426_outLine +BABEL_OP2_205_37229_20130612_045130_inLine +BABEL_OP2_205_37229_20130612_045130_outLine +BABEL_OP2_205_37499_20130423_030008_inLine +BABEL_OP2_205_37499_20130423_030008_outLine +BABEL_OP2_205_38979_20131105_052419_inLine +BABEL_OP2_205_38979_20131105_052419_outLine +BABEL_OP2_205_39159_20130303_040403_inLine +BABEL_OP2_205_39159_20130303_040403_outLine +BABEL_OP2_205_40713_20130314_075828_inLine +BABEL_OP2_205_40713_20130314_075828_outLine +BABEL_OP2_205_40740_20130407_055052_inLine +BABEL_OP2_205_40740_20130407_055052_outLine +BABEL_OP2_205_41100_20130311_015856_inLine +BABEL_OP2_205_41100_20130311_015856_outLine +BABEL_OP2_205_41109_20130406_224530_inLine +BABEL_OP2_205_41109_20130406_224530_outLine +BABEL_OP2_205_41493_20130228_081724_inLine +BABEL_OP2_205_41493_20130228_081724_outLine +BABEL_OP2_205_41745_20130319_001127_inLine +BABEL_OP2_205_41745_20130319_001127_outLine +BABEL_OP2_205_41920_20130325_212001_inLine +BABEL_OP2_205_42155_20130312_064841_inLine +BABEL_OP2_205_42155_20130312_064841_outLine +BABEL_OP2_205_42243_20130305_063726_inLine +BABEL_OP2_205_42243_20130305_063726_outLine +BABEL_OP2_205_43239_20131110_055057_inLine +BABEL_OP2_205_43239_20131110_055057_outLine +BABEL_OP2_205_44255_20130406_205651_inLine +BABEL_OP2_205_44255_20130406_205651_outLine +BABEL_OP2_205_44531_20131108_194709_inLine +BABEL_OP2_205_44531_20131108_194709_outLine 
+BABEL_OP2_205_44619_20130314_074104_inLine +BABEL_OP2_205_44619_20130314_074104_outLine +BABEL_OP2_205_45642_20130311_052042_inLine +BABEL_OP2_205_45642_20130311_052042_outLine +BABEL_OP2_205_46558_20130304_053902_inLine +BABEL_OP2_205_46558_20130304_053902_outLine +BABEL_OP2_205_46702_20130301_025750_inLine +BABEL_OP2_205_46702_20130301_025750_outLine +BABEL_OP2_205_46763_20130505_222913_inLine +BABEL_OP2_205_46763_20130505_222913_outLine +BABEL_OP2_205_47215_20130408_021338_inLine +BABEL_OP2_205_47215_20130408_021338_outLine +BABEL_OP2_205_47270_20130406_211808_inLine +BABEL_OP2_205_47270_20130406_211808_outLine +BABEL_OP2_205_47405_20131231_032458_inLine +BABEL_OP2_205_47405_20131231_032458_outLine +BABEL_OP2_205_47877_20130407_205116_inLine +BABEL_OP2_205_47877_20130407_205116_outLine +BABEL_OP2_205_48399_20130304_002906_inLine +BABEL_OP2_205_48399_20130304_002906_outLine +BABEL_OP2_205_48758_20131107_075636_inLine +BABEL_OP2_205_48758_20131107_075636_outLine +BABEL_OP2_205_48758_20131107_080446_inLine +BABEL_OP2_205_48758_20131107_080446_outLine +BABEL_OP2_205_48789_20131103_043932_inLine +BABEL_OP2_205_48789_20131103_043932_outLine +BABEL_OP2_205_49775_20130227_060536_inLine +BABEL_OP2_205_49775_20130227_060536_outLine +BABEL_OP2_205_49812_20130407_020818_inLine +BABEL_OP2_205_49812_20130407_020818_outLine +BABEL_OP2_205_49945_20130412_222951_inLine +BABEL_OP2_205_49945_20130412_222951_outLine +BABEL_OP2_205_50090_20130329_203208_inLine +BABEL_OP2_205_50090_20130329_203208_outLine +BABEL_OP2_205_50681_20130330_050901_inLine +BABEL_OP2_205_50681_20130330_050901_outLine +BABEL_OP2_205_51530_20130414_210733_inLine +BABEL_OP2_205_51530_20130414_210733_outLine +BABEL_OP2_205_51611_20130311_072551_inLine +BABEL_OP2_205_51611_20130311_072551_outLine +BABEL_OP2_205_51819_20130401_010103_outLine +BABEL_OP2_205_51819_20130401_010745_outLine +BABEL_OP2_205_52447_20130412_001856_inLine +BABEL_OP2_205_52447_20130412_001856_outLine +BABEL_OP2_205_52483_20130621_205901_inLine +BABEL_OP2_205_52483_20130621_205901_outLine +BABEL_OP2_205_52804_20130311_002720_inLine +BABEL_OP2_205_52804_20130311_002720_outLine +BABEL_OP2_205_54040_20131104_013114_inLine +BABEL_OP2_205_54040_20131104_013114_outLine +BABEL_OP2_205_54160_20130306_033742_inLine +BABEL_OP2_205_54160_20130306_033742_outLine +BABEL_OP2_205_54405_20130618_202708_inLine +BABEL_OP2_205_54405_20130618_202708_outLine +BABEL_OP2_205_55818_20130309_080103_inLine +BABEL_OP2_205_55818_20130309_080103_outLine +BABEL_OP2_205_56090_20130227_204816_inLine +BABEL_OP2_205_56090_20130227_204816_outLine +BABEL_OP2_205_56306_20130407_012524_inLine +BABEL_OP2_205_56306_20130407_012524_outLine +BABEL_OP2_205_56306_20130407_013746_inLine +BABEL_OP2_205_56306_20130407_013746_outLine +BABEL_OP2_205_56331_20130413_043736_inLine +BABEL_OP2_205_56331_20130413_043736_outLine +BABEL_OP2_205_56370_20130306_061205_inLine +BABEL_OP2_205_56370_20130306_061205_outLine +BABEL_OP2_205_56429_20130311_053708_inLine +BABEL_OP2_205_56429_20130311_053708_outLine +BABEL_OP2_205_56523_20130317_224401_inLine +BABEL_OP2_205_56523_20130317_224401_outLine +BABEL_OP2_205_56720_20130506_182315_inLine +BABEL_OP2_205_56720_20130506_182315_outLine +BABEL_OP2_205_57566_20130407_031257_inLine +BABEL_OP2_205_57566_20130407_031257_outLine +BABEL_OP2_205_58915_20130611_221704_inLine +BABEL_OP2_205_58915_20130611_221704_outLine +BABEL_OP2_205_59928_20130321_012004_inLine +BABEL_OP2_205_59928_20130321_012004_outLine +BABEL_OP2_205_59993_20130321_045802_inLine 
+BABEL_OP2_205_59993_20130321_045802_outLine +BABEL_OP2_205_60474_20130324_213649_inLine +BABEL_OP2_205_60474_20130324_213649_outLine +BABEL_OP2_205_60508_20130304_205015_inLine +BABEL_OP2_205_60508_20130304_205015_outLine +BABEL_OP2_205_60538_20130310_004703_inLine +BABEL_OP2_205_60538_20130310_004703_outLine +BABEL_OP2_205_60626_20130315_071907_inLine +BABEL_OP2_205_60626_20130315_071907_outLine +BABEL_OP2_205_60706_20130307_053430_inLine +BABEL_OP2_205_60706_20130307_053430_outLine +BABEL_OP2_205_60836_20130330_072606_inLine +BABEL_OP2_205_60836_20130330_072606_outLine +BABEL_OP2_205_61167_20130326_222257_inLine +BABEL_OP2_205_61167_20130326_222257_outLine +BABEL_OP2_205_61190_20130325_004615_inLine +BABEL_OP2_205_61190_20130325_004615_outLine +BABEL_OP2_205_61219_20130325_212553_inLine +BABEL_OP2_205_61219_20130325_212553_outLine +BABEL_OP2_205_61357_20130330_232257_inLine +BABEL_OP2_205_61357_20130330_232257_outLine +BABEL_OP2_205_62434_20130305_215011_inLine +BABEL_OP2_205_62434_20130305_215011_outLine +BABEL_OP2_205_62434_20130305_220154_inLine +BABEL_OP2_205_62434_20130305_220154_outLine +BABEL_OP2_205_62734_20130328_050453_inLine +BABEL_OP2_205_62734_20130328_050453_outLine +BABEL_OP2_205_63081_20130309_012237_inLine +BABEL_OP2_205_63081_20130309_012237_outLine +BABEL_OP2_205_63094_20131113_030146_inLine +BABEL_OP2_205_63220_20130326_055356_inLine +BABEL_OP2_205_63220_20130326_055356_outLine +BABEL_OP2_205_63445_20130308_235018_inLine +BABEL_OP2_205_63445_20130308_235018_outLine +BABEL_OP2_205_63481_20130307_082632_inLine +BABEL_OP2_205_63481_20130307_082632_outLine +BABEL_OP2_205_63523_20140121_213251_inLine +BABEL_OP2_205_63757_20130406_055509_inLine +BABEL_OP2_205_63757_20130406_055509_outLine +BABEL_OP2_205_63938_20130413_044053_inLine +BABEL_OP2_205_63938_20130413_044053_outLine +BABEL_OP2_205_64014_20130413_004605_inLine +BABEL_OP2_205_64014_20130413_004605_outLine +BABEL_OP2_205_64065_20130315_044531_inLine +BABEL_OP2_205_64065_20130315_044531_outLine +BABEL_OP2_205_64494_20130313_043717_inLine +BABEL_OP2_205_64494_20130313_043717_outLine +BABEL_OP2_205_64638_20130408_004937_inLine +BABEL_OP2_205_64638_20130408_004937_outLine +BABEL_OP2_205_64722_20131107_210205_inLine +BABEL_OP2_205_64722_20131107_210205_outLine +BABEL_OP2_205_64759_20130307_214024_inLine +BABEL_OP2_205_64759_20130307_214024_outLine +BABEL_OP2_205_64759_20130307_215400_inLine +BABEL_OP2_205_64759_20130307_215400_outLine +BABEL_OP2_205_64768_20130327_213122_inLine +BABEL_OP2_205_64768_20130327_213122_outLine +BABEL_OP2_205_64796_20130307_042443_inLine +BABEL_OP2_205_64796_20130307_042443_outLine +BABEL_OP2_205_64902_20130414_052508_inLine +BABEL_OP2_205_64902_20130414_052508_outLine +BABEL_OP2_205_65882_20130311_013812_inLine +BABEL_OP2_205_65882_20130311_013812_outLine +BABEL_OP2_205_66026_20130508_223030_inLine +BABEL_OP2_205_66026_20130508_223030_outLine +BABEL_OP2_205_66959_20130414_224335_inLine +BABEL_OP2_205_66959_20130414_224335_outLine +BABEL_OP2_205_67085_20130414_180541_inLine +BABEL_OP2_205_67085_20130414_180541_outLine +BABEL_OP2_205_67389_20140111_225039_inLine +BABEL_OP2_205_67389_20140111_225039_outLine +BABEL_OP2_205_67552_20130331_072350_inLine +BABEL_OP2_205_67552_20130331_072350_outLine +BABEL_OP2_205_67552_20130331_073746_inLine +BABEL_OP2_205_67552_20130331_073746_outLine +BABEL_OP2_205_67592_20130509_213421_inLine +BABEL_OP2_205_67592_20130509_213421_outLine +BABEL_OP2_205_67794_20130315_081604_inLine +BABEL_OP2_205_67794_20130315_081604_outLine 
+BABEL_OP2_205_67999_20130407_223424_inLine +BABEL_OP2_205_67999_20130407_223424_outLine +BABEL_OP2_205_68059_20130619_053732_inLine +BABEL_OP2_205_68059_20130619_053732_outLine +BABEL_OP2_205_68182_20130415_041909_inLine +BABEL_OP2_205_68182_20130415_041909_outLine +BABEL_OP2_205_69633_20130331_021718_inLine +BABEL_OP2_205_69633_20130331_021718_outLine +BABEL_OP2_205_69633_20130331_023306_inLine +BABEL_OP2_205_69633_20130331_023306_outLine +BABEL_OP2_205_69885_20130415_051700_inLine +BABEL_OP2_205_69885_20130415_051700_outLine +BABEL_OP2_205_70110_20130302_074003_inLine +BABEL_OP2_205_70110_20130302_074003_outLine +BABEL_OP2_205_70343_20130401_203305_inLine +BABEL_OP2_205_70343_20130401_203305_outLine +BABEL_OP2_205_70526_20130416_033943_inLine +BABEL_OP2_205_70526_20130416_033943_outLine +BABEL_OP2_205_71047_20131109_013132_inLine +BABEL_OP2_205_71047_20131109_013132_outLine +BABEL_OP2_205_71333_20130326_225136_inLine +BABEL_OP2_205_71333_20130326_225136_outLine +BABEL_OP2_205_71614_20130506_175649_inLine +BABEL_OP2_205_71614_20130506_175649_outLine +BABEL_OP2_205_71704_20130317_002057_inLine +BABEL_OP2_205_71704_20130317_002057_outLine +BABEL_OP2_205_71754_20140115_014345_inLine +BABEL_OP2_205_71754_20140115_014345_outLine +BABEL_OP2_205_72040_20130321_022323_inLine +BABEL_OP2_205_72040_20130321_022323_outLine +BABEL_OP2_205_72733_20130415_183417_inLine +BABEL_OP2_205_72733_20130415_183417_outLine +BABEL_OP2_205_73042_20130317_000810_inLine +BABEL_OP2_205_73042_20130317_000810_outLine +BABEL_OP2_205_73072_20130311_213816_inLine +BABEL_OP2_205_73072_20130311_213816_outLine +BABEL_OP2_205_73301_20130330_062717_inLine +BABEL_OP2_205_73301_20130330_062717_outLine +BABEL_OP2_205_73301_20130330_064357_inLine +BABEL_OP2_205_73301_20130330_064357_outLine +BABEL_OP2_205_73408_20130622_062600_inLine +BABEL_OP2_205_73408_20130622_062600_outLine +BABEL_OP2_205_73837_20130330_054105_inLine +BABEL_OP2_205_73837_20130330_054105_outLine +BABEL_OP2_205_74111_20130507_182333_inLine +BABEL_OP2_205_74111_20130507_182333_outLine +BABEL_OP2_205_74280_20130301_022106_inLine +BABEL_OP2_205_74280_20130301_022106_outLine +BABEL_OP2_205_74455_20130414_041223_inLine +BABEL_OP2_205_74455_20130414_041223_outLine +BABEL_OP2_205_74641_20130314_060344_inLine +BABEL_OP2_205_74641_20130314_060344_outLine +BABEL_OP2_205_74921_20130331_061311_inLine +BABEL_OP2_205_74921_20130331_061311_outLine +BABEL_OP2_205_75223_20130306_045441_inLine +BABEL_OP2_205_75223_20130306_045441_outLine +BABEL_OP2_205_75261_20130408_234257_inLine +BABEL_OP2_205_75261_20130408_234257_outLine +BABEL_OP2_205_75342_20130415_192555_inLine +BABEL_OP2_205_75342_20130415_192555_outLine +BABEL_OP2_205_75981_20130413_042503_inLine +BABEL_OP2_205_75981_20130413_042503_outLine +BABEL_OP2_205_76773_20130312_051652_inLine +BABEL_OP2_205_76773_20130312_051652_outLine +BABEL_OP2_205_77139_20130305_045120_inLine +BABEL_OP2_205_77139_20130305_045120_outLine +BABEL_OP2_205_77744_20130328_012940_outLine +BABEL_OP2_205_78544_20130408_000050_inLine +BABEL_OP2_205_78544_20130408_000050_outLine +BABEL_OP2_205_78544_20130408_001043_inLine +BABEL_OP2_205_78544_20130408_001043_outLine +BABEL_OP2_205_78609_20130508_001720_inLine +BABEL_OP2_205_78609_20130508_001720_outLine +BABEL_OP2_205_78630_20130330_200921_inLine +BABEL_OP2_205_78630_20130330_200921_outLine +BABEL_OP2_205_78943_20130326_063742_inLine +BABEL_OP2_205_78943_20130326_063742_outLine +BABEL_OP2_205_79045_20130507_020315_inLine +BABEL_OP2_205_79045_20130507_020315_outLine 
+BABEL_OP2_205_79107_20130613_004324_inLine +BABEL_OP2_205_79107_20130613_004324_outLine +BABEL_OP2_205_79167_20130331_053551_inLine +BABEL_OP2_205_79167_20130331_053551_outLine +BABEL_OP2_205_79190_20130313_020401_inLine +BABEL_OP2_205_79190_20130313_020401_outLine +BABEL_OP2_205_79590_20131221_025241_inLine +BABEL_OP2_205_79590_20131221_025241_outLine +BABEL_OP2_205_79590_20131221_031508_inLine +BABEL_OP2_205_79590_20131221_031508_outLine +BABEL_OP2_205_79751_20130324_220236_inLine +BABEL_OP2_205_79751_20130324_220236_outLine +BABEL_OP2_205_79858_20130309_210841_inLine +BABEL_OP2_205_79858_20130309_210841_outLine +BABEL_OP2_205_80136_20130406_190838_inLine +BABEL_OP2_205_80136_20130406_190838_outLine +BABEL_OP2_205_80577_20131110_045204_inLine +BABEL_OP2_205_80577_20131110_045204_outLine +BABEL_OP2_205_80881_20130326_002818_inLine +BABEL_OP2_205_80881_20130326_002818_outLine +BABEL_OP2_205_80881_20130326_004157_inLine +BABEL_OP2_205_80881_20130326_004157_outLine +BABEL_OP2_205_80881_20130326_005241_inLine +BABEL_OP2_205_80881_20130326_005241_outLine +BABEL_OP2_205_81287_20130414_230143_inLine +BABEL_OP2_205_81287_20130414_230143_outLine +BABEL_OP2_205_81392_20130506_224137_inLine +BABEL_OP2_205_81392_20130506_224137_outLine +BABEL_OP2_205_81404_20130324_072708_inLine +BABEL_OP2_205_81404_20130324_072708_outLine +BABEL_OP2_205_81433_20131110_024152_inLine +BABEL_OP2_205_81433_20131110_024152_outLine +BABEL_OP2_205_81553_20130408_190946_inLine +BABEL_OP2_205_81671_20130407_193047_inLine +BABEL_OP2_205_81671_20130407_193047_outLine +BABEL_OP2_205_81854_20130413_035448_inLine +BABEL_OP2_205_81854_20130413_035448_outLine +BABEL_OP2_205_82030_20130416_024208_inLine +BABEL_OP2_205_82030_20130416_024208_outLine +BABEL_OP2_205_82145_20131101_103425_inLine +BABEL_OP2_205_82145_20131101_103425_outLine +BABEL_OP2_205_82863_20131110_214438_inLine +BABEL_OP2_205_82863_20131110_214438_outLine +BABEL_OP2_205_82863_20131110_220419_inLine +BABEL_OP2_205_82863_20131110_220419_outLine +BABEL_OP2_205_82979_20130321_013427_inLine +BABEL_OP2_205_83062_20130412_213219_inLine +BABEL_OP2_205_83062_20130412_213219_outLine +BABEL_OP2_205_83366_20130406_070242_outLine +BABEL_OP2_205_83436_20130306_064555_inLine +BABEL_OP2_205_83436_20130306_064555_outLine +BABEL_OP2_205_83545_20130413_183305_outLine +BABEL_OP2_205_83775_20130326_223716_inLine +BABEL_OP2_205_83775_20130326_223716_outLine +BABEL_OP2_205_83783_20130316_062751_inLine +BABEL_OP2_205_83783_20130316_062751_outLine +BABEL_OP2_205_84125_20130301_040550_inLine +BABEL_OP2_205_84125_20130301_040550_outLine +BABEL_OP2_205_84370_20130613_203134_inLine +BABEL_OP2_205_84370_20130613_213749_inLine +BABEL_OP2_205_84458_20130508_224724_inLine +BABEL_OP2_205_84458_20130508_224724_outLine +BABEL_OP2_205_84469_20130408_213237_inLine +BABEL_OP2_205_84469_20130408_213237_outLine +BABEL_OP2_205_84583_20130312_211219_inLine +BABEL_OP2_205_84583_20130312_211219_outLine +BABEL_OP2_205_84815_20130413_183704_inLine +BABEL_OP2_205_84815_20130413_183704_outLine +BABEL_OP2_205_84838_20130509_005525_inLine +BABEL_OP2_205_84838_20130509_005525_outLine +BABEL_OP2_205_85179_20130409_002521_inLine +BABEL_OP2_205_85179_20130409_002521_outLine +BABEL_OP2_205_85248_20130414_215500_inLine +BABEL_OP2_205_85248_20130414_215500_outLine +BABEL_OP2_205_85260_20140115_021714_inLine +BABEL_OP2_205_85260_20140115_021714_outLine +BABEL_OP2_205_85322_20130323_013257_inLine +BABEL_OP2_205_85325_20130414_061613_inLine +BABEL_OP2_205_85325_20130414_061613_outLine 
+BABEL_OP2_205_85331_20130616_014645_inLine +BABEL_OP2_205_85331_20130616_014645_outLine +BABEL_OP2_205_85340_20130321_040745_inLine +BABEL_OP2_205_85340_20130321_040745_outLine +BABEL_OP2_205_86321_20130413_173559_inLine +BABEL_OP2_205_86321_20130413_173559_outLine +BABEL_OP2_205_86467_20130305_074640_inLine +BABEL_OP2_205_86467_20130305_074640_outLine +BABEL_OP2_205_86472_20130408_003043_inLine +BABEL_OP2_205_86472_20130408_003043_outLine +BABEL_OP2_205_86557_20130304_044109_inLine +BABEL_OP2_205_86557_20130304_044109_outLine +BABEL_OP2_205_87298_20130322_201204_inLine +BABEL_OP2_205_87629_20130312_052701_inLine +BABEL_OP2_205_87629_20130312_052701_outLine +BABEL_OP2_205_87884_20130413_194754_inLine +BABEL_OP2_205_87884_20130413_194754_outLine +BABEL_OP2_205_87889_20130408_204610_inLine +BABEL_OP2_205_87889_20130408_204610_outLine +BABEL_OP2_205_88686_20130303_070128_inLine +BABEL_OP2_205_88686_20130303_070128_outLine +BABEL_OP2_205_88873_20130326_050532_inLine +BABEL_OP2_205_88873_20130326_050532_outLine +BABEL_OP2_205_89372_20130227_014653_inLine +BABEL_OP2_205_89372_20130227_014653_outLine +BABEL_OP2_205_89457_20131111_005030_inLine +BABEL_OP2_205_89457_20131111_005030_outLine +BABEL_OP2_205_89560_20130507_184514_inLine +BABEL_OP2_205_89560_20130507_184514_outLine +BABEL_OP2_205_89888_20130311_070650_inLine +BABEL_OP2_205_89888_20130311_070650_outLine +BABEL_OP2_205_90347_20130331_202436_outLine +BABEL_OP2_205_90935_20130324_042904_inLine +BABEL_OP2_205_90935_20130324_042904_outLine +BABEL_OP2_205_91189_20131108_222823_inLine +BABEL_OP2_205_91189_20131108_222823_outLine +BABEL_OP2_205_91319_20130408_214039_inLine +BABEL_OP2_205_91319_20130408_214039_outLine +BABEL_OP2_205_91463_20130331_044435_inLine +BABEL_OP2_205_91463_20130331_044435_outLine +BABEL_OP2_205_91581_20130408_184119_inLine +BABEL_OP2_205_91581_20130408_184119_outLine +BABEL_OP2_205_91884_20130413_195918_inLine +BABEL_OP2_205_91884_20130413_195918_outLine +BABEL_OP2_205_91891_20130330_214543_inLine +BABEL_OP2_205_91891_20130330_214543_outLine +BABEL_OP2_205_91971_20140108_225426_inLine +BABEL_OP2_205_91971_20140108_225426_outLine +BABEL_OP2_205_91977_20130331_024658_inLine +BABEL_OP2_205_91977_20130331_024658_outLine +BABEL_OP2_205_91977_20130331_030804_inLine +BABEL_OP2_205_91977_20130331_030804_outLine +BABEL_OP2_205_92077_20130413_012328_inLine +BABEL_OP2_205_92077_20130413_012328_outLine +BABEL_OP2_205_92459_20130330_001356_inLine +BABEL_OP2_205_92459_20130330_001356_outLine +BABEL_OP2_205_92509_20130303_235756_inLine +BABEL_OP2_205_92509_20130303_235756_outLine +BABEL_OP2_205_92740_20130330_215927_inLine +BABEL_OP2_205_92740_20130330_215927_outLine +BABEL_OP2_205_92886_20130318_002931_inLine +BABEL_OP2_205_92886_20130318_002931_outLine +BABEL_OP2_205_92941_20130329_233855_inLine +BABEL_OP2_205_92941_20130329_233855_outLine +BABEL_OP2_205_93224_20130329_231410_inLine +BABEL_OP2_205_93224_20130329_231410_outLine +BABEL_OP2_205_93224_20130329_233120_inLine +BABEL_OP2_205_93224_20130329_233120_outLine +BABEL_OP2_205_93475_20130318_075901_inLine +BABEL_OP2_205_93475_20130318_075901_outLine +BABEL_OP2_205_93861_20130623_023740_inLine +BABEL_OP2_205_93946_20130413_175030_inLine +BABEL_OP2_205_93946_20130413_175030_outLine +BABEL_OP2_205_93946_20130413_180241_inLine +BABEL_OP2_205_93946_20130413_180241_outLine +BABEL_OP2_205_94002_20131110_223144_inLine +BABEL_OP2_205_94002_20131110_223144_outLine +BABEL_OP2_205_94442_20130413_000848_inLine +BABEL_OP2_205_94442_20130413_000848_outLine 
+BABEL_OP2_205_94465_20130408_073150_inLine +BABEL_OP2_205_94465_20130408_073150_outLine +BABEL_OP2_205_94587_20130330_222137_inLine +BABEL_OP2_205_94587_20130330_222137_outLine +BABEL_OP2_205_95269_20130323_231507_inLine +BABEL_OP2_205_95269_20130323_231507_outLine +BABEL_OP2_205_95294_20130331_225911_inLine +BABEL_OP2_205_95294_20130331_225911_outLine +BABEL_OP2_205_95446_20131109_020511_inLine +BABEL_OP2_205_95446_20131109_020511_outLine +BABEL_OP2_205_95467_20130616_022551_inLine +BABEL_OP2_205_95490_20130303_001325_inLine +BABEL_OP2_205_95490_20130303_001325_outLine +BABEL_OP2_205_95583_20130305_224743_inLine +BABEL_OP2_205_95583_20130305_224743_outLine +BABEL_OP2_205_96324_20130307_062000_inLine +BABEL_OP2_205_96324_20130307_062000_outLine +BABEL_OP2_205_96842_20130413_004424_inLine +BABEL_OP2_205_96842_20130413_004424_outLine +BABEL_OP2_205_96934_20130330_010104_inLine +BABEL_OP2_205_96934_20130330_010104_outLine +BABEL_OP2_205_96985_20130305_215036_inLine +BABEL_OP2_205_96985_20130305_215036_outLine +BABEL_OP2_205_97264_20130407_190517_inLine +BABEL_OP2_205_97264_20130407_190517_outLine +BABEL_OP2_205_97363_20130321_030214_inLine +BABEL_OP2_205_97363_20130321_030214_outLine +BABEL_OP2_205_97570_20130406_072121_inLine +BABEL_OP2_205_97570_20130406_072121_outLine +BABEL_OP2_205_97604_20130408_175013_inLine +BABEL_OP2_205_97604_20130408_175013_outLine +BABEL_OP2_205_97731_20130413_011730_inLine +BABEL_OP2_205_97731_20130413_011730_outLine +BABEL_OP2_205_97731_20130413_013459_inLine +BABEL_OP2_205_97731_20130413_013459_outLine +BABEL_OP2_205_98311_20130311_063743_inLine +BABEL_OP2_205_98311_20130311_063743_outLine +BABEL_OP2_205_99264_20130412_221353_inLine +BABEL_OP2_205_99264_20130412_221353_outLine +BABEL_OP2_205_99487_20130310_062912_inLine +BABEL_OP2_205_99487_20130310_062912_outLine +BABEL_OP2_205_99516_20130304_070035_inLine +BABEL_OP2_205_99516_20130304_070035_outLine +BABEL_OP2_205_99718_20130311_081329_inLine +BABEL_OP2_205_99718_20130311_081329_outLine +BABEL_OP2_205_99813_20131110_022455_inLine +BABEL_OP2_205_99813_20131110_022455_outLine +BABEL_OP2_205_99920_20130408_013635_inLine +BABEL_OP2_205_99920_20130408_013635_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/dev.list b/egs/babel/s5d/conf/lists/206-zulu/dev.list new file mode 100644 index 00000000000..52d51a26c88 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/dev.list @@ -0,0 +1,141 @@ +BABEL_OP1_206_14350_20121123_042710_inLine +BABEL_OP1_206_14350_20121123_042710_outLine +BABEL_OP1_206_15042_20130124_002208_inLine +BABEL_OP1_206_15042_20130124_002208_outLine +BABEL_OP1_206_15042_20130124_003815_inLine +BABEL_OP1_206_15042_20130124_003815_outLine +BABEL_OP1_206_15163_20121129_232215_inLine +BABEL_OP1_206_15163_20121129_232215_outLine +BABEL_OP1_206_19621_20121219_031810_inLine +BABEL_OP1_206_19621_20121219_031810_outLine +BABEL_OP1_206_19663_20121219_173010_inLine +BABEL_OP1_206_19663_20121219_173010_outLine +BABEL_OP1_206_22466_20121130_231814_inLine +BABEL_OP1_206_22466_20121130_231814_outLine +BABEL_OP1_206_23995_20121215_221537_inLine +BABEL_OP1_206_23995_20121215_221537_outLine +BABEL_OP1_206_26999_20121213_022027_inLine +BABEL_OP1_206_26999_20121213_022027_outLine +BABEL_OP1_206_28190_20121213_031401_inLine +BABEL_OP1_206_28190_20121213_031401_outLine +BABEL_OP1_206_28606_20121215_000631_inLine +BABEL_OP1_206_28606_20121215_000631_outLine +BABEL_OP1_206_31182_20121222_050854_inLine +BABEL_OP1_206_31182_20121222_050854_outLine +BABEL_OP1_206_32727_20130601_012544_inLine 
+BABEL_OP1_206_32727_20130601_012544_outLine +BABEL_OP1_206_34477_20121130_183409_inLine +BABEL_OP1_206_34477_20121130_183409_outLine +BABEL_OP1_206_34477_20121130_184826_inLine +BABEL_OP1_206_34477_20121130_184826_outLine +BABEL_OP1_206_34899_20130602_004027_inLine +BABEL_OP1_206_34899_20130602_004027_outLine +BABEL_OP1_206_35583_20130529_005600_inLine +BABEL_OP1_206_35583_20130529_005600_outLine +BABEL_OP1_206_36219_20121130_184946_inLine +BABEL_OP1_206_36219_20121130_184946_outLine +BABEL_OP1_206_36594_20130601_002535_inLine +BABEL_OP1_206_36594_20130601_002535_outLine +BABEL_OP1_206_36990_20121130_212128_inLine +BABEL_OP1_206_36990_20121130_212128_outLine +BABEL_OP1_206_36990_20121130_213230_inLine +BABEL_OP1_206_36990_20121130_213230_outLine +BABEL_OP1_206_36990_20121130_220005_inLine +BABEL_OP1_206_36990_20121130_220005_outLine +BABEL_OP1_206_40740_20121214_002216_inLine +BABEL_OP1_206_40740_20121214_002216_outLine +BABEL_OP1_206_41100_20121129_002525_inLine +BABEL_OP1_206_41100_20121129_002525_outLine +BABEL_OP1_206_41100_20121129_003855_inLine +BABEL_OP1_206_41100_20121129_003855_outLine +BABEL_OP1_206_41493_20121128_222116_inLine +BABEL_OP1_206_41493_20121128_222116_outLine +BABEL_OP1_206_41493_20121128_230231_inLine +BABEL_OP1_206_41493_20121128_230231_outLine +BABEL_OP1_206_41920_20121129_204231_inLine +BABEL_OP1_206_41920_20121129_204231_outLine +BABEL_OP1_206_42600_20121206_212006_inLine +BABEL_OP1_206_42600_20121206_212006_outLine +BABEL_OP1_206_43646_20121206_213819_inLine +BABEL_OP1_206_43646_20121206_213819_outLine +BABEL_OP1_206_47877_20121212_233516_inLine +BABEL_OP1_206_47877_20121212_233516_outLine +BABEL_OP1_206_47877_20121213_000206_inLine +BABEL_OP1_206_47877_20121213_000206_outLine +BABEL_OP1_206_47877_20121213_030248_inLine +BABEL_OP1_206_47877_20121213_030248_outLine +BABEL_OP1_206_49767_20130530_203947_inLine +BABEL_OP1_206_49767_20130530_203947_outLine +BABEL_OP1_206_49902_20121201_230757_inLine +BABEL_OP1_206_49902_20121201_230757_outLine +BABEL_OP1_206_49902_20121202_000107_inLine +BABEL_OP1_206_49902_20121202_000107_outLine +BABEL_OP1_206_54405_20130522_224053_inLine +BABEL_OP1_206_54405_20130522_224053_outLine +BABEL_OP1_206_56198_20121128_190457_inLine +BABEL_OP1_206_56198_20121128_190457_outLine +BABEL_OP1_206_56429_20121220_005243_inLine +BABEL_OP1_206_56429_20121220_005243_outLine +BABEL_OP1_206_56684_20121212_010900_inLine +BABEL_OP1_206_56684_20121212_010900_outLine +BABEL_OP1_206_58815_20121216_231254_inLine +BABEL_OP1_206_58815_20121216_231254_outLine +BABEL_OP1_206_60538_20121205_021137_inLine +BABEL_OP1_206_60538_20121205_021137_outLine +BABEL_OP1_206_60706_20121128_191751_inLine +BABEL_OP1_206_60706_20121128_191751_outLine +BABEL_OP1_206_61011_20121219_024939_inLine +BABEL_OP1_206_61011_20121219_024939_outLine +BABEL_OP1_206_61219_20121204_234808_inLine +BABEL_OP1_206_61219_20121204_234808_outLine +BABEL_OP1_206_62362_20130301_013214_inLine +BABEL_OP1_206_62362_20130301_013214_outLine +BABEL_OP1_206_63220_20130531_002428_inLine +BABEL_OP1_206_63220_20130531_002428_outLine +BABEL_OP1_206_65692_20121212_230954_inLine +BABEL_OP1_206_65692_20121212_230954_outLine +BABEL_OP1_206_66837_20130111_182531_inLine +BABEL_OP1_206_66837_20130111_182531_outLine +BABEL_OP1_206_66959_20121218_192949_inLine +BABEL_OP1_206_66959_20121218_192949_outLine +BABEL_OP1_206_67066_20130604_231822_inLine +BABEL_OP1_206_67066_20130604_231822_outLine +BABEL_OP1_206_71780_20121219_010817_inLine +BABEL_OP1_206_71780_20121219_010817_outLine 
+BABEL_OP1_206_77225_20130604_013253_inLine +BABEL_OP1_206_77225_20130604_013253_outLine +BABEL_OP1_206_79858_20121126_013705_inLine +BABEL_OP1_206_79858_20121126_013705_outLine +BABEL_OP1_206_81854_20130122_210400_inLine +BABEL_OP1_206_81854_20130122_210400_outLine +BABEL_OP1_206_82224_20130602_234038_inLine +BABEL_OP1_206_82224_20130602_234038_outLine +BABEL_OP1_206_82966_20121213_231116_inLine +BABEL_OP1_206_82966_20121213_231116_outLine +BABEL_OP1_206_84838_20121210_051040_inLine +BABEL_OP1_206_84838_20121210_051040_outLine +BABEL_OP1_206_85048_20121220_202904_inLine +BABEL_OP1_206_85048_20121220_202904_outLine +BABEL_OP1_206_85340_20121129_000834_inLine +BABEL_OP1_206_85340_20121129_000834_outLine +BABEL_OP1_206_85340_20121129_231533_inLine +BABEL_OP1_206_85340_20121129_231533_outLine +BABEL_OP1_206_92252_20130601_235344_inLine +BABEL_OP1_206_92252_20130601_235344_outLine +BABEL_OP1_206_92886_20121128_042622_inLine +BABEL_OP1_206_92886_20121128_042622_outLine +BABEL_OP1_206_92886_20121128_045107_inLine +BABEL_OP1_206_92886_20121128_045107_outLine +BABEL_OP1_206_93007_20130528_211314_inLine +BABEL_OP1_206_93007_20130528_211314_outLine +BABEL_OP1_206_95490_20130103_005535_inLine +BABEL_OP1_206_95490_20130103_005535_outLine +BABEL_OP1_206_96584_20130121_011505_inLine +BABEL_OP1_206_96584_20130121_011505_outLine +BABEL_OP1_206_97849_20130123_000229_inLine +BABEL_OP1_206_97849_20130123_000229_outLine +BABEL_OP1_206_97988_20121212_223804_inLine +BABEL_OP1_206_97988_20121212_223804_outLine +BABEL_OP1_206_99594_20121220_022404_outLine +BABEL_OP1_206_99718_20121128_213548_inLine +BABEL_OP1_206_99718_20121128_213548_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/eval.list b/egs/babel/s5d/conf/lists/206-zulu/eval.list new file mode 100644 index 00000000000..b75e559d38b --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/eval.list @@ -0,0 +1,202 @@ +BABEL_OP1_206_10019_20121129_221847_inLine +BABEL_OP1_206_10019_20121129_221847_outLine +BABEL_OP1_206_10184_20130530_225826_inLine +BABEL_OP1_206_10184_20130530_225826_outLine +BABEL_OP1_206_10319_20121201_000052_inLine +BABEL_OP1_206_10319_20121201_000052_outLine +BABEL_OP1_206_10319_20121201_002831_inLine +BABEL_OP1_206_10319_20121201_002831_outLine +BABEL_OP1_206_10416_20121229_182422_inLine +BABEL_OP1_206_10416_20121229_182422_outLine +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_14229_20121220_002130_inLine +BABEL_OP1_206_14229_20121220_002130_outLine +BABEL_OP1_206_14237_20121130_193638_inLine +BABEL_OP1_206_14237_20121130_193638_outLine +BABEL_OP1_206_15926_20121211_205054_inLine +BABEL_OP1_206_15926_20121211_205054_outLine +BABEL_OP1_206_16787_20121220_025209_inLine +BABEL_OP1_206_16787_20121220_025209_outLine +BABEL_OP1_206_17165_20121128_185603_inLine +BABEL_OP1_206_17165_20121128_185603_outLine +BABEL_OP1_206_17573_20121214_234307_inLine +BABEL_OP1_206_17573_20121214_234307_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine +BABEL_OP1_206_21794_20121130_183726_outLine +BABEL_OP1_206_22641_20130605_195037_inLine +BABEL_OP1_206_22641_20130605_195037_outLine +BABEL_OP1_206_23395_20130110_222315_inLine +BABEL_OP1_206_23395_20130110_222315_outLine +BABEL_OP1_206_23628_20121128_215213_inLine 
+BABEL_OP1_206_23628_20121128_215213_outLine +BABEL_OP1_206_25220_20130528_232132_inLine +BABEL_OP1_206_25220_20130528_232132_outLine +BABEL_OP1_206_26074_20121221_172845_inLine +BABEL_OP1_206_26074_20121221_172845_outLine +BABEL_OP1_206_26478_20130523_003304_inLine +BABEL_OP1_206_26478_20130523_003304_outLine +BABEL_OP1_206_29208_20121220_212757_inLine +BABEL_OP1_206_29208_20121220_212757_outLine +BABEL_OP1_206_29777_20121220_010458_inLine +BABEL_OP1_206_29777_20121220_010458_outLine +BABEL_OP1_206_29777_20121220_012240_inLine +BABEL_OP1_206_29777_20121220_012240_outLine +BABEL_OP1_206_30250_20121129_205052_inLine +BABEL_OP1_206_30250_20121129_205052_outLine +BABEL_OP1_206_31484_20130530_181941_inLine +BABEL_OP1_206_31484_20130530_181941_outLine +BABEL_OP1_206_31979_20130120_174010_inLine +BABEL_OP1_206_31979_20130120_174010_outLine +BABEL_OP1_206_35000_20121220_022037_inLine +BABEL_OP1_206_35000_20121220_022037_outLine +BABEL_OP1_206_35202_20121218_153251_inLine +BABEL_OP1_206_35202_20121218_153251_outLine +BABEL_OP1_206_35706_20130603_175544_inLine +BABEL_OP1_206_35706_20130603_175544_outLine +BABEL_OP1_206_36669_20130528_012812_inLine +BABEL_OP1_206_36669_20130528_012812_outLine +BABEL_OP1_206_37064_20121128_061027_inLine +BABEL_OP1_206_37064_20121128_061027_outLine +BABEL_OP1_206_37064_20121128_224230_inLine +BABEL_OP1_206_37064_20121128_224230_outLine +BABEL_OP1_206_37064_20121128_233033_inLine +BABEL_OP1_206_37064_20121128_233033_outLine +BABEL_OP1_206_40092_20130604_005619_inLine +BABEL_OP1_206_40092_20130604_005619_outLine +BABEL_OP1_206_41741_20121123_161203_inLine +BABEL_OP1_206_41741_20121123_161203_outLine +BABEL_OP1_206_41745_20121206_052354_inLine +BABEL_OP1_206_41745_20121206_052354_outLine +BABEL_OP1_206_42231_20121213_215559_inLine +BABEL_OP1_206_42231_20121213_215559_outLine +BABEL_OP1_206_43920_20130527_173524_inLine +BABEL_OP1_206_43920_20130527_173524_outLine +BABEL_OP1_206_45106_20121207_233620_inLine +BABEL_OP1_206_45106_20121207_233620_outLine +BABEL_OP1_206_45140_20130602_193439_inLine +BABEL_OP1_206_45140_20130602_193439_outLine +BABEL_OP1_206_45777_20121220_211320_inLine +BABEL_OP1_206_45777_20121220_211320_outLine +BABEL_OP1_206_45843_20130103_065538_inLine +BABEL_OP1_206_45843_20130103_065538_outLine +BABEL_OP1_206_46625_20121206_223937_inLine +BABEL_OP1_206_46625_20121206_223937_outLine +BABEL_OP1_206_46712_20121129_221717_inLine +BABEL_OP1_206_46712_20121129_221717_outLine +BABEL_OP1_206_48200_20121218_202643_inLine +BABEL_OP1_206_48200_20121218_202643_outLine +BABEL_OP1_206_48758_20130601_165902_inLine +BABEL_OP1_206_48758_20130601_165902_outLine +BABEL_OP1_206_50962_20121205_031651_inLine +BABEL_OP1_206_50962_20121205_031651_outLine +BABEL_OP1_206_53842_20121203_222845_inLine +BABEL_OP1_206_53842_20121203_222845_outLine +BABEL_OP1_206_54040_20121216_233328_inLine +BABEL_OP1_206_54040_20121216_233328_outLine +BABEL_OP1_206_55742_20121129_210507_inLine +BABEL_OP1_206_55742_20121129_210507_outLine +BABEL_OP1_206_56090_20121130_064154_inLine +BABEL_OP1_206_56090_20121130_064154_outLine +BABEL_OP1_206_56743_20121205_030951_inLine +BABEL_OP1_206_56743_20121205_030951_outLine +BABEL_OP1_206_57650_20130605_164821_inLine +BABEL_OP1_206_57650_20130605_164821_outLine +BABEL_OP1_206_57654_20121201_024813_inLine +BABEL_OP1_206_57654_20121201_024813_outLine +BABEL_OP1_206_59993_20121218_222534_inLine +BABEL_OP1_206_59993_20121218_222534_outLine +BABEL_OP1_206_60282_20130604_201941_inLine +BABEL_OP1_206_60282_20130604_201941_outLine 
+BABEL_OP1_206_60836_20130523_194516_inLine +BABEL_OP1_206_60836_20130523_194516_outLine +BABEL_OP1_206_62155_20130301_010901_inLine +BABEL_OP1_206_62155_20130301_010901_outLine +BABEL_OP1_206_62835_20121201_223026_inLine +BABEL_OP1_206_62835_20121201_223026_outLine +BABEL_OP1_206_66967_20121128_215012_inLine +BABEL_OP1_206_66967_20121128_215012_outLine +BABEL_OP1_206_67842_20130523_231054_inLine +BABEL_OP1_206_67842_20130523_231054_outLine +BABEL_OP1_206_71282_20121219_154752_inLine +BABEL_OP1_206_71282_20121219_154752_outLine +BABEL_OP1_206_71333_20121219_195507_inLine +BABEL_OP1_206_71333_20121219_195507_outLine +BABEL_OP1_206_71333_20121219_202710_inLine +BABEL_OP1_206_71333_20121219_202710_outLine +BABEL_OP1_206_71333_20121220_020603_inLine +BABEL_OP1_206_71333_20121220_020603_outLine +BABEL_OP1_206_71704_20121203_210805_inLine +BABEL_OP1_206_71704_20121203_210805_outLine +BABEL_OP1_206_73042_20130528_223845_inLine +BABEL_OP1_206_73042_20130528_223845_outLine +BABEL_OP1_206_73622_20121203_233522_inLine +BABEL_OP1_206_73622_20121203_233522_outLine +BABEL_OP1_206_73837_20121202_232509_inLine +BABEL_OP1_206_73837_20121202_232509_outLine +BABEL_OP1_206_73837_20121202_234026_inLine +BABEL_OP1_206_73837_20121202_234026_outLine +BABEL_OP1_206_74111_20130527_210704_inLine +BABEL_OP1_206_74111_20130527_210704_outLine +BABEL_OP1_206_74641_20130601_192414_inLine +BABEL_OP1_206_74641_20130601_192414_outLine +BABEL_OP1_206_76773_20121219_022906_inLine +BABEL_OP1_206_76773_20121219_022906_outLine +BABEL_OP1_206_78630_20130420_211941_inLine +BABEL_OP1_206_78630_20130420_211941_outLine +BABEL_OP1_206_78976_20121206_005749_inLine +BABEL_OP1_206_78976_20121206_005749_outLine +BABEL_OP1_206_79820_20121127_235837_inLine +BABEL_OP1_206_79820_20121127_235837_outLine +BABEL_OP1_206_81392_20121219_022235_inLine +BABEL_OP1_206_81392_20121219_022235_outLine +BABEL_OP1_206_81404_20121215_230948_inLine +BABEL_OP1_206_81404_20121215_230948_outLine +BABEL_OP1_206_84125_20121201_213358_inLine +BABEL_OP1_206_84125_20121201_213358_outLine +BABEL_OP1_206_88873_20121129_222922_inLine +BABEL_OP1_206_88873_20121129_222922_outLine +BABEL_OP1_206_89045_20121201_221210_inLine +BABEL_OP1_206_89045_20121201_221210_outLine +BABEL_OP1_206_89045_20121201_222746_inLine +BABEL_OP1_206_89045_20121201_222746_outLine +BABEL_OP1_206_89372_20130103_022242_inLine +BABEL_OP1_206_89372_20130103_022242_outLine +BABEL_OP1_206_90935_20121207_230747_inLine +BABEL_OP1_206_90935_20121207_230747_outLine +BABEL_OP1_206_91593_20130602_212217_inLine +BABEL_OP1_206_91593_20130602_212217_outLine +BABEL_OP1_206_91884_20130531_175329_inLine +BABEL_OP1_206_91884_20130531_175329_outLine +BABEL_OP1_206_92698_20121128_234824_inLine +BABEL_OP1_206_92698_20121128_234824_outLine +BABEL_OP1_206_92698_20121129_000933_inLine +BABEL_OP1_206_92698_20121129_000933_outLine +BABEL_OP1_206_93153_20130524_203739_inLine +BABEL_OP1_206_93153_20130524_203739_outLine +BABEL_OP1_206_93946_20130531_215200_inLine +BABEL_OP1_206_93946_20130531_215200_outLine +BABEL_OP1_206_94002_20121208_002204_inLine +BABEL_OP1_206_94002_20121208_002204_outLine +BABEL_OP1_206_95399_20130528_171818_inLine +BABEL_OP1_206_95399_20130528_171818_outLine +BABEL_OP1_206_96205_20121217_165620_inLine +BABEL_OP1_206_96205_20121217_165620_outLine +BABEL_OP1_206_96205_20121217_171026_inLine +BABEL_OP1_206_96205_20121217_171026_outLine +BABEL_OP1_206_96504_20121207_214704_inLine +BABEL_OP1_206_96504_20121207_214704_outLine +BABEL_OP1_206_98580_20121201_203508_inLine 
+BABEL_OP1_206_98580_20121201_203508_outLine +BABEL_OP1_206_98888_20130603_202859_inLine +BABEL_OP1_206_98888_20130603_202859_outLine +BABEL_OP1_206_99401_20121123_043326_inLine +BABEL_OP1_206_99401_20121123_043326_outLine +BABEL_OP1_206_99732_20121220_033454_inLine +BABEL_OP1_206_99732_20121220_033454_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list new file mode 100644 index 00000000000..6b6bf451b3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list @@ -0,0 +1,72 @@ +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine +BABEL_OP1_206_21794_20121130_183726_outLine +BABEL_OP1_206_23395_20130110_222315_inLine +BABEL_OP1_206_23395_20130110_222315_outLine +BABEL_OP1_206_23628_20121128_215213_inLine +BABEL_OP1_206_23628_20121128_215213_outLine +BABEL_OP1_206_30250_20121129_205052_inLine +BABEL_OP1_206_30250_20121129_205052_outLine +BABEL_OP1_206_31979_20130120_174010_inLine +BABEL_OP1_206_31979_20130120_174010_outLine +BABEL_OP1_206_35202_20121218_153251_inLine +BABEL_OP1_206_35202_20121218_153251_outLine +BABEL_OP1_206_37064_20121128_061027_inLine +BABEL_OP1_206_37064_20121128_061027_outLine +BABEL_OP1_206_37064_20121128_224230_inLine +BABEL_OP1_206_37064_20121128_224230_outLine +BABEL_OP1_206_37064_20121128_233033_inLine +BABEL_OP1_206_37064_20121128_233033_outLine +BABEL_OP1_206_41745_20121206_052354_inLine +BABEL_OP1_206_41745_20121206_052354_outLine +BABEL_OP1_206_45140_20130602_193439_inLine +BABEL_OP1_206_45140_20130602_193439_outLine +BABEL_OP1_206_45777_20121220_211320_inLine +BABEL_OP1_206_45777_20121220_211320_outLine +BABEL_OP1_206_48758_20130601_165902_inLine +BABEL_OP1_206_48758_20130601_165902_outLine +BABEL_OP1_206_55742_20121129_210507_inLine +BABEL_OP1_206_55742_20121129_210507_outLine +BABEL_OP1_206_57650_20130605_164821_inLine +BABEL_OP1_206_57650_20130605_164821_outLine +BABEL_OP1_206_57654_20121201_024813_inLine +BABEL_OP1_206_57654_20121201_024813_outLine +BABEL_OP1_206_62155_20130301_010901_inLine +BABEL_OP1_206_62155_20130301_010901_outLine +BABEL_OP1_206_62835_20121201_223026_inLine +BABEL_OP1_206_62835_20121201_223026_outLine +BABEL_OP1_206_71333_20121219_195507_inLine +BABEL_OP1_206_71333_20121219_195507_outLine +BABEL_OP1_206_71333_20121219_202710_inLine +BABEL_OP1_206_71333_20121219_202710_outLine +BABEL_OP1_206_71333_20121220_020603_inLine +BABEL_OP1_206_71333_20121220_020603_outLine +BABEL_OP1_206_71704_20121203_210805_inLine +BABEL_OP1_206_71704_20121203_210805_outLine +BABEL_OP1_206_73622_20121203_233522_inLine +BABEL_OP1_206_73622_20121203_233522_outLine +BABEL_OP1_206_73837_20121202_232509_inLine +BABEL_OP1_206_73837_20121202_232509_outLine +BABEL_OP1_206_73837_20121202_234026_inLine +BABEL_OP1_206_73837_20121202_234026_outLine +BABEL_OP1_206_78630_20130420_211941_inLine +BABEL_OP1_206_78630_20130420_211941_outLine +BABEL_OP1_206_78976_20121206_005749_inLine +BABEL_OP1_206_78976_20121206_005749_outLine +BABEL_OP1_206_81392_20121219_022235_inLine +BABEL_OP1_206_81392_20121219_022235_outLine +BABEL_OP1_206_88873_20121129_222922_inLine +BABEL_OP1_206_88873_20121129_222922_outLine +BABEL_OP1_206_90935_20121207_230747_inLine 
+BABEL_OP1_206_90935_20121207_230747_outLine +BABEL_OP1_206_98580_20121201_203508_inLine +BABEL_OP1_206_98580_20121201_203508_outLine +BABEL_OP1_206_98888_20130603_202859_inLine +BABEL_OP1_206_98888_20130603_202859_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list new file mode 100644 index 00000000000..f47e8d654e1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list @@ -0,0 +1,829 @@ +BABEL_OP1_206_10901_20121128_230024_inLine +BABEL_OP1_206_10901_20121128_230024_outLine +BABEL_OP1_206_10901_20121129_003238_inLine +BABEL_OP1_206_10901_20121129_003238_outLine +BABEL_OP1_206_10966_20121205_213021_inLine +BABEL_OP1_206_10966_20121205_213021_outLine +BABEL_OP1_206_10966_20121205_214750_inLine +BABEL_OP1_206_10966_20121205_214750_outLine +BABEL_OP1_206_11581_20121213_020058_inLine +BABEL_OP1_206_11581_20121213_020058_outLine +BABEL_OP1_206_11797_20121207_001426_inLine +BABEL_OP1_206_11797_20121207_001426_outLine +BABEL_OP1_206_11797_20121207_002917_inLine +BABEL_OP1_206_11797_20121207_002917_outLine +BABEL_OP1_206_11859_20130602_013210_inLine +BABEL_OP1_206_11859_20130602_013210_outLine +BABEL_OP1_206_12242_20121218_022109_inLine +BABEL_OP1_206_12242_20121218_022109_outLine +BABEL_OP1_206_12851_20121215_010712_inLine +BABEL_OP1_206_12851_20121215_010712_outLine +BABEL_OP1_206_13030_20121129_225418_inLine +BABEL_OP1_206_13030_20121129_225418_outLine +BABEL_OP1_206_13184_20121216_223430_inLine +BABEL_OP1_206_13184_20121216_223430_outLine +BABEL_OP1_206_13184_20121216_224722_inLine +BABEL_OP1_206_13184_20121216_224722_outLine +BABEL_OP1_206_13483_20121219_205820_inLine +BABEL_OP1_206_13483_20121219_205820_outLine +BABEL_OP1_206_13483_20121219_212915_inLine +BABEL_OP1_206_13483_20121219_212915_outLine +BABEL_OP1_206_13490_20121221_005743_inLine +BABEL_OP1_206_13490_20121221_005743_outLine +BABEL_OP1_206_13744_20121205_205818_inLine +BABEL_OP1_206_13744_20121205_205818_outLine +BABEL_OP1_206_14137_20130118_010712_inLine +BABEL_OP1_206_14137_20130118_010712_outLine +BABEL_OP1_206_14137_20130122_014528_inLine +BABEL_OP1_206_14137_20130122_014528_outLine +BABEL_OP1_206_14179_20121210_224630_inLine +BABEL_OP1_206_14179_20121210_224630_outLine +BABEL_OP1_206_14440_20121218_231347_inLine +BABEL_OP1_206_14440_20121218_231347_outLine +BABEL_OP1_206_14719_20121213_040757_inLine +BABEL_OP1_206_14719_20121213_040757_outLine +BABEL_OP1_206_14729_20130531_183022_inLine +BABEL_OP1_206_14729_20130531_183022_outLine +BABEL_OP1_206_14807_20121221_150943_inLine +BABEL_OP1_206_14807_20121221_150943_outLine +BABEL_OP1_206_14814_20121129_203954_inLine +BABEL_OP1_206_14814_20121129_203954_outLine +BABEL_OP1_206_14899_20121203_021835_inLine +BABEL_OP1_206_14899_20121203_021835_outLine +BABEL_OP1_206_14929_20121203_232411_inLine +BABEL_OP1_206_14929_20121203_232411_outLine +BABEL_OP1_206_15024_20130527_234410_inLine +BABEL_OP1_206_15024_20130527_234410_outLine +BABEL_OP1_206_15324_20121208_010033_inLine +BABEL_OP1_206_15324_20121208_010033_outLine +BABEL_OP1_206_15702_20121214_225618_inLine +BABEL_OP1_206_15702_20121214_225618_outLine +BABEL_OP1_206_15702_20121214_231152_inLine +BABEL_OP1_206_15702_20121214_231152_outLine +BABEL_OP1_206_15702_20121214_232449_inLine +BABEL_OP1_206_15702_20121214_232449_outLine +BABEL_OP1_206_16149_20121201_010342_inLine +BABEL_OP1_206_16149_20121201_010342_outLine +BABEL_OP1_206_16467_20130531_200137_inLine +BABEL_OP1_206_16467_20130531_200137_outLine +BABEL_OP1_206_16475_20130121_210828_inLine 
+BABEL_OP1_206_16475_20130121_210828_outLine +BABEL_OP1_206_16475_20130121_212136_inLine +BABEL_OP1_206_16475_20130121_212136_outLine +BABEL_OP1_206_16839_20121217_170534_inLine +BABEL_OP1_206_16839_20121217_170534_outLine +BABEL_OP1_206_16886_20130524_232154_inLine +BABEL_OP1_206_16886_20130524_232154_outLine +BABEL_OP1_206_17032_20121219_220514_inLine +BABEL_OP1_206_17032_20121219_220514_outLine +BABEL_OP1_206_17280_20130527_191437_inLine +BABEL_OP1_206_17280_20130527_191437_outLine +BABEL_OP1_206_17440_20121227_213432_inLine +BABEL_OP1_206_17440_20121227_213432_outLine +BABEL_OP1_206_17472_20121214_193824_inLine +BABEL_OP1_206_17472_20121214_193824_outLine +BABEL_OP1_206_17567_20121209_205317_inLine +BABEL_OP1_206_17567_20121209_205317_outLine +BABEL_OP1_206_17567_20121209_211139_inLine +BABEL_OP1_206_17567_20121209_211139_outLine +BABEL_OP1_206_17615_20121214_193534_inLine +BABEL_OP1_206_17615_20121214_193534_outLine +BABEL_OP1_206_17881_20130121_005313_inLine +BABEL_OP1_206_17881_20130121_005313_outLine +BABEL_OP1_206_17923_20121130_214207_inLine +BABEL_OP1_206_17923_20121130_214207_outLine +BABEL_OP1_206_18291_20130604_183732_inLine +BABEL_OP1_206_18291_20130604_183732_outLine +BABEL_OP1_206_19722_20121130_203924_inLine +BABEL_OP1_206_19722_20121130_203924_outLine +BABEL_OP1_206_19773_20130101_015259_inLine +BABEL_OP1_206_19773_20130101_015259_outLine +BABEL_OP1_206_19818_20130529_204811_inLine +BABEL_OP1_206_19818_20130529_204811_outLine +BABEL_OP1_206_19877_20130123_175339_inLine +BABEL_OP1_206_19877_20130123_175339_outLine +BABEL_OP1_206_19877_20130123_181047_inLine +BABEL_OP1_206_19877_20130123_181047_outLine +BABEL_OP1_206_20682_20121213_030430_inLine +BABEL_OP1_206_20682_20121213_030430_outLine +BABEL_OP1_206_20800_20130523_220352_inLine +BABEL_OP1_206_20800_20130523_220352_outLine +BABEL_OP1_206_20916_20121205_203848_inLine +BABEL_OP1_206_20916_20121205_203848_outLine +BABEL_OP1_206_20922_20121214_231110_inLine +BABEL_OP1_206_20922_20121214_231110_outLine +BABEL_OP1_206_21004_20121210_215455_inLine +BABEL_OP1_206_21004_20121210_215455_outLine +BABEL_OP1_206_21004_20121210_223449_inLine +BABEL_OP1_206_21004_20121210_223449_outLine +BABEL_OP1_206_21206_20121220_001511_inLine +BABEL_OP1_206_21206_20121220_001511_outLine +BABEL_OP1_206_21327_20130111_022748_inLine +BABEL_OP1_206_21327_20130111_022748_outLine +BABEL_OP1_206_21892_20121213_235725_inLine +BABEL_OP1_206_21892_20121213_235725_outLine +BABEL_OP1_206_22494_20130530_004456_inLine +BABEL_OP1_206_22494_20130530_004456_outLine +BABEL_OP1_206_22624_20121219_210041_inLine +BABEL_OP1_206_22624_20121219_210041_outLine +BABEL_OP1_206_22826_20130121_231859_inLine +BABEL_OP1_206_22826_20130121_231859_outLine +BABEL_OP1_206_22826_20130121_233139_inLine +BABEL_OP1_206_22826_20130121_233139_outLine +BABEL_OP1_206_22965_20121128_011001_inLine +BABEL_OP1_206_22965_20121128_011001_outLine +BABEL_OP1_206_22965_20121128_012241_inLine +BABEL_OP1_206_22965_20121128_012241_outLine +BABEL_OP1_206_23006_20121203_004250_inLine +BABEL_OP1_206_23006_20121203_004250_outLine +BABEL_OP1_206_23006_20121203_073608_inLine +BABEL_OP1_206_23006_20121203_073608_outLine +BABEL_OP1_206_23092_20121227_211821_inLine +BABEL_OP1_206_23092_20121227_211821_outLine +BABEL_OP1_206_23151_20121217_034512_inLine +BABEL_OP1_206_23151_20121217_034512_outLine +BABEL_OP1_206_23153_20130102_224836_inLine +BABEL_OP1_206_23153_20130102_224836_outLine +BABEL_OP1_206_23190_20121219_204325_inLine +BABEL_OP1_206_23190_20121219_204325_outLine 
+BABEL_OP1_206_23239_20130118_000831_inLine +BABEL_OP1_206_23239_20130118_000831_outLine +BABEL_OP1_206_23505_20121203_010039_inLine +BABEL_OP1_206_23505_20121203_010039_outLine +BABEL_OP1_206_24253_20130120_235750_inLine +BABEL_OP1_206_24253_20130120_235750_outLine +BABEL_OP1_206_24253_20130121_000835_inLine +BABEL_OP1_206_24253_20130121_000835_outLine +BABEL_OP1_206_24253_20130121_012503_inLine +BABEL_OP1_206_24253_20130121_012503_outLine +BABEL_OP1_206_24323_20121214_212407_inLine +BABEL_OP1_206_24323_20121214_212407_outLine +BABEL_OP1_206_24323_20121214_213448_inLine +BABEL_OP1_206_24323_20121214_213448_outLine +BABEL_OP1_206_24532_20121201_203102_inLine +BABEL_OP1_206_24532_20121201_203102_outLine +BABEL_OP1_206_24569_20121210_211659_inLine +BABEL_OP1_206_24569_20121210_211659_outLine +BABEL_OP1_206_24590_20121201_210938_inLine +BABEL_OP1_206_24590_20121201_210938_outLine +BABEL_OP1_206_24590_20121201_215618_inLine +BABEL_OP1_206_24590_20121201_215618_outLine +BABEL_OP1_206_24605_20121218_201807_inLine +BABEL_OP1_206_24605_20121218_201807_outLine +BABEL_OP1_206_24982_20130603_194918_inLine +BABEL_OP1_206_24982_20130603_194918_outLine +BABEL_OP1_206_25412_20121210_201120_inLine +BABEL_OP1_206_25412_20121210_201120_outLine +BABEL_OP1_206_25412_20121210_203544_inLine +BABEL_OP1_206_25412_20121210_203544_outLine +BABEL_OP1_206_25496_20130529_000539_inLine +BABEL_OP1_206_25496_20130529_000539_outLine +BABEL_OP1_206_25698_20130603_011444_inLine +BABEL_OP1_206_25698_20130603_011444_outLine +BABEL_OP1_206_25719_20121215_000803_inLine +BABEL_OP1_206_25719_20121215_000803_outLine +BABEL_OP1_206_25767_20121204_021252_inLine +BABEL_OP1_206_25767_20121204_021252_outLine +BABEL_OP1_206_25961_20121202_232650_inLine +BABEL_OP1_206_25961_20121202_232650_outLine +BABEL_OP1_206_25961_20121202_234202_inLine +BABEL_OP1_206_25961_20121202_234202_outLine +BABEL_OP1_206_26206_20130529_172847_inLine +BABEL_OP1_206_26206_20130529_172847_outLine +BABEL_OP1_206_26388_20121202_191806_inLine +BABEL_OP1_206_26388_20121202_191806_outLine +BABEL_OP1_206_26836_20121201_210310_inLine +BABEL_OP1_206_26836_20121201_210310_outLine +BABEL_OP1_206_27042_20121219_230502_inLine +BABEL_OP1_206_27042_20121219_230502_outLine +BABEL_OP1_206_27082_20121220_012037_inLine +BABEL_OP1_206_27082_20121220_012037_outLine +BABEL_OP1_206_27125_20121203_012043_inLine +BABEL_OP1_206_27125_20121203_012043_outLine +BABEL_OP1_206_27203_20121214_210018_inLine +BABEL_OP1_206_27203_20121214_210018_outLine +BABEL_OP1_206_27590_20121216_180900_inLine +BABEL_OP1_206_27590_20121216_180900_outLine +BABEL_OP1_206_27841_20121216_014031_inLine +BABEL_OP1_206_27841_20121216_014031_outLine +BABEL_OP1_206_28303_20121128_201831_inLine +BABEL_OP1_206_28303_20121128_201831_outLine +BABEL_OP1_206_28419_20121207_221153_inLine +BABEL_OP1_206_28419_20121207_221153_outLine +BABEL_OP1_206_28775_20121203_022428_inLine +BABEL_OP1_206_28775_20121203_022428_outLine +BABEL_OP1_206_28945_20130118_003100_inLine +BABEL_OP1_206_28945_20130118_003100_outLine +BABEL_OP1_206_29023_20121201_234219_inLine +BABEL_OP1_206_29023_20121201_234219_outLine +BABEL_OP1_206_29039_20121220_013046_inLine +BABEL_OP1_206_29039_20121220_013046_outLine +BABEL_OP1_206_29135_20121219_224133_inLine +BABEL_OP1_206_29135_20121219_224133_outLine +BABEL_OP1_206_29323_20121219_201726_inLine +BABEL_OP1_206_29323_20121219_201726_outLine +BABEL_OP1_206_29323_20121219_203137_inLine +BABEL_OP1_206_29323_20121219_203137_outLine +BABEL_OP1_206_30395_20121206_014115_inLine 
+BABEL_OP1_206_30395_20121206_014115_outLine +BABEL_OP1_206_30869_20121227_221910_inLine +BABEL_OP1_206_30869_20121227_221910_outLine +BABEL_OP1_206_31109_20121224_061142_inLine +BABEL_OP1_206_31109_20121224_061142_outLine +BABEL_OP1_206_31490_20121128_234650_inLine +BABEL_OP1_206_31490_20121128_234650_outLine +BABEL_OP1_206_31624_20121123_081518_inLine +BABEL_OP1_206_31624_20121123_081518_outLine +BABEL_OP1_206_31628_20130528_194548_inLine +BABEL_OP1_206_31628_20130528_194548_outLine +BABEL_OP1_206_32122_20121128_184757_inLine +BABEL_OP1_206_32122_20121128_184757_outLine +BABEL_OP1_206_32301_20130530_191142_inLine +BABEL_OP1_206_32301_20130530_191142_outLine +BABEL_OP1_206_32328_20121215_181911_inLine +BABEL_OP1_206_32328_20121215_181911_outLine +BABEL_OP1_206_32708_20121231_225706_inLine +BABEL_OP1_206_32708_20121231_225706_outLine +BABEL_OP1_206_32837_20121213_221825_inLine +BABEL_OP1_206_32837_20121213_221825_outLine +BABEL_OP1_206_32837_20121213_223037_inLine +BABEL_OP1_206_32837_20121213_223037_outLine +BABEL_OP1_206_33111_20130601_200233_inLine +BABEL_OP1_206_33111_20130601_200233_outLine +BABEL_OP1_206_33273_20121129_201318_inLine +BABEL_OP1_206_33273_20121129_201318_outLine +BABEL_OP1_206_33355_20121130_055943_inLine +BABEL_OP1_206_33355_20121130_055943_outLine +BABEL_OP1_206_33672_20130524_171145_inLine +BABEL_OP1_206_33672_20130524_171145_outLine +BABEL_OP1_206_33704_20121213_214430_inLine +BABEL_OP1_206_33704_20121213_214430_outLine +BABEL_OP1_206_33840_20121213_230741_inLine +BABEL_OP1_206_33840_20121213_230741_outLine +BABEL_OP1_206_34197_20121128_232538_inLine +BABEL_OP1_206_34197_20121128_232538_outLine +BABEL_OP1_206_34328_20121202_184915_inLine +BABEL_OP1_206_34328_20121202_184915_outLine +BABEL_OP1_206_34564_20121214_020257_inLine +BABEL_OP1_206_34564_20121214_020257_outLine +BABEL_OP1_206_34679_20121206_000152_inLine +BABEL_OP1_206_34679_20121206_000152_outLine +BABEL_OP1_206_34826_20121215_005505_inLine +BABEL_OP1_206_34826_20121215_005505_outLine +BABEL_OP1_206_35008_20121216_210449_inLine +BABEL_OP1_206_35008_20121216_210449_outLine +BABEL_OP1_206_36505_20121213_222927_inLine +BABEL_OP1_206_36505_20121213_222927_outLine +BABEL_OP1_206_36894_20121128_201825_inLine +BABEL_OP1_206_36894_20121128_201825_outLine +BABEL_OP1_206_37598_20130111_224005_inLine +BABEL_OP1_206_37598_20130111_224005_outLine +BABEL_OP1_206_38431_20121214_013939_inLine +BABEL_OP1_206_38431_20121214_013939_outLine +BABEL_OP1_206_38554_20121123_025415_inLine +BABEL_OP1_206_38554_20121123_025415_outLine +BABEL_OP1_206_38689_20121217_013737_inLine +BABEL_OP1_206_38689_20121217_013737_outLine +BABEL_OP1_206_38878_20130530_172309_inLine +BABEL_OP1_206_38878_20130530_172309_outLine +BABEL_OP1_206_39059_20121215_230057_inLine +BABEL_OP1_206_39059_20121215_230057_outLine +BABEL_OP1_206_39059_20121216_000252_inLine +BABEL_OP1_206_39059_20121216_000252_outLine +BABEL_OP1_206_39307_20121207_024156_inLine +BABEL_OP1_206_39307_20121207_024156_outLine +BABEL_OP1_206_39426_20130120_232407_inLine +BABEL_OP1_206_39426_20130120_232407_outLine +BABEL_OP1_206_39426_20130120_233651_inLine +BABEL_OP1_206_39426_20130120_233651_outLine +BABEL_OP1_206_40557_20121218_025254_inLine +BABEL_OP1_206_40557_20121218_025254_outLine +BABEL_OP1_206_40713_20121129_215041_inLine +BABEL_OP1_206_40713_20121129_215041_outLine +BABEL_OP1_206_41097_20121215_173120_inLine +BABEL_OP1_206_41097_20121215_173120_outLine +BABEL_OP1_206_41174_20130604_193434_inLine +BABEL_OP1_206_41174_20130604_193434_outLine 
+BABEL_OP1_206_41233_20121215_001846_inLine +BABEL_OP1_206_41233_20121215_001846_outLine +BABEL_OP1_206_41598_20130102_233834_inLine +BABEL_OP1_206_41598_20130102_233834_outLine +BABEL_OP1_206_42029_20121220_181050_inLine +BABEL_OP1_206_42029_20121220_181050_outLine +BABEL_OP1_206_42434_20121202_195754_inLine +BABEL_OP1_206_42434_20121202_195754_outLine +BABEL_OP1_206_42434_20121202_202540_inLine +BABEL_OP1_206_42434_20121202_202540_outLine +BABEL_OP1_206_42619_20121213_204854_inLine +BABEL_OP1_206_42619_20121213_204854_outLine +BABEL_OP1_206_42771_20130601_203101_inLine +BABEL_OP1_206_42771_20130601_203101_outLine +BABEL_OP1_206_42834_20121219_015826_inLine +BABEL_OP1_206_42834_20121219_015826_outLine +BABEL_OP1_206_43286_20121125_054930_inLine +BABEL_OP1_206_43286_20121125_054930_outLine +BABEL_OP1_206_43286_20121125_060858_inLine +BABEL_OP1_206_43286_20121125_060858_outLine +BABEL_OP1_206_43286_20121126_003810_inLine +BABEL_OP1_206_43286_20121126_003810_outLine +BABEL_OP1_206_43368_20121128_203447_inLine +BABEL_OP1_206_43368_20121128_203447_outLine +BABEL_OP1_206_43784_20121230_224515_inLine +BABEL_OP1_206_43784_20121230_224515_outLine +BABEL_OP1_206_43788_20121223_235436_inLine +BABEL_OP1_206_43788_20121223_235436_outLine +BABEL_OP1_206_44477_20121228_020003_inLine +BABEL_OP1_206_44477_20121228_020003_outLine +BABEL_OP1_206_44619_20121129_201028_inLine +BABEL_OP1_206_44619_20121129_201028_outLine +BABEL_OP1_206_44619_20121129_203209_inLine +BABEL_OP1_206_44619_20121129_203209_outLine +BABEL_OP1_206_45235_20121213_044536_inLine +BABEL_OP1_206_45235_20121213_044536_outLine +BABEL_OP1_206_45536_20121212_023751_inLine +BABEL_OP1_206_45536_20121212_023751_outLine +BABEL_OP1_206_45560_20121210_054617_inLine +BABEL_OP1_206_45560_20121210_054617_outLine +BABEL_OP1_206_45770_20121205_213203_inLine +BABEL_OP1_206_45770_20121205_213203_outLine +BABEL_OP1_206_45851_20130123_013016_inLine +BABEL_OP1_206_45851_20130123_013016_outLine +BABEL_OP1_206_46066_20121218_015244_outLine +BABEL_OP1_206_46066_20121218_020520_inLine +BABEL_OP1_206_46066_20121218_020520_outLine +BABEL_OP1_206_46261_20130524_180914_inLine +BABEL_OP1_206_46261_20130524_180914_outLine +BABEL_OP1_206_46330_20121220_171612_inLine +BABEL_OP1_206_46330_20121220_171612_outLine +BABEL_OP1_206_46558_20121125_000809_inLine +BABEL_OP1_206_46558_20121125_000809_outLine +BABEL_OP1_206_46688_20121130_222025_inLine +BABEL_OP1_206_46688_20121130_222025_outLine +BABEL_OP1_206_46770_20121213_030348_inLine +BABEL_OP1_206_46770_20121213_030348_outLine +BABEL_OP1_206_46976_20121222_002626_inLine +BABEL_OP1_206_46976_20121222_002626_outLine +BABEL_OP1_206_47186_20121214_212658_inLine +BABEL_OP1_206_47186_20121214_212658_outLine +BABEL_OP1_206_47215_20121129_232526_inLine +BABEL_OP1_206_47215_20121129_232526_outLine +BABEL_OP1_206_47487_20121127_232736_inLine +BABEL_OP1_206_47487_20121127_232736_outLine +BABEL_OP1_206_47802_20121213_220928_inLine +BABEL_OP1_206_47802_20121213_220928_outLine +BABEL_OP1_206_47878_20121221_153159_inLine +BABEL_OP1_206_47878_20121221_153159_outLine +BABEL_OP1_206_48789_20121202_173639_inLine +BABEL_OP1_206_48789_20121202_173639_outLine +BABEL_OP1_206_48844_20121123_030435_inLine +BABEL_OP1_206_48844_20121123_030435_outLine +BABEL_OP1_206_48844_20121204_030447_inLine +BABEL_OP1_206_48844_20121204_030447_outLine +BABEL_OP1_206_49001_20121128_201907_inLine +BABEL_OP1_206_49001_20121128_201907_outLine +BABEL_OP1_206_49287_20121219_204754_inLine +BABEL_OP1_206_49287_20121219_204754_outLine 
+BABEL_OP1_206_49870_20130605_000829_inLine +BABEL_OP1_206_49870_20130605_000829_outLine +BABEL_OP1_206_49907_20121128_055731_inLine +BABEL_OP1_206_49907_20121128_055731_outLine +BABEL_OP1_206_49912_20130603_002155_inLine +BABEL_OP1_206_49912_20130603_002155_outLine +BABEL_OP1_206_50090_20121210_232617_inLine +BABEL_OP1_206_50090_20121210_232617_outLine +BABEL_OP1_206_50090_20121210_234419_inLine +BABEL_OP1_206_50090_20121210_234419_outLine +BABEL_OP1_206_50175_20130604_165733_inLine +BABEL_OP1_206_50175_20130604_165733_outLine +BABEL_OP1_206_50565_20121206_213949_inLine +BABEL_OP1_206_50565_20121206_213949_outLine +BABEL_OP1_206_50565_20121206_215103_inLine +BABEL_OP1_206_50565_20121206_215103_outLine +BABEL_OP1_206_50565_20121206_221547_inLine +BABEL_OP1_206_50565_20121206_221547_outLine +BABEL_OP1_206_50601_20121219_030519_inLine +BABEL_OP1_206_50601_20121219_030519_outLine +BABEL_OP1_206_50681_20121222_003908_inLine +BABEL_OP1_206_50681_20121222_003908_outLine +BABEL_OP1_206_50726_20130103_015437_inLine +BABEL_OP1_206_50726_20130103_015437_outLine +BABEL_OP1_206_51015_20121216_025307_inLine +BABEL_OP1_206_51015_20121216_025307_outLine +BABEL_OP1_206_51484_20121213_023814_inLine +BABEL_OP1_206_51484_20121213_023814_outLine +BABEL_OP1_206_51540_20121212_225359_inLine +BABEL_OP1_206_51540_20121212_225359_outLine +BABEL_OP1_206_51955_20121219_004818_inLine +BABEL_OP1_206_51955_20121219_004818_outLine +BABEL_OP1_206_52422_20121220_034724_inLine +BABEL_OP1_206_52422_20121220_034724_outLine +BABEL_OP1_206_52694_20130523_175759_inLine +BABEL_OP1_206_52694_20130523_175759_outLine +BABEL_OP1_206_52804_20121201_184720_inLine +BABEL_OP1_206_52804_20121201_184720_outLine +BABEL_OP1_206_52818_20121228_012038_inLine +BABEL_OP1_206_52818_20121228_012038_outLine +BABEL_OP1_206_52854_20121128_034458_inLine +BABEL_OP1_206_52854_20121128_034458_outLine +BABEL_OP1_206_52854_20121206_214928_inLine +BABEL_OP1_206_52854_20121206_214928_outLine +BABEL_OP1_206_52854_20121206_224251_inLine +BABEL_OP1_206_52854_20121206_224251_outLine +BABEL_OP1_206_52932_20121128_045304_inLine +BABEL_OP1_206_52932_20121128_045304_outLine +BABEL_OP1_206_52932_20121128_233739_inLine +BABEL_OP1_206_52932_20121128_233739_outLine +BABEL_OP1_206_53957_20130522_194644_inLine +BABEL_OP1_206_53957_20130522_194644_outLine +BABEL_OP1_206_54104_20130102_215440_inLine +BABEL_OP1_206_54104_20130102_215440_outLine +BABEL_OP1_206_54162_20121220_230656_inLine +BABEL_OP1_206_54162_20121220_230656_outLine +BABEL_OP1_206_54390_20121130_203012_inLine +BABEL_OP1_206_54390_20121130_203012_outLine +BABEL_OP1_206_54477_20121212_013137_inLine +BABEL_OP1_206_54477_20121212_013137_outLine +BABEL_OP1_206_54530_20130531_233153_inLine +BABEL_OP1_206_54530_20130531_233153_outLine +BABEL_OP1_206_54697_20121228_003256_inLine +BABEL_OP1_206_54697_20121228_003256_outLine +BABEL_OP1_206_54744_20130103_035406_inLine +BABEL_OP1_206_54744_20130103_035406_outLine +BABEL_OP1_206_54953_20121205_023337_inLine +BABEL_OP1_206_54953_20121205_023337_outLine +BABEL_OP1_206_55259_20130118_022049_inLine +BABEL_OP1_206_55259_20130118_022049_outLine +BABEL_OP1_206_55259_20130118_023307_inLine +BABEL_OP1_206_55259_20130118_023307_outLine +BABEL_OP1_206_55818_20121130_051150_inLine +BABEL_OP1_206_55818_20121130_051150_outLine +BABEL_OP1_206_55818_20121130_054331_inLine +BABEL_OP1_206_55818_20121130_054331_outLine +BABEL_OP1_206_55968_20121204_204317_inLine +BABEL_OP1_206_55968_20121204_204317_outLine +BABEL_OP1_206_55968_20121204_211213_inLine 
+BABEL_OP1_206_55968_20121204_211213_outLine +BABEL_OP1_206_56023_20121227_235521_inLine +BABEL_OP1_206_56023_20121227_235521_outLine +BABEL_OP1_206_56677_20130111_174028_inLine +BABEL_OP1_206_56677_20130111_174028_outLine +BABEL_OP1_206_57093_20121205_002300_inLine +BABEL_OP1_206_57093_20121205_002300_outLine +BABEL_OP1_206_57093_20121205_044909_inLine +BABEL_OP1_206_57093_20121205_044909_outLine +BABEL_OP1_206_57141_20121212_211734_inLine +BABEL_OP1_206_57141_20121212_211734_outLine +BABEL_OP1_206_57529_20121211_232002_inLine +BABEL_OP1_206_57529_20121211_232002_outLine +BABEL_OP1_206_57678_20121201_231032_inLine +BABEL_OP1_206_57678_20121201_231032_outLine +BABEL_OP1_206_58047_20121212_222839_inLine +BABEL_OP1_206_58047_20121212_222839_outLine +BABEL_OP1_206_58313_20121220_211354_inLine +BABEL_OP1_206_58313_20121220_211354_outLine +BABEL_OP1_206_58489_20121221_225602_inLine +BABEL_OP1_206_58489_20121221_225602_outLine +BABEL_OP1_206_58734_20121130_203502_inLine +BABEL_OP1_206_58734_20121130_203502_outLine +BABEL_OP1_206_58821_20130531_205929_inLine +BABEL_OP1_206_58821_20130531_205929_outLine +BABEL_OP1_206_60026_20121205_044105_inLine +BABEL_OP1_206_60026_20121205_044105_outLine +BABEL_OP1_206_60299_20130602_222928_inLine +BABEL_OP1_206_60299_20130602_222928_outLine +BABEL_OP1_206_60310_20121220_003756_inLine +BABEL_OP1_206_60310_20121220_003756_outLine +BABEL_OP1_206_60418_20130530_195743_inLine +BABEL_OP1_206_60418_20130530_195743_outLine +BABEL_OP1_206_61167_20121202_012318_inLine +BABEL_OP1_206_61167_20121202_012318_outLine +BABEL_OP1_206_61167_20121203_083125_inLine +BABEL_OP1_206_61167_20121203_083125_outLine +BABEL_OP1_206_61225_20121128_222308_inLine +BABEL_OP1_206_61225_20121128_222308_outLine +BABEL_OP1_206_61348_20121218_225731_inLine +BABEL_OP1_206_61348_20121218_225731_outLine +BABEL_OP1_206_61357_20130120_183001_inLine +BABEL_OP1_206_61357_20130120_183001_outLine +BABEL_OP1_206_61435_20121217_000451_inLine +BABEL_OP1_206_61435_20121217_000451_outLine +BABEL_OP1_206_61678_20121123_013649_inLine +BABEL_OP1_206_61678_20121123_013649_outLine +BABEL_OP1_206_61731_20121128_024803_inLine +BABEL_OP1_206_61731_20121128_024803_outLine +BABEL_OP1_206_61888_20130605_172611_inLine +BABEL_OP1_206_61888_20130605_172611_outLine +BABEL_OP1_206_62200_20130522_212226_inLine +BABEL_OP1_206_62200_20130522_212226_outLine +BABEL_OP1_206_62724_20121218_202436_inLine +BABEL_OP1_206_62724_20121218_202436_outLine +BABEL_OP1_206_62800_20121201_010750_inLine +BABEL_OP1_206_62800_20121201_010750_outLine +BABEL_OP1_206_62800_20121201_015047_inLine +BABEL_OP1_206_62800_20121201_015047_outLine +BABEL_OP1_206_62800_20121201_021942_inLine +BABEL_OP1_206_62800_20121201_021942_outLine +BABEL_OP1_206_62810_20121122_202600_inLine +BABEL_OP1_206_62810_20121122_202600_outLine +BABEL_OP1_206_63081_20121219_012926_inLine +BABEL_OP1_206_63081_20121219_012926_outLine +BABEL_OP1_206_63081_20121219_174450_inLine +BABEL_OP1_206_63081_20121219_174450_outLine +BABEL_OP1_206_63084_20121210_013516_inLine +BABEL_OP1_206_63084_20121210_013516_outLine +BABEL_OP1_206_63425_20121214_182639_inLine +BABEL_OP1_206_63425_20121214_182639_outLine +BABEL_OP1_206_63445_20121207_014019_inLine +BABEL_OP1_206_63445_20121207_014019_outLine +BABEL_OP1_206_63604_20130527_215715_inLine +BABEL_OP1_206_63604_20130527_215715_outLine +BABEL_OP1_206_63670_20121212_212623_inLine +BABEL_OP1_206_63670_20121212_212623_outLine +BABEL_OP1_206_63757_20121222_235730_inLine +BABEL_OP1_206_63757_20121222_235730_outLine 
+BABEL_OP1_206_63787_20130530_221300_inLine +BABEL_OP1_206_63787_20130530_221300_outLine +BABEL_OP1_206_63906_20130131_014942_inLine +BABEL_OP1_206_63906_20130131_014942_outLine +BABEL_OP1_206_64014_20130122_011323_inLine +BABEL_OP1_206_64014_20130122_011323_outLine +BABEL_OP1_206_64768_20121207_223917_inLine +BABEL_OP1_206_64768_20121207_223917_outLine +BABEL_OP1_206_65064_20121221_000939_inLine +BABEL_OP1_206_65064_20121221_000939_outLine +BABEL_OP1_206_65723_20121129_222430_inLine +BABEL_OP1_206_65723_20121129_222430_outLine +BABEL_OP1_206_65882_20121201_174526_inLine +BABEL_OP1_206_65882_20121201_174526_outLine +BABEL_OP1_206_66001_20130103_012213_inLine +BABEL_OP1_206_66001_20130103_012213_outLine +BABEL_OP1_206_66045_20121129_223013_inLine +BABEL_OP1_206_66045_20121129_223013_outLine +BABEL_OP1_206_66519_20121202_220401_inLine +BABEL_OP1_206_66519_20121202_220401_outLine +BABEL_OP1_206_66916_20130118_005447_inLine +BABEL_OP1_206_66916_20130118_005447_outLine +BABEL_OP1_206_66916_20130118_010520_inLine +BABEL_OP1_206_66916_20130118_010520_outLine +BABEL_OP1_206_67622_20121206_210526_inLine +BABEL_OP1_206_67622_20121206_210526_outLine +BABEL_OP1_206_67659_20121219_201336_inLine +BABEL_OP1_206_67659_20121219_201336_outLine +BABEL_OP1_206_68306_20121213_205817_inLine +BABEL_OP1_206_68306_20121213_205817_outLine +BABEL_OP1_206_68385_20121123_231120_inLine +BABEL_OP1_206_68385_20121123_231120_outLine +BABEL_OP1_206_68627_20130122_023725_inLine +BABEL_OP1_206_68627_20130122_023725_outLine +BABEL_OP1_206_68748_20121212_025750_inLine +BABEL_OP1_206_68748_20121212_025750_outLine +BABEL_OP1_206_68924_20121228_001758_inLine +BABEL_OP1_206_68924_20121228_001758_outLine +BABEL_OP1_206_69578_20121214_002009_inLine +BABEL_OP1_206_69578_20121214_002009_outLine +BABEL_OP1_206_69992_20130529_181609_inLine +BABEL_OP1_206_69992_20130529_181609_outLine +BABEL_OP1_206_70121_20121219_215051_inLine +BABEL_OP1_206_70121_20121219_215051_outLine +BABEL_OP1_206_70121_20121219_220824_inLine +BABEL_OP1_206_70121_20121219_220824_outLine +BABEL_OP1_206_70251_20121219_044415_inLine +BABEL_OP1_206_70251_20121219_044415_outLine +BABEL_OP1_206_70343_20121221_023826_inLine +BABEL_OP1_206_70343_20121221_023826_outLine +BABEL_OP1_206_70386_20121207_232647_inLine +BABEL_OP1_206_70386_20121207_232647_outLine +BABEL_OP1_206_71067_20121209_210046_inLine +BABEL_OP1_206_71067_20121209_210046_outLine +BABEL_OP1_206_71067_20121209_214030_inLine +BABEL_OP1_206_71067_20121209_214030_outLine +BABEL_OP1_206_71566_20130604_214443_inLine +BABEL_OP1_206_71566_20130604_214443_outLine +BABEL_OP1_206_72110_20121221_232617_inLine +BABEL_OP1_206_72110_20121221_232617_outLine +BABEL_OP1_206_72319_20130123_022502_inLine +BABEL_OP1_206_72319_20130123_022502_outLine +BABEL_OP1_206_72324_20130602_184851_inLine +BABEL_OP1_206_72324_20130602_184851_outLine +BABEL_OP1_206_72844_20121130_193956_inLine +BABEL_OP1_206_72844_20121130_193956_outLine +BABEL_OP1_206_73005_20130122_021229_inLine +BABEL_OP1_206_73005_20130122_021229_outLine +BABEL_OP1_206_73072_20121205_231914_inLine +BABEL_OP1_206_73072_20121205_231914_outLine +BABEL_OP1_206_73258_20130120_170200_inLine +BABEL_OP1_206_73258_20130120_170200_outLine +BABEL_OP1_206_73301_20130529_214428_inLine +BABEL_OP1_206_73301_20130529_214428_outLine +BABEL_OP1_206_73485_20130122_235208_inLine +BABEL_OP1_206_73485_20130122_235208_outLine +BABEL_OP1_206_73591_20121117_212751_inLine +BABEL_OP1_206_73591_20121117_212751_outLine +BABEL_OP1_206_73964_20130317_202534_inLine 
+BABEL_OP1_206_73964_20130317_202534_outLine +BABEL_OP1_206_74886_20121128_205141_inLine +BABEL_OP1_206_74886_20121128_205141_outLine +BABEL_OP1_206_75064_20121129_233512_inLine +BABEL_OP1_206_75064_20121129_233512_outLine +BABEL_OP1_206_75505_20130522_234600_inLine +BABEL_OP1_206_75505_20130522_234600_outLine +BABEL_OP1_206_75993_20121128_223040_inLine +BABEL_OP1_206_75993_20121128_223040_outLine +BABEL_OP1_206_76126_20121219_020552_inLine +BABEL_OP1_206_76126_20121219_020552_outLine +BABEL_OP1_206_76238_20130111_190815_inLine +BABEL_OP1_206_76238_20130111_190815_outLine +BABEL_OP1_206_76372_20130603_190448_inLine +BABEL_OP1_206_76372_20130603_190448_outLine +BABEL_OP1_206_76437_20121117_202446_inLine +BABEL_OP1_206_76437_20121117_202446_outLine +BABEL_OP1_206_77730_20130107_234021_inLine +BABEL_OP1_206_77730_20130107_234021_outLine +BABEL_OP1_206_77803_20121130_005638_inLine +BABEL_OP1_206_77803_20121130_005638_outLine +BABEL_OP1_206_78398_20121206_003319_inLine +BABEL_OP1_206_78398_20121206_003319_outLine +BABEL_OP1_206_78544_20121220_000743_inLine +BABEL_OP1_206_78544_20121220_000743_outLine +BABEL_OP1_206_78943_20121129_231930_inLine +BABEL_OP1_206_78943_20121129_231930_outLine +BABEL_OP1_206_79080_20121212_205306_inLine +BABEL_OP1_206_79080_20121212_205306_outLine +BABEL_OP1_206_79131_20130123_003404_inLine +BABEL_OP1_206_79131_20130123_003404_outLine +BABEL_OP1_206_79167_20130602_202526_inLine +BABEL_OP1_206_79167_20130602_202526_outLine +BABEL_OP1_206_79367_20121204_001524_inLine +BABEL_OP1_206_79367_20121204_001524_outLine +BABEL_OP1_206_79367_20121204_004137_inLine +BABEL_OP1_206_79367_20121204_004137_outLine +BABEL_OP1_206_79898_20130524_002505_inLine +BABEL_OP1_206_79898_20130524_002505_outLine +BABEL_OP1_206_80241_20130604_001309_inLine +BABEL_OP1_206_80241_20130604_001309_outLine +BABEL_OP1_206_80439_20130527_182722_inLine +BABEL_OP1_206_80439_20130527_182722_outLine +BABEL_OP1_206_80559_20121206_232755_inLine +BABEL_OP1_206_80559_20121206_232755_outLine +BABEL_OP1_206_80781_20121219_233131_inLine +BABEL_OP1_206_80781_20121219_233131_outLine +BABEL_OP1_206_80881_20121204_030141_inLine +BABEL_OP1_206_80881_20121204_030141_outLine +BABEL_OP1_206_81435_20121220_204044_inLine +BABEL_OP1_206_81435_20121220_204044_outLine +BABEL_OP1_206_82035_20121220_195943_inLine +BABEL_OP1_206_82035_20121220_195943_outLine +BABEL_OP1_206_82138_20121129_223223_inLine +BABEL_OP1_206_82138_20121129_223223_outLine +BABEL_OP1_206_82303_20130531_191551_inLine +BABEL_OP1_206_82303_20130531_191551_outLine +BABEL_OP1_206_82391_20121221_015423_inLine +BABEL_OP1_206_82391_20121221_015423_outLine +BABEL_OP1_206_82425_20121129_212519_inLine +BABEL_OP1_206_82425_20121129_212519_outLine +BABEL_OP1_206_82473_20121206_004738_inLine +BABEL_OP1_206_82473_20121206_004738_outLine +BABEL_OP1_206_82622_20130604_222219_inLine +BABEL_OP1_206_82622_20130604_222219_outLine +BABEL_OP1_206_83455_20121205_024244_inLine +BABEL_OP1_206_83455_20121205_024244_outLine +BABEL_OP1_206_84547_20121206_225105_inLine +BABEL_OP1_206_84547_20121206_225105_outLine +BABEL_OP1_206_84605_20121129_212603_inLine +BABEL_OP1_206_84605_20121129_212603_outLine +BABEL_OP1_206_84805_20121214_221155_inLine +BABEL_OP1_206_84805_20121214_221155_outLine +BABEL_OP1_206_85028_20121212_014236_inLine +BABEL_OP1_206_85028_20121212_014236_outLine +BABEL_OP1_206_85248_20121217_174710_inLine +BABEL_OP1_206_85248_20121217_174710_outLine +BABEL_OP1_206_85322_20130530_233851_inLine +BABEL_OP1_206_85322_20130530_233851_outLine 
+BABEL_OP1_206_85647_20121206_022317_inLine +BABEL_OP1_206_85647_20121206_022317_outLine +BABEL_OP1_206_85647_20121206_024354_inLine +BABEL_OP1_206_85647_20121206_024354_outLine +BABEL_OP1_206_85651_20130420_232505_inLine +BABEL_OP1_206_85651_20130420_232505_outLine +BABEL_OP1_206_86191_20121205_001218_inLine +BABEL_OP1_206_86191_20121205_001218_outLine +BABEL_OP1_206_86321_20121212_025212_inLine +BABEL_OP1_206_86321_20121212_025212_outLine +BABEL_OP1_206_86433_20121220_215310_inLine +BABEL_OP1_206_86433_20121220_215310_outLine +BABEL_OP1_206_86433_20121220_225718_inLine +BABEL_OP1_206_86433_20121220_225718_outLine +BABEL_OP1_206_86472_20121221_010912_inLine +BABEL_OP1_206_86472_20121221_010912_outLine +BABEL_OP1_206_86635_20121218_223238_inLine +BABEL_OP1_206_86635_20121218_223238_outLine +BABEL_OP1_206_86635_20121218_230141_inLine +BABEL_OP1_206_86635_20121218_230141_outLine +BABEL_OP1_206_86715_20130602_174900_inLine +BABEL_OP1_206_86715_20130602_174900_outLine +BABEL_OP1_206_86722_20121204_231838_inLine +BABEL_OP1_206_86722_20121204_231838_outLine +BABEL_OP1_206_86860_20130122_004822_inLine +BABEL_OP1_206_86860_20130122_004822_outLine +BABEL_OP1_206_86952_20130601_175321_inLine +BABEL_OP1_206_86952_20130601_175321_outLine +BABEL_OP1_206_87073_20130102_212334_inLine +BABEL_OP1_206_87073_20130102_212334_outLine +BABEL_OP1_206_87074_20121128_194554_inLine +BABEL_OP1_206_87074_20121128_194554_outLine +BABEL_OP1_206_87280_20121207_231125_inLine +BABEL_OP1_206_87280_20121207_231125_outLine +BABEL_OP1_206_87298_20121129_212519_inLine +BABEL_OP1_206_87298_20121129_212519_outLine +BABEL_OP1_206_87298_20121129_213610_inLine +BABEL_OP1_206_87298_20121129_213610_outLine +BABEL_OP1_206_87470_20121203_052237_inLine +BABEL_OP1_206_87470_20121203_052237_outLine +BABEL_OP1_206_87871_20121220_222250_inLine +BABEL_OP1_206_87871_20121220_222250_outLine +BABEL_OP1_206_87921_20121221_003205_inLine +BABEL_OP1_206_87921_20121221_003205_outLine +BABEL_OP1_206_88260_20121208_204256_inLine +BABEL_OP1_206_88260_20121208_204256_outLine +BABEL_OP1_206_88372_20130120_230911_inLine +BABEL_OP1_206_88372_20130120_230911_outLine +BABEL_OP1_206_88925_20130603_230637_inLine +BABEL_OP1_206_88925_20130603_230637_outLine +BABEL_OP1_206_89575_20121220_211420_inLine +BABEL_OP1_206_89575_20121220_211420_outLine +BABEL_OP1_206_89665_20121208_212046_inLine +BABEL_OP1_206_89665_20121208_212046_outLine +BABEL_OP1_206_89943_20121127_034521_inLine +BABEL_OP1_206_89943_20121127_034521_outLine +BABEL_OP1_206_89943_20121128_015307_inLine +BABEL_OP1_206_89943_20121128_015307_outLine +BABEL_OP1_206_90417_20130605_185956_inLine +BABEL_OP1_206_90417_20130605_185956_outLine +BABEL_OP1_206_90572_20130618_045832_inLine +BABEL_OP1_206_90572_20130618_045832_outLine +BABEL_OP1_206_90739_20130604_174758_inLine +BABEL_OP1_206_90739_20130604_174758_outLine +BABEL_OP1_206_90760_20130525_001351_inLine +BABEL_OP1_206_90760_20130525_001351_outLine +BABEL_OP1_206_91080_20121220_024658_inLine +BABEL_OP1_206_91080_20121220_024658_outLine +BABEL_OP1_206_91125_20121123_063516_inLine +BABEL_OP1_206_91125_20121123_063516_outLine +BABEL_OP1_206_91336_20121205_221404_inLine +BABEL_OP1_206_91336_20121205_221404_outLine +BABEL_OP1_206_91581_20121209_193208_inLine +BABEL_OP1_206_91581_20121209_193208_outLine +BABEL_OP1_206_92096_20130123_010912_inLine +BABEL_OP1_206_92096_20130123_010912_outLine +BABEL_OP1_206_92459_20130529_223322_inLine +BABEL_OP1_206_92459_20130529_223322_outLine +BABEL_OP1_206_92527_20121128_232151_inLine 
+BABEL_OP1_206_92527_20121128_232151_outLine +BABEL_OP1_206_92527_20121128_234105_inLine +BABEL_OP1_206_92527_20121128_234105_outLine +BABEL_OP1_206_92557_20121213_005100_inLine +BABEL_OP1_206_92557_20121213_005100_outLine +BABEL_OP1_206_92740_20121211_184826_inLine +BABEL_OP1_206_92740_20121211_184826_outLine +BABEL_OP1_206_93224_20121211_003624_inLine +BABEL_OP1_206_93224_20121211_003624_outLine +BABEL_OP1_206_93411_20121220_002408_inLine +BABEL_OP1_206_93411_20121220_002408_outLine +BABEL_OP1_206_93632_20121212_021207_inLine +BABEL_OP1_206_93632_20121212_021207_outLine +BABEL_OP1_206_93858_20130605_005238_inLine +BABEL_OP1_206_93858_20130605_005238_outLine +BABEL_OP1_206_93964_20121205_235339_inLine +BABEL_OP1_206_93964_20121205_235339_outLine +BABEL_OP1_206_94025_20121213_025224_inLine +BABEL_OP1_206_94025_20121213_025224_outLine +BABEL_OP1_206_94745_20130531_014707_inLine +BABEL_OP1_206_94745_20130531_014707_outLine +BABEL_OP1_206_94869_20121205_203951_inLine +BABEL_OP1_206_94869_20121205_203951_outLine +BABEL_OP1_206_95028_20130601_222202_inLine +BABEL_OP1_206_95028_20130601_222202_outLine +BABEL_OP1_206_95231_20130601_230414_inLine +BABEL_OP1_206_95231_20130601_230414_outLine +BABEL_OP1_206_95446_20121220_221335_inLine +BABEL_OP1_206_95446_20121220_221335_outLine +BABEL_OP1_206_96730_20121220_213139_inLine +BABEL_OP1_206_96730_20121220_213139_outLine +BABEL_OP1_206_96910_20121202_211324_inLine +BABEL_OP1_206_96910_20121202_211324_outLine +BABEL_OP1_206_97376_20121220_234456_inLine +BABEL_OP1_206_97376_20121220_234456_outLine +BABEL_OP1_206_97772_20121123_064042_inLine +BABEL_OP1_206_97772_20121123_064042_outLine +BABEL_OP1_206_98311_20130528_182109_inLine +BABEL_OP1_206_98311_20130528_182109_outLine +BABEL_OP1_206_98390_20121123_064010_inLine +BABEL_OP1_206_98390_20121123_064010_outLine +BABEL_OP1_206_98489_20121201_220216_inLine +BABEL_OP1_206_98489_20121201_220216_outLine +BABEL_OP1_206_99289_20130123_161855_inLine +BABEL_OP1_206_99289_20130123_161855_outLine +BABEL_OP1_206_99289_20130123_163456_inLine +BABEL_OP1_206_99289_20130123_163456_outLine +BABEL_OP1_206_99955_20121219_002822_inLine +BABEL_OP1_206_99955_20121219_002822_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list new file mode 100644 index 00000000000..37be6f9253e --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list @@ -0,0 +1,124 @@ +BABEL_OP1_206_13030_20121129_225418_inLine +BABEL_OP1_206_13030_20121129_225418_outLine +BABEL_OP1_206_14440_20121218_231347_inLine +BABEL_OP1_206_14440_20121218_231347_outLine +BABEL_OP1_206_15324_20121208_010033_inLine +BABEL_OP1_206_15324_20121208_010033_outLine +BABEL_OP1_206_17440_20121227_213432_inLine +BABEL_OP1_206_17440_20121227_213432_outLine +BABEL_OP1_206_17923_20121130_214207_inLine +BABEL_OP1_206_17923_20121130_214207_outLine +BABEL_OP1_206_18291_20130604_183732_inLine +BABEL_OP1_206_18291_20130604_183732_outLine +BABEL_OP1_206_20682_20121213_030430_inLine +BABEL_OP1_206_20682_20121213_030430_outLine +BABEL_OP1_206_20800_20130523_220352_inLine +BABEL_OP1_206_20800_20130523_220352_outLine +BABEL_OP1_206_23151_20121217_034512_inLine +BABEL_OP1_206_23151_20121217_034512_outLine +BABEL_OP1_206_24605_20121218_201807_inLine +BABEL_OP1_206_24605_20121218_201807_outLine +BABEL_OP1_206_26206_20130529_172847_inLine +BABEL_OP1_206_26206_20130529_172847_outLine +BABEL_OP1_206_27082_20121220_012037_inLine +BABEL_OP1_206_27082_20121220_012037_outLine 
+BABEL_OP1_206_28419_20121207_221153_inLine +BABEL_OP1_206_28419_20121207_221153_outLine +BABEL_OP1_206_28775_20121203_022428_inLine +BABEL_OP1_206_28775_20121203_022428_outLine +BABEL_OP1_206_31624_20121123_081518_inLine +BABEL_OP1_206_31624_20121123_081518_outLine +BABEL_OP1_206_32708_20121231_225706_inLine +BABEL_OP1_206_32708_20121231_225706_outLine +BABEL_OP1_206_34564_20121214_020257_inLine +BABEL_OP1_206_34564_20121214_020257_outLine +BABEL_OP1_206_36505_20121213_222927_inLine +BABEL_OP1_206_36505_20121213_222927_outLine +BABEL_OP1_206_38431_20121214_013939_inLine +BABEL_OP1_206_38431_20121214_013939_outLine +BABEL_OP1_206_45560_20121210_054617_inLine +BABEL_OP1_206_45560_20121210_054617_outLine +BABEL_OP1_206_45770_20121205_213203_inLine +BABEL_OP1_206_45770_20121205_213203_outLine +BABEL_OP1_206_47186_20121214_212658_inLine +BABEL_OP1_206_47186_20121214_212658_outLine +BABEL_OP1_206_47215_20121129_232526_inLine +BABEL_OP1_206_47215_20121129_232526_outLine +BABEL_OP1_206_48789_20121202_173639_inLine +BABEL_OP1_206_48789_20121202_173639_outLine +BABEL_OP1_206_50175_20130604_165733_inLine +BABEL_OP1_206_50175_20130604_165733_outLine +BABEL_OP1_206_50601_20121219_030519_inLine +BABEL_OP1_206_50601_20121219_030519_outLine +BABEL_OP1_206_50726_20130103_015437_inLine +BABEL_OP1_206_50726_20130103_015437_outLine +BABEL_OP1_206_51540_20121212_225359_inLine +BABEL_OP1_206_51540_20121212_225359_outLine +BABEL_OP1_206_52694_20130523_175759_inLine +BABEL_OP1_206_52694_20130523_175759_outLine +BABEL_OP1_206_53957_20130522_194644_inLine +BABEL_OP1_206_53957_20130522_194644_outLine +BABEL_OP1_206_54744_20130103_035406_inLine +BABEL_OP1_206_54744_20130103_035406_outLine +BABEL_OP1_206_55818_20121130_051150_inLine +BABEL_OP1_206_55818_20121130_051150_outLine +BABEL_OP1_206_55818_20121130_054331_inLine +BABEL_OP1_206_55818_20121130_054331_outLine +BABEL_OP1_206_57678_20121201_231032_inLine +BABEL_OP1_206_57678_20121201_231032_outLine +BABEL_OP1_206_60418_20130530_195743_inLine +BABEL_OP1_206_60418_20130530_195743_outLine +BABEL_OP1_206_61225_20121128_222308_inLine +BABEL_OP1_206_61225_20121128_222308_outLine +BABEL_OP1_206_63081_20121219_012926_inLine +BABEL_OP1_206_63081_20121219_012926_outLine +BABEL_OP1_206_63081_20121219_174450_inLine +BABEL_OP1_206_63081_20121219_174450_outLine +BABEL_OP1_206_63445_20121207_014019_inLine +BABEL_OP1_206_63445_20121207_014019_outLine +BABEL_OP1_206_63604_20130527_215715_inLine +BABEL_OP1_206_63604_20130527_215715_outLine +BABEL_OP1_206_65723_20121129_222430_inLine +BABEL_OP1_206_65723_20121129_222430_outLine +BABEL_OP1_206_65882_20121201_174526_inLine +BABEL_OP1_206_65882_20121201_174526_outLine +BABEL_OP1_206_66519_20121202_220401_inLine +BABEL_OP1_206_66519_20121202_220401_outLine +BABEL_OP1_206_67659_20121219_201336_inLine +BABEL_OP1_206_67659_20121219_201336_outLine +BABEL_OP1_206_73072_20121205_231914_inLine +BABEL_OP1_206_73072_20121205_231914_outLine +BABEL_OP1_206_73964_20130317_202534_inLine +BABEL_OP1_206_73964_20130317_202534_outLine +BABEL_OP1_206_76372_20130603_190448_inLine +BABEL_OP1_206_76372_20130603_190448_outLine +BABEL_OP1_206_77730_20130107_234021_inLine +BABEL_OP1_206_77730_20130107_234021_outLine +BABEL_OP1_206_79898_20130524_002505_inLine +BABEL_OP1_206_79898_20130524_002505_outLine +BABEL_OP1_206_80241_20130604_001309_inLine +BABEL_OP1_206_80241_20130604_001309_outLine +BABEL_OP1_206_80881_20121204_030141_inLine +BABEL_OP1_206_80881_20121204_030141_outLine +BABEL_OP1_206_85248_20121217_174710_inLine 
+BABEL_OP1_206_85248_20121217_174710_outLine +BABEL_OP1_206_86860_20130122_004822_inLine +BABEL_OP1_206_86860_20130122_004822_outLine +BABEL_OP1_206_86952_20130601_175321_inLine +BABEL_OP1_206_86952_20130601_175321_outLine +BABEL_OP1_206_87074_20121128_194554_inLine +BABEL_OP1_206_87074_20121128_194554_outLine +BABEL_OP1_206_87280_20121207_231125_inLine +BABEL_OP1_206_87280_20121207_231125_outLine +BABEL_OP1_206_90417_20130605_185956_inLine +BABEL_OP1_206_90417_20130605_185956_outLine +BABEL_OP1_206_91080_20121220_024658_inLine +BABEL_OP1_206_91080_20121220_024658_outLine +BABEL_OP1_206_91581_20121209_193208_inLine +BABEL_OP1_206_91581_20121209_193208_outLine +BABEL_OP1_206_92096_20130123_010912_inLine +BABEL_OP1_206_92096_20130123_010912_outLine +BABEL_OP1_206_93224_20121211_003624_inLine +BABEL_OP1_206_93224_20121211_003624_outLine +BABEL_OP1_206_98489_20121201_220216_inLine +BABEL_OP1_206_98489_20121201_220216_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..dd4d5d3c445 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list @@ -0,0 +1,705 @@ +BABEL_OP1_206_10901_20121128_230024_inLine +BABEL_OP1_206_10901_20121128_230024_outLine +BABEL_OP1_206_10901_20121129_003238_inLine +BABEL_OP1_206_10901_20121129_003238_outLine +BABEL_OP1_206_10966_20121205_213021_inLine +BABEL_OP1_206_10966_20121205_213021_outLine +BABEL_OP1_206_10966_20121205_214750_inLine +BABEL_OP1_206_10966_20121205_214750_outLine +BABEL_OP1_206_11581_20121213_020058_inLine +BABEL_OP1_206_11581_20121213_020058_outLine +BABEL_OP1_206_11797_20121207_001426_inLine +BABEL_OP1_206_11797_20121207_001426_outLine +BABEL_OP1_206_11797_20121207_002917_inLine +BABEL_OP1_206_11797_20121207_002917_outLine +BABEL_OP1_206_11859_20130602_013210_inLine +BABEL_OP1_206_11859_20130602_013210_outLine +BABEL_OP1_206_12242_20121218_022109_inLine +BABEL_OP1_206_12242_20121218_022109_outLine +BABEL_OP1_206_12851_20121215_010712_inLine +BABEL_OP1_206_12851_20121215_010712_outLine +BABEL_OP1_206_13184_20121216_223430_inLine +BABEL_OP1_206_13184_20121216_223430_outLine +BABEL_OP1_206_13184_20121216_224722_inLine +BABEL_OP1_206_13184_20121216_224722_outLine +BABEL_OP1_206_13483_20121219_205820_inLine +BABEL_OP1_206_13483_20121219_205820_outLine +BABEL_OP1_206_13483_20121219_212915_inLine +BABEL_OP1_206_13483_20121219_212915_outLine +BABEL_OP1_206_13490_20121221_005743_inLine +BABEL_OP1_206_13490_20121221_005743_outLine +BABEL_OP1_206_13744_20121205_205818_inLine +BABEL_OP1_206_13744_20121205_205818_outLine +BABEL_OP1_206_14137_20130118_010712_inLine +BABEL_OP1_206_14137_20130118_010712_outLine +BABEL_OP1_206_14137_20130122_014528_inLine +BABEL_OP1_206_14137_20130122_014528_outLine +BABEL_OP1_206_14179_20121210_224630_inLine +BABEL_OP1_206_14179_20121210_224630_outLine +BABEL_OP1_206_14719_20121213_040757_inLine +BABEL_OP1_206_14719_20121213_040757_outLine +BABEL_OP1_206_14729_20130531_183022_inLine +BABEL_OP1_206_14729_20130531_183022_outLine +BABEL_OP1_206_14807_20121221_150943_inLine +BABEL_OP1_206_14807_20121221_150943_outLine +BABEL_OP1_206_14814_20121129_203954_inLine +BABEL_OP1_206_14814_20121129_203954_outLine +BABEL_OP1_206_14899_20121203_021835_inLine +BABEL_OP1_206_14899_20121203_021835_outLine +BABEL_OP1_206_14929_20121203_232411_inLine +BABEL_OP1_206_14929_20121203_232411_outLine +BABEL_OP1_206_15024_20130527_234410_inLine 
+BABEL_OP1_206_15024_20130527_234410_outLine +BABEL_OP1_206_15702_20121214_225618_inLine +BABEL_OP1_206_15702_20121214_225618_outLine +BABEL_OP1_206_15702_20121214_231152_inLine +BABEL_OP1_206_15702_20121214_231152_outLine +BABEL_OP1_206_15702_20121214_232449_inLine +BABEL_OP1_206_15702_20121214_232449_outLine +BABEL_OP1_206_16149_20121201_010342_inLine +BABEL_OP1_206_16149_20121201_010342_outLine +BABEL_OP1_206_16467_20130531_200137_inLine +BABEL_OP1_206_16467_20130531_200137_outLine +BABEL_OP1_206_16475_20130121_210828_inLine +BABEL_OP1_206_16475_20130121_210828_outLine +BABEL_OP1_206_16475_20130121_212136_inLine +BABEL_OP1_206_16475_20130121_212136_outLine +BABEL_OP1_206_16839_20121217_170534_inLine +BABEL_OP1_206_16839_20121217_170534_outLine +BABEL_OP1_206_16886_20130524_232154_inLine +BABEL_OP1_206_16886_20130524_232154_outLine +BABEL_OP1_206_17032_20121219_220514_inLine +BABEL_OP1_206_17032_20121219_220514_outLine +BABEL_OP1_206_17280_20130527_191437_inLine +BABEL_OP1_206_17280_20130527_191437_outLine +BABEL_OP1_206_17472_20121214_193824_inLine +BABEL_OP1_206_17472_20121214_193824_outLine +BABEL_OP1_206_17567_20121209_205317_inLine +BABEL_OP1_206_17567_20121209_205317_outLine +BABEL_OP1_206_17567_20121209_211139_inLine +BABEL_OP1_206_17567_20121209_211139_outLine +BABEL_OP1_206_17615_20121214_193534_inLine +BABEL_OP1_206_17615_20121214_193534_outLine +BABEL_OP1_206_17881_20130121_005313_inLine +BABEL_OP1_206_17881_20130121_005313_outLine +BABEL_OP1_206_19722_20121130_203924_inLine +BABEL_OP1_206_19722_20121130_203924_outLine +BABEL_OP1_206_19773_20130101_015259_inLine +BABEL_OP1_206_19773_20130101_015259_outLine +BABEL_OP1_206_19818_20130529_204811_inLine +BABEL_OP1_206_19818_20130529_204811_outLine +BABEL_OP1_206_19877_20130123_175339_inLine +BABEL_OP1_206_19877_20130123_175339_outLine +BABEL_OP1_206_19877_20130123_181047_inLine +BABEL_OP1_206_19877_20130123_181047_outLine +BABEL_OP1_206_20916_20121205_203848_inLine +BABEL_OP1_206_20916_20121205_203848_outLine +BABEL_OP1_206_20922_20121214_231110_inLine +BABEL_OP1_206_20922_20121214_231110_outLine +BABEL_OP1_206_21004_20121210_215455_inLine +BABEL_OP1_206_21004_20121210_215455_outLine +BABEL_OP1_206_21004_20121210_223449_inLine +BABEL_OP1_206_21004_20121210_223449_outLine +BABEL_OP1_206_21206_20121220_001511_inLine +BABEL_OP1_206_21206_20121220_001511_outLine +BABEL_OP1_206_21327_20130111_022748_inLine +BABEL_OP1_206_21327_20130111_022748_outLine +BABEL_OP1_206_21892_20121213_235725_inLine +BABEL_OP1_206_21892_20121213_235725_outLine +BABEL_OP1_206_22494_20130530_004456_inLine +BABEL_OP1_206_22494_20130530_004456_outLine +BABEL_OP1_206_22624_20121219_210041_inLine +BABEL_OP1_206_22624_20121219_210041_outLine +BABEL_OP1_206_22826_20130121_231859_inLine +BABEL_OP1_206_22826_20130121_231859_outLine +BABEL_OP1_206_22826_20130121_233139_inLine +BABEL_OP1_206_22826_20130121_233139_outLine +BABEL_OP1_206_22965_20121128_011001_inLine +BABEL_OP1_206_22965_20121128_011001_outLine +BABEL_OP1_206_22965_20121128_012241_inLine +BABEL_OP1_206_22965_20121128_012241_outLine +BABEL_OP1_206_23006_20121203_004250_inLine +BABEL_OP1_206_23006_20121203_004250_outLine +BABEL_OP1_206_23006_20121203_073608_inLine +BABEL_OP1_206_23006_20121203_073608_outLine +BABEL_OP1_206_23092_20121227_211821_inLine +BABEL_OP1_206_23092_20121227_211821_outLine +BABEL_OP1_206_23153_20130102_224836_inLine +BABEL_OP1_206_23153_20130102_224836_outLine +BABEL_OP1_206_23190_20121219_204325_inLine +BABEL_OP1_206_23190_20121219_204325_outLine 
+BABEL_OP1_206_23239_20130118_000831_inLine +BABEL_OP1_206_23239_20130118_000831_outLine +BABEL_OP1_206_23505_20121203_010039_inLine +BABEL_OP1_206_23505_20121203_010039_outLine +BABEL_OP1_206_24253_20130120_235750_inLine +BABEL_OP1_206_24253_20130120_235750_outLine +BABEL_OP1_206_24253_20130121_000835_inLine +BABEL_OP1_206_24253_20130121_000835_outLine +BABEL_OP1_206_24253_20130121_012503_inLine +BABEL_OP1_206_24253_20130121_012503_outLine +BABEL_OP1_206_24323_20121214_212407_inLine +BABEL_OP1_206_24323_20121214_212407_outLine +BABEL_OP1_206_24323_20121214_213448_inLine +BABEL_OP1_206_24323_20121214_213448_outLine +BABEL_OP1_206_24532_20121201_203102_inLine +BABEL_OP1_206_24532_20121201_203102_outLine +BABEL_OP1_206_24569_20121210_211659_inLine +BABEL_OP1_206_24569_20121210_211659_outLine +BABEL_OP1_206_24590_20121201_210938_inLine +BABEL_OP1_206_24590_20121201_210938_outLine +BABEL_OP1_206_24590_20121201_215618_inLine +BABEL_OP1_206_24590_20121201_215618_outLine +BABEL_OP1_206_24982_20130603_194918_inLine +BABEL_OP1_206_24982_20130603_194918_outLine +BABEL_OP1_206_25412_20121210_201120_inLine +BABEL_OP1_206_25412_20121210_201120_outLine +BABEL_OP1_206_25412_20121210_203544_inLine +BABEL_OP1_206_25412_20121210_203544_outLine +BABEL_OP1_206_25496_20130529_000539_inLine +BABEL_OP1_206_25496_20130529_000539_outLine +BABEL_OP1_206_25698_20130603_011444_inLine +BABEL_OP1_206_25698_20130603_011444_outLine +BABEL_OP1_206_25719_20121215_000803_inLine +BABEL_OP1_206_25719_20121215_000803_outLine +BABEL_OP1_206_25767_20121204_021252_inLine +BABEL_OP1_206_25767_20121204_021252_outLine +BABEL_OP1_206_25961_20121202_232650_inLine +BABEL_OP1_206_25961_20121202_232650_outLine +BABEL_OP1_206_25961_20121202_234202_inLine +BABEL_OP1_206_25961_20121202_234202_outLine +BABEL_OP1_206_26388_20121202_191806_inLine +BABEL_OP1_206_26388_20121202_191806_outLine +BABEL_OP1_206_26836_20121201_210310_inLine +BABEL_OP1_206_26836_20121201_210310_outLine +BABEL_OP1_206_27042_20121219_230502_inLine +BABEL_OP1_206_27042_20121219_230502_outLine +BABEL_OP1_206_27125_20121203_012043_inLine +BABEL_OP1_206_27125_20121203_012043_outLine +BABEL_OP1_206_27203_20121214_210018_inLine +BABEL_OP1_206_27203_20121214_210018_outLine +BABEL_OP1_206_27590_20121216_180900_inLine +BABEL_OP1_206_27590_20121216_180900_outLine +BABEL_OP1_206_27841_20121216_014031_inLine +BABEL_OP1_206_27841_20121216_014031_outLine +BABEL_OP1_206_28303_20121128_201831_inLine +BABEL_OP1_206_28303_20121128_201831_outLine +BABEL_OP1_206_28945_20130118_003100_inLine +BABEL_OP1_206_28945_20130118_003100_outLine +BABEL_OP1_206_29023_20121201_234219_inLine +BABEL_OP1_206_29023_20121201_234219_outLine +BABEL_OP1_206_29039_20121220_013046_inLine +BABEL_OP1_206_29039_20121220_013046_outLine +BABEL_OP1_206_29135_20121219_224133_inLine +BABEL_OP1_206_29135_20121219_224133_outLine +BABEL_OP1_206_29323_20121219_201726_inLine +BABEL_OP1_206_29323_20121219_201726_outLine +BABEL_OP1_206_29323_20121219_203137_inLine +BABEL_OP1_206_29323_20121219_203137_outLine +BABEL_OP1_206_30395_20121206_014115_inLine +BABEL_OP1_206_30395_20121206_014115_outLine +BABEL_OP1_206_30869_20121227_221910_inLine +BABEL_OP1_206_30869_20121227_221910_outLine +BABEL_OP1_206_31109_20121224_061142_inLine +BABEL_OP1_206_31109_20121224_061142_outLine +BABEL_OP1_206_31490_20121128_234650_inLine +BABEL_OP1_206_31490_20121128_234650_outLine +BABEL_OP1_206_31628_20130528_194548_inLine +BABEL_OP1_206_31628_20130528_194548_outLine +BABEL_OP1_206_32122_20121128_184757_inLine 
+BABEL_OP1_206_32122_20121128_184757_outLine +BABEL_OP1_206_32301_20130530_191142_inLine +BABEL_OP1_206_32301_20130530_191142_outLine +BABEL_OP1_206_32328_20121215_181911_inLine +BABEL_OP1_206_32328_20121215_181911_outLine +BABEL_OP1_206_32837_20121213_221825_inLine +BABEL_OP1_206_32837_20121213_221825_outLine +BABEL_OP1_206_32837_20121213_223037_inLine +BABEL_OP1_206_32837_20121213_223037_outLine +BABEL_OP1_206_33111_20130601_200233_inLine +BABEL_OP1_206_33111_20130601_200233_outLine +BABEL_OP1_206_33273_20121129_201318_inLine +BABEL_OP1_206_33273_20121129_201318_outLine +BABEL_OP1_206_33355_20121130_055943_inLine +BABEL_OP1_206_33355_20121130_055943_outLine +BABEL_OP1_206_33672_20130524_171145_inLine +BABEL_OP1_206_33672_20130524_171145_outLine +BABEL_OP1_206_33704_20121213_214430_inLine +BABEL_OP1_206_33704_20121213_214430_outLine +BABEL_OP1_206_33840_20121213_230741_inLine +BABEL_OP1_206_33840_20121213_230741_outLine +BABEL_OP1_206_34197_20121128_232538_inLine +BABEL_OP1_206_34197_20121128_232538_outLine +BABEL_OP1_206_34328_20121202_184915_inLine +BABEL_OP1_206_34328_20121202_184915_outLine +BABEL_OP1_206_34679_20121206_000152_inLine +BABEL_OP1_206_34679_20121206_000152_outLine +BABEL_OP1_206_34826_20121215_005505_inLine +BABEL_OP1_206_34826_20121215_005505_outLine +BABEL_OP1_206_35008_20121216_210449_inLine +BABEL_OP1_206_35008_20121216_210449_outLine +BABEL_OP1_206_36894_20121128_201825_inLine +BABEL_OP1_206_36894_20121128_201825_outLine +BABEL_OP1_206_37598_20130111_224005_inLine +BABEL_OP1_206_37598_20130111_224005_outLine +BABEL_OP1_206_38554_20121123_025415_inLine +BABEL_OP1_206_38554_20121123_025415_outLine +BABEL_OP1_206_38689_20121217_013737_inLine +BABEL_OP1_206_38689_20121217_013737_outLine +BABEL_OP1_206_38878_20130530_172309_inLine +BABEL_OP1_206_38878_20130530_172309_outLine +BABEL_OP1_206_39059_20121215_230057_inLine +BABEL_OP1_206_39059_20121215_230057_outLine +BABEL_OP1_206_39059_20121216_000252_inLine +BABEL_OP1_206_39059_20121216_000252_outLine +BABEL_OP1_206_39307_20121207_024156_inLine +BABEL_OP1_206_39307_20121207_024156_outLine +BABEL_OP1_206_39426_20130120_232407_inLine +BABEL_OP1_206_39426_20130120_232407_outLine +BABEL_OP1_206_39426_20130120_233651_inLine +BABEL_OP1_206_39426_20130120_233651_outLine +BABEL_OP1_206_40557_20121218_025254_inLine +BABEL_OP1_206_40557_20121218_025254_outLine +BABEL_OP1_206_40713_20121129_215041_inLine +BABEL_OP1_206_40713_20121129_215041_outLine +BABEL_OP1_206_41097_20121215_173120_inLine +BABEL_OP1_206_41097_20121215_173120_outLine +BABEL_OP1_206_41174_20130604_193434_inLine +BABEL_OP1_206_41174_20130604_193434_outLine +BABEL_OP1_206_41233_20121215_001846_inLine +BABEL_OP1_206_41233_20121215_001846_outLine +BABEL_OP1_206_41598_20130102_233834_inLine +BABEL_OP1_206_41598_20130102_233834_outLine +BABEL_OP1_206_42029_20121220_181050_inLine +BABEL_OP1_206_42029_20121220_181050_outLine +BABEL_OP1_206_42434_20121202_195754_inLine +BABEL_OP1_206_42434_20121202_195754_outLine +BABEL_OP1_206_42434_20121202_202540_inLine +BABEL_OP1_206_42434_20121202_202540_outLine +BABEL_OP1_206_42619_20121213_204854_inLine +BABEL_OP1_206_42619_20121213_204854_outLine +BABEL_OP1_206_42771_20130601_203101_inLine +BABEL_OP1_206_42771_20130601_203101_outLine +BABEL_OP1_206_42834_20121219_015826_inLine +BABEL_OP1_206_42834_20121219_015826_outLine +BABEL_OP1_206_43286_20121125_054930_inLine +BABEL_OP1_206_43286_20121125_054930_outLine +BABEL_OP1_206_43286_20121125_060858_inLine +BABEL_OP1_206_43286_20121125_060858_outLine 
+BABEL_OP1_206_43286_20121126_003810_inLine +BABEL_OP1_206_43286_20121126_003810_outLine +BABEL_OP1_206_43368_20121128_203447_inLine +BABEL_OP1_206_43368_20121128_203447_outLine +BABEL_OP1_206_43784_20121230_224515_inLine +BABEL_OP1_206_43784_20121230_224515_outLine +BABEL_OP1_206_43788_20121223_235436_inLine +BABEL_OP1_206_43788_20121223_235436_outLine +BABEL_OP1_206_44477_20121228_020003_inLine +BABEL_OP1_206_44477_20121228_020003_outLine +BABEL_OP1_206_44619_20121129_201028_inLine +BABEL_OP1_206_44619_20121129_201028_outLine +BABEL_OP1_206_44619_20121129_203209_inLine +BABEL_OP1_206_44619_20121129_203209_outLine +BABEL_OP1_206_45235_20121213_044536_inLine +BABEL_OP1_206_45235_20121213_044536_outLine +BABEL_OP1_206_45536_20121212_023751_inLine +BABEL_OP1_206_45536_20121212_023751_outLine +BABEL_OP1_206_45851_20130123_013016_inLine +BABEL_OP1_206_45851_20130123_013016_outLine +BABEL_OP1_206_46066_20121218_015244_outLine +BABEL_OP1_206_46066_20121218_020520_inLine +BABEL_OP1_206_46066_20121218_020520_outLine +BABEL_OP1_206_46261_20130524_180914_inLine +BABEL_OP1_206_46261_20130524_180914_outLine +BABEL_OP1_206_46330_20121220_171612_inLine +BABEL_OP1_206_46330_20121220_171612_outLine +BABEL_OP1_206_46558_20121125_000809_inLine +BABEL_OP1_206_46558_20121125_000809_outLine +BABEL_OP1_206_46688_20121130_222025_inLine +BABEL_OP1_206_46688_20121130_222025_outLine +BABEL_OP1_206_46770_20121213_030348_inLine +BABEL_OP1_206_46770_20121213_030348_outLine +BABEL_OP1_206_46976_20121222_002626_inLine +BABEL_OP1_206_46976_20121222_002626_outLine +BABEL_OP1_206_47487_20121127_232736_inLine +BABEL_OP1_206_47487_20121127_232736_outLine +BABEL_OP1_206_47802_20121213_220928_inLine +BABEL_OP1_206_47802_20121213_220928_outLine +BABEL_OP1_206_47878_20121221_153159_inLine +BABEL_OP1_206_47878_20121221_153159_outLine +BABEL_OP1_206_48844_20121123_030435_inLine +BABEL_OP1_206_48844_20121123_030435_outLine +BABEL_OP1_206_48844_20121204_030447_inLine +BABEL_OP1_206_48844_20121204_030447_outLine +BABEL_OP1_206_49001_20121128_201907_inLine +BABEL_OP1_206_49001_20121128_201907_outLine +BABEL_OP1_206_49287_20121219_204754_inLine +BABEL_OP1_206_49287_20121219_204754_outLine +BABEL_OP1_206_49870_20130605_000829_inLine +BABEL_OP1_206_49870_20130605_000829_outLine +BABEL_OP1_206_49907_20121128_055731_inLine +BABEL_OP1_206_49907_20121128_055731_outLine +BABEL_OP1_206_49912_20130603_002155_inLine +BABEL_OP1_206_49912_20130603_002155_outLine +BABEL_OP1_206_50090_20121210_232617_inLine +BABEL_OP1_206_50090_20121210_232617_outLine +BABEL_OP1_206_50090_20121210_234419_inLine +BABEL_OP1_206_50090_20121210_234419_outLine +BABEL_OP1_206_50565_20121206_213949_inLine +BABEL_OP1_206_50565_20121206_213949_outLine +BABEL_OP1_206_50565_20121206_215103_inLine +BABEL_OP1_206_50565_20121206_215103_outLine +BABEL_OP1_206_50565_20121206_221547_inLine +BABEL_OP1_206_50565_20121206_221547_outLine +BABEL_OP1_206_50681_20121222_003908_inLine +BABEL_OP1_206_50681_20121222_003908_outLine +BABEL_OP1_206_51015_20121216_025307_inLine +BABEL_OP1_206_51015_20121216_025307_outLine +BABEL_OP1_206_51484_20121213_023814_inLine +BABEL_OP1_206_51484_20121213_023814_outLine +BABEL_OP1_206_51955_20121219_004818_inLine +BABEL_OP1_206_51955_20121219_004818_outLine +BABEL_OP1_206_52422_20121220_034724_inLine +BABEL_OP1_206_52422_20121220_034724_outLine +BABEL_OP1_206_52804_20121201_184720_inLine +BABEL_OP1_206_52804_20121201_184720_outLine +BABEL_OP1_206_52818_20121228_012038_inLine +BABEL_OP1_206_52818_20121228_012038_outLine 
+BABEL_OP1_206_52854_20121128_034458_inLine +BABEL_OP1_206_52854_20121128_034458_outLine +BABEL_OP1_206_52854_20121206_214928_inLine +BABEL_OP1_206_52854_20121206_214928_outLine +BABEL_OP1_206_52854_20121206_224251_inLine +BABEL_OP1_206_52854_20121206_224251_outLine +BABEL_OP1_206_52932_20121128_045304_inLine +BABEL_OP1_206_52932_20121128_045304_outLine +BABEL_OP1_206_52932_20121128_233739_inLine +BABEL_OP1_206_52932_20121128_233739_outLine +BABEL_OP1_206_54104_20130102_215440_inLine +BABEL_OP1_206_54104_20130102_215440_outLine +BABEL_OP1_206_54162_20121220_230656_inLine +BABEL_OP1_206_54162_20121220_230656_outLine +BABEL_OP1_206_54390_20121130_203012_inLine +BABEL_OP1_206_54390_20121130_203012_outLine +BABEL_OP1_206_54477_20121212_013137_inLine +BABEL_OP1_206_54477_20121212_013137_outLine +BABEL_OP1_206_54530_20130531_233153_inLine +BABEL_OP1_206_54530_20130531_233153_outLine +BABEL_OP1_206_54697_20121228_003256_inLine +BABEL_OP1_206_54697_20121228_003256_outLine +BABEL_OP1_206_54953_20121205_023337_inLine +BABEL_OP1_206_54953_20121205_023337_outLine +BABEL_OP1_206_55259_20130118_022049_inLine +BABEL_OP1_206_55259_20130118_022049_outLine +BABEL_OP1_206_55259_20130118_023307_inLine +BABEL_OP1_206_55259_20130118_023307_outLine +BABEL_OP1_206_55968_20121204_204317_inLine +BABEL_OP1_206_55968_20121204_204317_outLine +BABEL_OP1_206_55968_20121204_211213_inLine +BABEL_OP1_206_55968_20121204_211213_outLine +BABEL_OP1_206_56023_20121227_235521_inLine +BABEL_OP1_206_56023_20121227_235521_outLine +BABEL_OP1_206_56677_20130111_174028_inLine +BABEL_OP1_206_56677_20130111_174028_outLine +BABEL_OP1_206_57093_20121205_002300_inLine +BABEL_OP1_206_57093_20121205_002300_outLine +BABEL_OP1_206_57093_20121205_044909_inLine +BABEL_OP1_206_57093_20121205_044909_outLine +BABEL_OP1_206_57141_20121212_211734_inLine +BABEL_OP1_206_57141_20121212_211734_outLine +BABEL_OP1_206_57529_20121211_232002_inLine +BABEL_OP1_206_57529_20121211_232002_outLine +BABEL_OP1_206_58047_20121212_222839_inLine +BABEL_OP1_206_58047_20121212_222839_outLine +BABEL_OP1_206_58313_20121220_211354_inLine +BABEL_OP1_206_58313_20121220_211354_outLine +BABEL_OP1_206_58489_20121221_225602_inLine +BABEL_OP1_206_58489_20121221_225602_outLine +BABEL_OP1_206_58734_20121130_203502_inLine +BABEL_OP1_206_58734_20121130_203502_outLine +BABEL_OP1_206_58821_20130531_205929_inLine +BABEL_OP1_206_58821_20130531_205929_outLine +BABEL_OP1_206_60026_20121205_044105_inLine +BABEL_OP1_206_60026_20121205_044105_outLine +BABEL_OP1_206_60299_20130602_222928_inLine +BABEL_OP1_206_60299_20130602_222928_outLine +BABEL_OP1_206_60310_20121220_003756_inLine +BABEL_OP1_206_60310_20121220_003756_outLine +BABEL_OP1_206_61167_20121202_012318_inLine +BABEL_OP1_206_61167_20121202_012318_outLine +BABEL_OP1_206_61167_20121203_083125_inLine +BABEL_OP1_206_61167_20121203_083125_outLine +BABEL_OP1_206_61348_20121218_225731_inLine +BABEL_OP1_206_61348_20121218_225731_outLine +BABEL_OP1_206_61357_20130120_183001_inLine +BABEL_OP1_206_61357_20130120_183001_outLine +BABEL_OP1_206_61435_20121217_000451_inLine +BABEL_OP1_206_61435_20121217_000451_outLine +BABEL_OP1_206_61678_20121123_013649_inLine +BABEL_OP1_206_61678_20121123_013649_outLine +BABEL_OP1_206_61731_20121128_024803_inLine +BABEL_OP1_206_61731_20121128_024803_outLine +BABEL_OP1_206_61888_20130605_172611_inLine +BABEL_OP1_206_61888_20130605_172611_outLine +BABEL_OP1_206_62200_20130522_212226_inLine +BABEL_OP1_206_62200_20130522_212226_outLine +BABEL_OP1_206_62724_20121218_202436_inLine 
+BABEL_OP1_206_62724_20121218_202436_outLine +BABEL_OP1_206_62800_20121201_010750_inLine +BABEL_OP1_206_62800_20121201_010750_outLine +BABEL_OP1_206_62800_20121201_015047_inLine +BABEL_OP1_206_62800_20121201_015047_outLine +BABEL_OP1_206_62800_20121201_021942_inLine +BABEL_OP1_206_62800_20121201_021942_outLine +BABEL_OP1_206_62810_20121122_202600_inLine +BABEL_OP1_206_62810_20121122_202600_outLine +BABEL_OP1_206_63084_20121210_013516_inLine +BABEL_OP1_206_63084_20121210_013516_outLine +BABEL_OP1_206_63425_20121214_182639_inLine +BABEL_OP1_206_63425_20121214_182639_outLine +BABEL_OP1_206_63670_20121212_212623_inLine +BABEL_OP1_206_63670_20121212_212623_outLine +BABEL_OP1_206_63757_20121222_235730_inLine +BABEL_OP1_206_63757_20121222_235730_outLine +BABEL_OP1_206_63787_20130530_221300_inLine +BABEL_OP1_206_63787_20130530_221300_outLine +BABEL_OP1_206_63906_20130131_014942_inLine +BABEL_OP1_206_63906_20130131_014942_outLine +BABEL_OP1_206_64014_20130122_011323_inLine +BABEL_OP1_206_64014_20130122_011323_outLine +BABEL_OP1_206_64768_20121207_223917_inLine +BABEL_OP1_206_64768_20121207_223917_outLine +BABEL_OP1_206_65064_20121221_000939_inLine +BABEL_OP1_206_65064_20121221_000939_outLine +BABEL_OP1_206_66001_20130103_012213_inLine +BABEL_OP1_206_66001_20130103_012213_outLine +BABEL_OP1_206_66045_20121129_223013_inLine +BABEL_OP1_206_66045_20121129_223013_outLine +BABEL_OP1_206_66916_20130118_005447_inLine +BABEL_OP1_206_66916_20130118_005447_outLine +BABEL_OP1_206_66916_20130118_010520_inLine +BABEL_OP1_206_66916_20130118_010520_outLine +BABEL_OP1_206_67622_20121206_210526_inLine +BABEL_OP1_206_67622_20121206_210526_outLine +BABEL_OP1_206_68306_20121213_205817_inLine +BABEL_OP1_206_68306_20121213_205817_outLine +BABEL_OP1_206_68385_20121123_231120_inLine +BABEL_OP1_206_68385_20121123_231120_outLine +BABEL_OP1_206_68627_20130122_023725_inLine +BABEL_OP1_206_68627_20130122_023725_outLine +BABEL_OP1_206_68748_20121212_025750_inLine +BABEL_OP1_206_68748_20121212_025750_outLine +BABEL_OP1_206_68924_20121228_001758_inLine +BABEL_OP1_206_68924_20121228_001758_outLine +BABEL_OP1_206_69578_20121214_002009_inLine +BABEL_OP1_206_69578_20121214_002009_outLine +BABEL_OP1_206_69992_20130529_181609_inLine +BABEL_OP1_206_69992_20130529_181609_outLine +BABEL_OP1_206_70121_20121219_215051_inLine +BABEL_OP1_206_70121_20121219_215051_outLine +BABEL_OP1_206_70121_20121219_220824_inLine +BABEL_OP1_206_70121_20121219_220824_outLine +BABEL_OP1_206_70251_20121219_044415_inLine +BABEL_OP1_206_70251_20121219_044415_outLine +BABEL_OP1_206_70343_20121221_023826_inLine +BABEL_OP1_206_70343_20121221_023826_outLine +BABEL_OP1_206_70386_20121207_232647_inLine +BABEL_OP1_206_70386_20121207_232647_outLine +BABEL_OP1_206_71067_20121209_210046_inLine +BABEL_OP1_206_71067_20121209_210046_outLine +BABEL_OP1_206_71067_20121209_214030_inLine +BABEL_OP1_206_71067_20121209_214030_outLine +BABEL_OP1_206_71566_20130604_214443_inLine +BABEL_OP1_206_71566_20130604_214443_outLine +BABEL_OP1_206_72110_20121221_232617_inLine +BABEL_OP1_206_72110_20121221_232617_outLine +BABEL_OP1_206_72319_20130123_022502_inLine +BABEL_OP1_206_72319_20130123_022502_outLine +BABEL_OP1_206_72324_20130602_184851_inLine +BABEL_OP1_206_72324_20130602_184851_outLine +BABEL_OP1_206_72844_20121130_193956_inLine +BABEL_OP1_206_72844_20121130_193956_outLine +BABEL_OP1_206_73005_20130122_021229_inLine +BABEL_OP1_206_73005_20130122_021229_outLine +BABEL_OP1_206_73258_20130120_170200_inLine +BABEL_OP1_206_73258_20130120_170200_outLine 
+BABEL_OP1_206_73301_20130529_214428_inLine +BABEL_OP1_206_73301_20130529_214428_outLine +BABEL_OP1_206_73485_20130122_235208_inLine +BABEL_OP1_206_73485_20130122_235208_outLine +BABEL_OP1_206_73591_20121117_212751_inLine +BABEL_OP1_206_73591_20121117_212751_outLine +BABEL_OP1_206_74886_20121128_205141_inLine +BABEL_OP1_206_74886_20121128_205141_outLine +BABEL_OP1_206_75064_20121129_233512_inLine +BABEL_OP1_206_75064_20121129_233512_outLine +BABEL_OP1_206_75505_20130522_234600_inLine +BABEL_OP1_206_75505_20130522_234600_outLine +BABEL_OP1_206_75993_20121128_223040_inLine +BABEL_OP1_206_75993_20121128_223040_outLine +BABEL_OP1_206_76126_20121219_020552_inLine +BABEL_OP1_206_76126_20121219_020552_outLine +BABEL_OP1_206_76238_20130111_190815_inLine +BABEL_OP1_206_76238_20130111_190815_outLine +BABEL_OP1_206_76437_20121117_202446_inLine +BABEL_OP1_206_76437_20121117_202446_outLine +BABEL_OP1_206_77803_20121130_005638_inLine +BABEL_OP1_206_77803_20121130_005638_outLine +BABEL_OP1_206_78398_20121206_003319_inLine +BABEL_OP1_206_78398_20121206_003319_outLine +BABEL_OP1_206_78544_20121220_000743_inLine +BABEL_OP1_206_78544_20121220_000743_outLine +BABEL_OP1_206_78943_20121129_231930_inLine +BABEL_OP1_206_78943_20121129_231930_outLine +BABEL_OP1_206_79080_20121212_205306_inLine +BABEL_OP1_206_79080_20121212_205306_outLine +BABEL_OP1_206_79131_20130123_003404_inLine +BABEL_OP1_206_79131_20130123_003404_outLine +BABEL_OP1_206_79167_20130602_202526_inLine +BABEL_OP1_206_79167_20130602_202526_outLine +BABEL_OP1_206_79367_20121204_001524_inLine +BABEL_OP1_206_79367_20121204_001524_outLine +BABEL_OP1_206_79367_20121204_004137_inLine +BABEL_OP1_206_79367_20121204_004137_outLine +BABEL_OP1_206_80439_20130527_182722_inLine +BABEL_OP1_206_80439_20130527_182722_outLine +BABEL_OP1_206_80559_20121206_232755_inLine +BABEL_OP1_206_80559_20121206_232755_outLine +BABEL_OP1_206_80781_20121219_233131_inLine +BABEL_OP1_206_80781_20121219_233131_outLine +BABEL_OP1_206_81435_20121220_204044_inLine +BABEL_OP1_206_81435_20121220_204044_outLine +BABEL_OP1_206_82035_20121220_195943_inLine +BABEL_OP1_206_82035_20121220_195943_outLine +BABEL_OP1_206_82138_20121129_223223_inLine +BABEL_OP1_206_82138_20121129_223223_outLine +BABEL_OP1_206_82303_20130531_191551_inLine +BABEL_OP1_206_82303_20130531_191551_outLine +BABEL_OP1_206_82391_20121221_015423_inLine +BABEL_OP1_206_82391_20121221_015423_outLine +BABEL_OP1_206_82425_20121129_212519_inLine +BABEL_OP1_206_82425_20121129_212519_outLine +BABEL_OP1_206_82473_20121206_004738_inLine +BABEL_OP1_206_82473_20121206_004738_outLine +BABEL_OP1_206_82622_20130604_222219_inLine +BABEL_OP1_206_82622_20130604_222219_outLine +BABEL_OP1_206_83455_20121205_024244_inLine +BABEL_OP1_206_83455_20121205_024244_outLine +BABEL_OP1_206_84547_20121206_225105_inLine +BABEL_OP1_206_84547_20121206_225105_outLine +BABEL_OP1_206_84605_20121129_212603_inLine +BABEL_OP1_206_84605_20121129_212603_outLine +BABEL_OP1_206_84805_20121214_221155_inLine +BABEL_OP1_206_84805_20121214_221155_outLine +BABEL_OP1_206_85028_20121212_014236_inLine +BABEL_OP1_206_85028_20121212_014236_outLine +BABEL_OP1_206_85322_20130530_233851_inLine +BABEL_OP1_206_85322_20130530_233851_outLine +BABEL_OP1_206_85647_20121206_022317_inLine +BABEL_OP1_206_85647_20121206_022317_outLine +BABEL_OP1_206_85647_20121206_024354_inLine +BABEL_OP1_206_85647_20121206_024354_outLine +BABEL_OP1_206_85651_20130420_232505_inLine +BABEL_OP1_206_85651_20130420_232505_outLine +BABEL_OP1_206_86191_20121205_001218_inLine 
+BABEL_OP1_206_86191_20121205_001218_outLine +BABEL_OP1_206_86321_20121212_025212_inLine +BABEL_OP1_206_86321_20121212_025212_outLine +BABEL_OP1_206_86433_20121220_215310_inLine +BABEL_OP1_206_86433_20121220_215310_outLine +BABEL_OP1_206_86433_20121220_225718_inLine +BABEL_OP1_206_86433_20121220_225718_outLine +BABEL_OP1_206_86472_20121221_010912_inLine +BABEL_OP1_206_86472_20121221_010912_outLine +BABEL_OP1_206_86635_20121218_223238_inLine +BABEL_OP1_206_86635_20121218_223238_outLine +BABEL_OP1_206_86635_20121218_230141_inLine +BABEL_OP1_206_86635_20121218_230141_outLine +BABEL_OP1_206_86715_20130602_174900_inLine +BABEL_OP1_206_86715_20130602_174900_outLine +BABEL_OP1_206_86722_20121204_231838_inLine +BABEL_OP1_206_86722_20121204_231838_outLine +BABEL_OP1_206_87073_20130102_212334_inLine +BABEL_OP1_206_87073_20130102_212334_outLine +BABEL_OP1_206_87298_20121129_212519_inLine +BABEL_OP1_206_87298_20121129_212519_outLine +BABEL_OP1_206_87298_20121129_213610_inLine +BABEL_OP1_206_87298_20121129_213610_outLine +BABEL_OP1_206_87470_20121203_052237_inLine +BABEL_OP1_206_87470_20121203_052237_outLine +BABEL_OP1_206_87871_20121220_222250_inLine +BABEL_OP1_206_87871_20121220_222250_outLine +BABEL_OP1_206_87921_20121221_003205_inLine +BABEL_OP1_206_87921_20121221_003205_outLine +BABEL_OP1_206_88260_20121208_204256_inLine +BABEL_OP1_206_88260_20121208_204256_outLine +BABEL_OP1_206_88372_20130120_230911_inLine +BABEL_OP1_206_88372_20130120_230911_outLine +BABEL_OP1_206_88925_20130603_230637_inLine +BABEL_OP1_206_88925_20130603_230637_outLine +BABEL_OP1_206_89575_20121220_211420_inLine +BABEL_OP1_206_89575_20121220_211420_outLine +BABEL_OP1_206_89665_20121208_212046_inLine +BABEL_OP1_206_89665_20121208_212046_outLine +BABEL_OP1_206_89943_20121127_034521_inLine +BABEL_OP1_206_89943_20121127_034521_outLine +BABEL_OP1_206_89943_20121128_015307_inLine +BABEL_OP1_206_89943_20121128_015307_outLine +BABEL_OP1_206_90572_20130618_045832_inLine +BABEL_OP1_206_90572_20130618_045832_outLine +BABEL_OP1_206_90739_20130604_174758_inLine +BABEL_OP1_206_90739_20130604_174758_outLine +BABEL_OP1_206_90760_20130525_001351_inLine +BABEL_OP1_206_90760_20130525_001351_outLine +BABEL_OP1_206_91125_20121123_063516_inLine +BABEL_OP1_206_91125_20121123_063516_outLine +BABEL_OP1_206_91336_20121205_221404_inLine +BABEL_OP1_206_91336_20121205_221404_outLine +BABEL_OP1_206_92459_20130529_223322_inLine +BABEL_OP1_206_92459_20130529_223322_outLine +BABEL_OP1_206_92527_20121128_232151_inLine +BABEL_OP1_206_92527_20121128_232151_outLine +BABEL_OP1_206_92527_20121128_234105_inLine +BABEL_OP1_206_92527_20121128_234105_outLine +BABEL_OP1_206_92557_20121213_005100_inLine +BABEL_OP1_206_92557_20121213_005100_outLine +BABEL_OP1_206_92740_20121211_184826_inLine +BABEL_OP1_206_92740_20121211_184826_outLine +BABEL_OP1_206_93411_20121220_002408_inLine +BABEL_OP1_206_93411_20121220_002408_outLine +BABEL_OP1_206_93632_20121212_021207_inLine +BABEL_OP1_206_93632_20121212_021207_outLine +BABEL_OP1_206_93858_20130605_005238_inLine +BABEL_OP1_206_93858_20130605_005238_outLine +BABEL_OP1_206_93964_20121205_235339_inLine +BABEL_OP1_206_93964_20121205_235339_outLine +BABEL_OP1_206_94025_20121213_025224_inLine +BABEL_OP1_206_94025_20121213_025224_outLine +BABEL_OP1_206_94745_20130531_014707_inLine +BABEL_OP1_206_94745_20130531_014707_outLine +BABEL_OP1_206_94869_20121205_203951_inLine +BABEL_OP1_206_94869_20121205_203951_outLine +BABEL_OP1_206_95028_20130601_222202_inLine +BABEL_OP1_206_95028_20130601_222202_outLine 
+BABEL_OP1_206_95231_20130601_230414_inLine +BABEL_OP1_206_95231_20130601_230414_outLine +BABEL_OP1_206_95446_20121220_221335_inLine +BABEL_OP1_206_95446_20121220_221335_outLine +BABEL_OP1_206_96730_20121220_213139_inLine +BABEL_OP1_206_96730_20121220_213139_outLine +BABEL_OP1_206_96910_20121202_211324_inLine +BABEL_OP1_206_96910_20121202_211324_outLine +BABEL_OP1_206_97376_20121220_234456_inLine +BABEL_OP1_206_97376_20121220_234456_outLine +BABEL_OP1_206_97772_20121123_064042_inLine +BABEL_OP1_206_97772_20121123_064042_outLine +BABEL_OP1_206_98311_20130528_182109_inLine +BABEL_OP1_206_98311_20130528_182109_outLine +BABEL_OP1_206_98390_20121123_064010_inLine +BABEL_OP1_206_98390_20121123_064010_outLine +BABEL_OP1_206_99289_20130123_161855_inLine +BABEL_OP1_206_99289_20130123_161855_outLine +BABEL_OP1_206_99289_20130123_163456_inLine +BABEL_OP1_206_99289_20130123_163456_outLine +BABEL_OP1_206_99955_20121219_002822_inLine +BABEL_OP1_206_99955_20121219_002822_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list new file mode 100644 index 00000000000..b9d6a50aad4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list @@ -0,0 +1,285 @@ +BABEL_OP1_206_10974_20121228_005413_inLine +BABEL_OP1_206_10974_20121228_005413_outLine +BABEL_OP1_206_10974_20121228_024429_inLine +BABEL_OP1_206_10974_20121228_024429_outLine +BABEL_OP1_206_14228_20130111_014154_inLine +BABEL_OP1_206_14228_20130111_014154_outLine +BABEL_OP1_206_15262_20121229_174321_inLine +BABEL_OP1_206_15262_20121229_174321_outLine +BABEL_OP1_206_15262_20121230_013109_inLine +BABEL_OP1_206_15262_20121230_013109_outLine +BABEL_OP1_206_15848_20121219_014456_inLine +BABEL_OP1_206_15848_20121219_014456_outLine +BABEL_OP1_206_15848_20121219_020128_inLine +BABEL_OP1_206_15848_20121219_020128_outLine +BABEL_OP1_206_16056_20130618_231336_inLine +BABEL_OP1_206_16056_20130618_231336_outLine +BABEL_OP1_206_16938_20130418_204901_inLine +BABEL_OP1_206_16938_20130418_204901_outLine +BABEL_OP1_206_17115_20130704_003152_inLine +BABEL_OP1_206_17115_20130704_003152_outLine +BABEL_OP1_206_17127_20130607_184256_inLine +BABEL_OP1_206_17127_20130607_184256_outLine +BABEL_OP1_206_17496_20121213_021057_inLine +BABEL_OP1_206_17496_20121213_021057_outLine +BABEL_OP1_206_17890_20121218_232607_inLine +BABEL_OP1_206_17890_20121218_232607_outLine +BABEL_OP1_206_17890_20121218_234135_inLine +BABEL_OP1_206_17890_20121218_234135_outLine +BABEL_OP1_206_19130_20130618_230729_inLine +BABEL_OP1_206_19130_20130618_230729_outLine +BABEL_OP1_206_19130_20130618_233209_inLine +BABEL_OP1_206_19130_20130618_233209_outLine +BABEL_OP1_206_19782_20121212_231659_inLine +BABEL_OP1_206_19782_20121212_231659_outLine +BABEL_OP1_206_19832_20130619_213422_inLine +BABEL_OP1_206_19832_20130619_213422_outLine +BABEL_OP1_206_19832_20130621_212156_inLine +BABEL_OP1_206_19832_20130621_212156_outLine +BABEL_OP1_206_21159_20130409_220748_inLine +BABEL_OP1_206_21159_20130409_220748_outLine +BABEL_OP1_206_22034_20130823_052902_inLine +BABEL_OP1_206_22034_20130823_052902_outLine +BABEL_OP1_206_22216_20121206_230217_inLine +BABEL_OP1_206_22216_20121206_230217_outLine +BABEL_OP1_206_22612_20130111_030229_inLine +BABEL_OP1_206_22612_20130111_030229_outLine +BABEL_OP1_206_23983_20130318_001202_inLine +BABEL_OP1_206_23983_20130318_001202_outLine +BABEL_OP1_206_24239_20130123_200948_inLine +BABEL_OP1_206_24239_20130123_200948_outLine +BABEL_OP1_206_28871_20130316_231654_inLine 
+BABEL_OP1_206_28871_20130316_231654_outLine +BABEL_OP1_206_29168_20121219_024841_inLine +BABEL_OP1_206_29168_20121219_024841_outLine +BABEL_OP1_206_29230_20130607_212302_inLine +BABEL_OP1_206_29230_20130607_212302_outLine +BABEL_OP1_206_29685_20121209_215754_inLine +BABEL_OP1_206_29685_20121209_215754_outLine +BABEL_OP1_206_29685_20121218_164410_inLine +BABEL_OP1_206_29685_20121218_164410_outLine +BABEL_OP1_206_30653_20130609_003734_inLine +BABEL_OP1_206_30653_20130609_003734_outLine +BABEL_OP1_206_30653_20130609_010837_inLine +BABEL_OP1_206_30653_20130609_010837_outLine +BABEL_OP1_206_30720_20130717_175529_inLine +BABEL_OP1_206_30720_20130717_175529_outLine +BABEL_OP1_206_32832_20121210_200734_inLine +BABEL_OP1_206_32832_20121210_200734_outLine +BABEL_OP1_206_32872_20130709_004706_inLine +BABEL_OP1_206_32872_20130709_004706_outLine +BABEL_OP1_206_32961_20130708_045618_inLine +BABEL_OP1_206_32961_20130708_045618_outLine +BABEL_OP1_206_34629_20130719_022535_inLine +BABEL_OP1_206_34629_20130719_022535_outLine +BABEL_OP1_206_36017_20130123_211455_inLine +BABEL_OP1_206_36017_20130123_211455_outLine +BABEL_OP1_206_38139_20130714_222440_inLine +BABEL_OP1_206_38139_20130714_222440_outLine +BABEL_OP1_206_39159_20121219_215221_inLine +BABEL_OP1_206_39159_20121219_215221_outLine +BABEL_OP1_206_41272_20130123_012754_inLine +BABEL_OP1_206_41272_20130123_012754_outLine +BABEL_OP1_206_43157_20130702_170155_inLine +BABEL_OP1_206_43157_20130702_170155_outLine +BABEL_OP1_206_43789_20130704_211632_inLine +BABEL_OP1_206_43789_20130704_211632_outLine +BABEL_OP1_206_43789_20130704_214224_inLine +BABEL_OP1_206_43789_20130704_214224_outLine +BABEL_OP1_206_43990_20130717_000515_inLine +BABEL_OP1_206_43990_20130717_000515_outLine +BABEL_OP1_206_44290_20130122_225754_inLine +BABEL_OP1_206_44290_20130122_225754_outLine +BABEL_OP1_206_44290_20130122_230740_inLine +BABEL_OP1_206_44290_20130122_230740_outLine +BABEL_OP1_206_44290_20130122_231733_inLine +BABEL_OP1_206_44290_20130122_231733_outLine +BABEL_OP1_206_44420_20121207_005913_inLine +BABEL_OP1_206_44420_20121207_005913_outLine +BABEL_OP1_206_44847_20121213_214340_inLine +BABEL_OP1_206_44847_20121213_214340_outLine +BABEL_OP1_206_45908_20130128_214430_inLine +BABEL_OP1_206_45908_20130128_214430_outLine +BABEL_OP1_206_46315_20121215_035427_inLine +BABEL_OP1_206_46315_20121215_035427_outLine +BABEL_OP1_206_46881_20121207_203628_inLine +BABEL_OP1_206_46881_20121207_203628_outLine +BABEL_OP1_206_46881_20121207_205322_inLine +BABEL_OP1_206_46881_20121207_205322_outLine +BABEL_OP1_206_46974_20121217_030549_inLine +BABEL_OP1_206_46974_20121217_030549_outLine +BABEL_OP1_206_46974_20121217_175603_inLine +BABEL_OP1_206_46974_20121217_175603_outLine +BABEL_OP1_206_47270_20130610_005427_inLine +BABEL_OP1_206_47270_20130610_005427_outLine +BABEL_OP1_206_48663_20130618_220742_inLine +BABEL_OP1_206_48663_20130618_220742_outLine +BABEL_OP1_206_49197_20130102_213736_inLine +BABEL_OP1_206_49197_20130102_213736_outLine +BABEL_OP1_206_49630_20121219_190512_inLine +BABEL_OP1_206_49630_20121219_190512_outLine +BABEL_OP1_206_52438_20121205_011303_inLine +BABEL_OP1_206_52438_20121205_011303_outLine +BABEL_OP1_206_52442_20130103_034355_inLine +BABEL_OP1_206_52442_20130103_034355_outLine +BABEL_OP1_206_52483_20130719_011409_inLine +BABEL_OP1_206_52483_20130719_011409_outLine +BABEL_OP1_206_53206_20130717_214929_inLine +BABEL_OP1_206_53206_20130717_214929_outLine +BABEL_OP1_206_56213_20121211_204232_inLine +BABEL_OP1_206_56213_20121211_204232_outLine 
+BABEL_OP1_206_56345_20130716_043400_inLine +BABEL_OP1_206_56345_20130716_043400_outLine +BABEL_OP1_206_56370_20121207_023036_inLine +BABEL_OP1_206_56370_20121207_023036_outLine +BABEL_OP1_206_56523_20121205_023208_inLine +BABEL_OP1_206_56523_20121205_023208_outLine +BABEL_OP1_206_57067_20130102_012254_inLine +BABEL_OP1_206_57067_20130102_012254_outLine +BABEL_OP1_206_60307_20130719_015514_inLine +BABEL_OP1_206_60307_20130719_015514_outLine +BABEL_OP1_206_60307_20130719_020926_inLine +BABEL_OP1_206_60307_20130719_020926_outLine +BABEL_OP1_206_60307_20130719_024339_inLine +BABEL_OP1_206_60307_20130719_024339_outLine +BABEL_OP1_206_60508_20121207_000229_inLine +BABEL_OP1_206_60508_20121207_000229_outLine +BABEL_OP1_206_60661_20121130_205836_inLine +BABEL_OP1_206_60661_20121130_205836_outLine +BABEL_OP1_206_62158_20130710_235209_inLine +BABEL_OP1_206_62158_20130710_235209_outLine +BABEL_OP1_206_62286_20121128_234346_inLine +BABEL_OP1_206_62286_20121128_234346_outLine +BABEL_OP1_206_62286_20121129_203539_inLine +BABEL_OP1_206_62286_20121129_203539_outLine +BABEL_OP1_206_62286_20121129_212959_inLine +BABEL_OP1_206_62286_20121129_212959_outLine +BABEL_OP1_206_62434_20121219_215717_inLine +BABEL_OP1_206_62434_20121219_215717_outLine +BABEL_OP1_206_62456_20121213_021820_inLine +BABEL_OP1_206_62456_20121213_021820_outLine +BABEL_OP1_206_64638_20121219_213206_inLine +BABEL_OP1_206_64638_20121219_213206_outLine +BABEL_OP1_206_66361_20130716_054608_inLine +BABEL_OP1_206_66361_20130716_054608_outLine +BABEL_OP1_206_67389_20130710_003945_inLine +BABEL_OP1_206_67389_20130710_003945_outLine +BABEL_OP1_206_67726_20130722_002158_inLine +BABEL_OP1_206_67726_20130722_002158_outLine +BABEL_OP1_206_67794_20121205_012401_inLine +BABEL_OP1_206_67794_20121205_012401_outLine +BABEL_OP1_206_68823_20130823_044634_inLine +BABEL_OP1_206_68823_20130823_044634_outLine +BABEL_OP1_206_69885_20130610_194001_inLine +BABEL_OP1_206_69885_20130610_194001_outLine +BABEL_OP1_206_69982_20130706_192449_inLine +BABEL_OP1_206_69982_20130706_192449_outLine +BABEL_OP1_206_70282_20121214_191323_inLine +BABEL_OP1_206_70282_20121214_191323_outLine +BABEL_OP1_206_70526_20130123_020108_inLine +BABEL_OP1_206_70526_20130123_020108_outLine +BABEL_OP1_206_70986_20130710_195224_inLine +BABEL_OP1_206_70986_20130710_195224_outLine +BABEL_OP1_206_70986_20130710_200021_inLine +BABEL_OP1_206_70986_20130710_200021_outLine +BABEL_OP1_206_71189_20130122_234213_inLine +BABEL_OP1_206_71189_20130122_234213_outLine +BABEL_OP1_206_72654_20130418_185023_inLine +BABEL_OP1_206_72654_20130418_185023_outLine +BABEL_OP1_206_73408_20130706_195257_inLine +BABEL_OP1_206_73408_20130706_195257_outLine +BABEL_OP1_206_74226_20130709_222957_inLine +BABEL_OP1_206_74226_20130709_222957_outLine +BABEL_OP1_206_75359_20130128_221313_inLine +BABEL_OP1_206_75359_20130128_221313_outLine +BABEL_OP1_206_75366_20130626_043947_inLine +BABEL_OP1_206_75366_20130626_043947_outLine +BABEL_OP1_206_77567_20121206_015015_inLine +BABEL_OP1_206_77567_20121206_015015_outLine +BABEL_OP1_206_78609_20121210_172907_inLine +BABEL_OP1_206_78609_20121210_172907_outLine +BABEL_OP1_206_79139_20121130_021538_inLine +BABEL_OP1_206_79139_20121130_021538_outLine +BABEL_OP1_206_79571_20121208_220739_inLine +BABEL_OP1_206_79571_20121208_220739_outLine +BABEL_OP1_206_79751_20130709_233219_inLine +BABEL_OP1_206_79751_20130709_233219_outLine +BABEL_OP1_206_80897_20121220_195655_inLine +BABEL_OP1_206_80897_20121220_195655_outLine +BABEL_OP1_206_81229_20121203_035326_inLine 
+BABEL_OP1_206_81229_20121203_035331_outLine +BABEL_OP1_206_81424_20130114_180200_inLine +BABEL_OP1_206_81424_20130114_180200_outLine +BABEL_OP1_206_81553_20130114_222622_inLine +BABEL_OP1_206_81553_20130114_222622_outLine +BABEL_OP1_206_82030_20130607_164514_inLine +BABEL_OP1_206_82030_20130607_164514_outLine +BABEL_OP1_206_82030_20130607_170355_inLine +BABEL_OP1_206_82030_20130607_170355_outLine +BABEL_OP1_206_83366_20130112_225127_inLine +BABEL_OP1_206_83366_20130112_225127_outLine +BABEL_OP1_206_83851_20121219_214118_inLine +BABEL_OP1_206_83851_20121219_214118_outLine +BABEL_OP1_206_84327_20130605_221231_inLine +BABEL_OP1_206_84327_20130605_221231_outLine +BABEL_OP1_206_84339_20130610_180645_inLine +BABEL_OP1_206_84339_20130610_180645_outLine +BABEL_OP1_206_84583_20121209_183927_inLine +BABEL_OP1_206_84583_20121209_183927_outLine +BABEL_OP1_206_84709_20130718_233020_inLine +BABEL_OP1_206_84709_20130718_233020_outLine +BABEL_OP1_206_85246_20130708_202906_inLine +BABEL_OP1_206_85246_20130708_202906_outLine +BABEL_OP1_206_85439_20130123_002202_inLine +BABEL_OP1_206_85439_20130123_002202_outLine +BABEL_OP1_206_87693_20121205_012117_inLine +BABEL_OP1_206_87693_20121205_012117_outLine +BABEL_OP1_206_89718_20130811_194933_inLine +BABEL_OP1_206_89718_20130811_194933_outLine +BABEL_OP1_206_89794_20121214_233120_inLine +BABEL_OP1_206_89794_20121214_233120_outLine +BABEL_OP1_206_90440_20130718_001037_inLine +BABEL_OP1_206_90440_20130718_001037_outLine +BABEL_OP1_206_90440_20130718_002114_inLine +BABEL_OP1_206_90440_20130718_002114_outLine +BABEL_OP1_206_91825_20121229_025012_inLine +BABEL_OP1_206_91825_20121229_025012_outLine +BABEL_OP1_206_91930_20130609_211010_inLine +BABEL_OP1_206_91930_20130609_211010_outLine +BABEL_OP1_206_92176_20121206_000728_inLine +BABEL_OP1_206_92176_20121206_000728_outLine +BABEL_OP1_206_92281_20130715_213202_inLine +BABEL_OP1_206_92281_20130715_213202_outLine +BABEL_OP1_206_92509_20121207_014928_inLine +BABEL_OP1_206_92509_20121207_014928_outLine +BABEL_OP1_206_92757_20121114_211008_inLine +BABEL_OP1_206_92757_20121114_211009_outLine +BABEL_OP1_206_93443_20130619_014744_inLine +BABEL_OP1_206_93443_20130619_014744_outLine +BABEL_OP1_206_94044_20130717_183259_inLine +BABEL_OP1_206_94044_20130717_183259_outLine +BABEL_OP1_206_94044_20130717_184742_inLine +BABEL_OP1_206_94044_20130717_184742_outLine +BABEL_OP1_206_94166_20130618_012452_inLine +BABEL_OP1_206_94166_20130618_012452_outLine +BABEL_OP1_206_94803_20130702_181918_inLine +BABEL_OP1_206_94803_20130702_181918_outLine +BABEL_OP1_206_94969_20130618_233618_inLine +BABEL_OP1_206_95598_20121218_232233_inLine +BABEL_OP1_206_95598_20121218_232233_outLine +BABEL_OP1_206_96088_20130123_015420_inLine +BABEL_OP1_206_96088_20130123_015420_outLine +BABEL_OP1_206_96446_20121219_003144_inLine +BABEL_OP1_206_96446_20121219_003144_outLine +BABEL_OP1_206_96940_20130723_004026_inLine +BABEL_OP1_206_96940_20130723_004026_outLine +BABEL_OP1_206_96985_20121212_205933_inLine +BABEL_OP1_206_96985_20121212_205933_outLine +BABEL_OP1_206_97570_20130111_233033_inLine +BABEL_OP1_206_97570_20130111_233033_outLine +BABEL_OP1_206_99516_20121123_082052_inLine +BABEL_OP1_206_99516_20121123_082052_outLine +BABEL_OP1_206_99920_20130524_010643_inLine +BABEL_OP1_206_99920_20130524_010643_outLine +BABEL_OP1_206_99920_20130524_012051_inLine +BABEL_OP1_206_99920_20130524_012051_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/dev.list b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list new file mode 100644 index 00000000000..a8ed2a6bc2a --- 
/dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list @@ -0,0 +1,132 @@ +BABEL_OP2_207_14141_20130927_123928_inLine +BABEL_OP2_207_14141_20130927_123928_outLine +BABEL_OP2_207_14229_20130801_102759_inLine +BABEL_OP2_207_14229_20130801_102759_outLine +BABEL_OP2_207_14440_20130824_152406_inLine +BABEL_OP2_207_14440_20130824_152406_outLine +BABEL_OP2_207_14440_20130824_153139_inLine +BABEL_OP2_207_14440_20130824_153139_outLine +BABEL_OP2_207_14440_20130824_153643_inLine +BABEL_OP2_207_14440_20130824_153643_outLine +BABEL_OP2_207_14875_20130731_170626_inLine +BABEL_OP2_207_14875_20130731_170626_outLine +BABEL_OP2_207_15848_20130623_210617_inLine +BABEL_OP2_207_15848_20130623_210617_outLine +BABEL_OP2_207_17127_20130925_073246_inLine +BABEL_OP2_207_17127_20130925_073246_outLine +BABEL_OP2_207_17923_20130629_151018_inLine +BABEL_OP2_207_17923_20130629_151018_outLine +BABEL_OP2_207_20916_20130623_184646_inLine +BABEL_OP2_207_20916_20130623_184646_outLine +BABEL_OP2_207_20916_20130623_190432_inLine +BABEL_OP2_207_20916_20130623_190432_outLine +BABEL_OP2_207_21244_20131010_122553_inLine +BABEL_OP2_207_21244_20131010_122553_outLine +BABEL_OP2_207_22216_20130801_104847_inLine +BABEL_OP2_207_22216_20130801_104847_outLine +BABEL_OP2_207_23505_20130626_153607_inLine +BABEL_OP2_207_23505_20130626_153607_outLine +BABEL_OP2_207_23893_20130909_152137_inLine +BABEL_OP2_207_23893_20130909_152137_outLine +BABEL_OP2_207_24589_20130722_131056_inLine +BABEL_OP2_207_24589_20130722_131056_outLine +BABEL_OP2_207_27218_20130701_174655_inLine +BABEL_OP2_207_27218_20130701_174655_outLine +BABEL_OP2_207_29911_20131212_174224_inLine +BABEL_OP2_207_29911_20131212_174224_outLine +BABEL_OP2_207_32708_20130730_130556_inLine +BABEL_OP2_207_32708_20130730_130556_outLine +BABEL_OP2_207_32832_20130922_122814_inLine +BABEL_OP2_207_32832_20130922_122814_outLine +BABEL_OP2_207_33111_20130930_120538_inLine +BABEL_OP2_207_33111_20130930_120538_outLine +BABEL_OP2_207_33175_20130621_162225_inLine +BABEL_OP2_207_33175_20130621_162225_outLine +BABEL_OP2_207_34477_20130722_140642_inLine +BABEL_OP2_207_34477_20130722_140642_outLine +BABEL_OP2_207_38431_20130915_163140_inLine +BABEL_OP2_207_38431_20130915_163140_outLine +BABEL_OP2_207_40713_20130711_151622_inLine +BABEL_OP2_207_40713_20130711_151622_outLine +BABEL_OP2_207_41100_20130712_160739_inLine +BABEL_OP2_207_41100_20130712_160739_outLine +BABEL_OP2_207_43646_20130624_165324_inLine +BABEL_OP2_207_43646_20130624_165324_outLine +BABEL_OP2_207_45697_20130925_144605_inLine +BABEL_OP2_207_45697_20130925_144605_outLine +BABEL_OP2_207_46535_20131219_223648_inLine +BABEL_OP2_207_46535_20131219_223648_outLine +BABEL_OP2_207_46625_20130627_133432_inLine +BABEL_OP2_207_46625_20130627_133432_outLine +BABEL_OP2_207_46881_20130626_133140_inLine +BABEL_OP2_207_46881_20130626_133140_outLine +BABEL_OP2_207_47270_20130926_142206_inLine +BABEL_OP2_207_47270_20130926_142206_outLine +BABEL_OP2_207_54744_20130627_200004_inLine +BABEL_OP2_207_54744_20130627_200004_outLine +BABEL_OP2_207_56468_20131102_114004_inLine +BABEL_OP2_207_56468_20131102_114004_outLine +BABEL_OP2_207_59898_20130625_211705_inLine +BABEL_OP2_207_59898_20130625_211705_outLine +BABEL_OP2_207_59898_20130625_212216_inLine +BABEL_OP2_207_59898_20130625_212216_outLine +BABEL_OP2_207_59898_20130625_212948_inLine +BABEL_OP2_207_59898_20130625_212948_outLine +BABEL_OP2_207_60706_20130623_230602_inLine +BABEL_OP2_207_60706_20130623_230602_outLine +BABEL_OP2_207_61011_20130624_164607_inLine +BABEL_OP2_207_61011_20130624_164607_outLine 
+BABEL_OP2_207_61357_20130822_150714_inLine +BABEL_OP2_207_61357_20130822_150714_outLine +BABEL_OP2_207_61963_20130830_141616_inLine +BABEL_OP2_207_61963_20130830_141616_outLine +BABEL_OP2_207_65252_20131008_183014_inLine +BABEL_OP2_207_65252_20131008_183014_outLine +BABEL_OP2_207_67213_20131218_185924_inLine +BABEL_OP2_207_67213_20131218_185924_outLine +BABEL_OP2_207_70110_20130621_125315_inLine +BABEL_OP2_207_70110_20130621_125315_outLine +BABEL_OP2_207_70726_20131222_161540_inLine +BABEL_OP2_207_70726_20131222_161540_outLine +BABEL_OP2_207_73072_20130730_140848_inLine +BABEL_OP2_207_73072_20130730_140848_outLine +BABEL_OP2_207_74226_20130828_115915_inLine +BABEL_OP2_207_74226_20130828_115915_outLine +BABEL_OP2_207_76218_20130809_145308_inLine +BABEL_OP2_207_76218_20130809_145308_outLine +BABEL_OP2_207_76837_20131207_184347_inLine +BABEL_OP2_207_76837_20131207_184347_outLine +BABEL_OP2_207_77730_20130628_215628_inLine +BABEL_OP2_207_77730_20130628_215628_outLine +BABEL_OP2_207_79131_20130915_155341_inLine +BABEL_OP2_207_79131_20130915_155341_outLine +BABEL_OP2_207_80577_20130930_204532_inLine +BABEL_OP2_207_80577_20130930_204532_outLine +BABEL_OP2_207_80881_20130621_220309_inLine +BABEL_OP2_207_80881_20130621_220309_outLine +BABEL_OP2_207_82742_20130915_204759_inLine +BABEL_OP2_207_82742_20130915_204759_outLine +BABEL_OP2_207_83851_20130731_154045_inLine +BABEL_OP2_207_83851_20130731_154045_outLine +BABEL_OP2_207_84815_20130911_144350_inLine +BABEL_OP2_207_84815_20130911_144350_outLine +BABEL_OP2_207_85179_20130920_130213_inLine +BABEL_OP2_207_85179_20130920_130213_outLine +BABEL_OP2_207_85439_20131009_141636_inLine +BABEL_OP2_207_85439_20131009_141636_outLine +BABEL_OP2_207_86557_20130621_160840_inLine +BABEL_OP2_207_86557_20130621_160840_outLine +BABEL_OP2_207_86557_20130621_161939_inLine +BABEL_OP2_207_86557_20130621_161939_outLine +BABEL_OP2_207_90777_20130725_111134_inLine +BABEL_OP2_207_90777_20130725_111134_outLine +BABEL_OP2_207_92886_20130711_144627_inLine +BABEL_OP2_207_92886_20130711_144627_outLine +BABEL_OP2_207_96324_20130625_154301_inLine +BABEL_OP2_207_96324_20130625_154301_outLine +BABEL_OP2_207_97136_20131003_120422_inLine +BABEL_OP2_207_97136_20131003_120422_outLine +BABEL_OP2_207_97849_20131003_125642_inLine +BABEL_OP2_207_97849_20131003_125642_outLine +BABEL_OP2_207_99975_20131027_145501_inLine +BABEL_OP2_207_99975_20131027_145501_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/eval.list b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list new file mode 100644 index 00000000000..57c92f399f4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_207_10416_20130808_151430_inLine +BABEL_OP2_207_10416_20130808_151430_outLine +BABEL_OP2_207_10974_20130821_152545_inLine +BABEL_OP2_207_10974_20130821_152545_outLine +BABEL_OP2_207_13040_20130711_172945_inLine +BABEL_OP2_207_13040_20130711_172945_outLine +BABEL_OP2_207_13427_20130817_155156_inLine +BABEL_OP2_207_13427_20130817_155156_outLine +BABEL_OP2_207_15042_20130915_183113_inLine +BABEL_OP2_207_15042_20130915_183113_outLine +BABEL_OP2_207_15163_20130809_152912_inLine +BABEL_OP2_207_15163_20130809_152912_outLine +BABEL_OP2_207_15926_20130905_125437_inLine +BABEL_OP2_207_15926_20130905_125437_outLine +BABEL_OP2_207_16184_20130625_002017_inLine +BABEL_OP2_207_16184_20130625_002017_outLine +BABEL_OP2_207_16467_20130918_155738_inLine +BABEL_OP2_207_16467_20130918_155738_outLine +BABEL_OP2_207_16467_20130918_160609_inLine +BABEL_OP2_207_16467_20130918_160609_outLine 
+BABEL_OP2_207_16601_20130906_133242_inLine +BABEL_OP2_207_16601_20130906_133242_outLine +BABEL_OP2_207_19545_20130821_135751_inLine +BABEL_OP2_207_19545_20130821_135751_outLine +BABEL_OP2_207_19672_20130903_141816_inLine +BABEL_OP2_207_19672_20130903_141816_outLine +BABEL_OP2_207_20896_20131224_170209_inLine +BABEL_OP2_207_20896_20131224_170209_outLine +BABEL_OP2_207_21029_20130702_120434_inLine +BABEL_OP2_207_21029_20130702_120434_outLine +BABEL_OP2_207_21581_20130724_161007_inLine +BABEL_OP2_207_21581_20130724_161007_outLine +BABEL_OP2_207_22170_20130828_151813_inLine +BABEL_OP2_207_22170_20130828_151813_outLine +BABEL_OP2_207_24010_20131023_153049_inLine +BABEL_OP2_207_24010_20131023_153049_outLine +BABEL_OP2_207_24033_20130930_123827_inLine +BABEL_OP2_207_24033_20130930_123827_outLine +BABEL_OP2_207_24221_20131028_153502_inLine +BABEL_OP2_207_24221_20131028_153502_outLine +BABEL_OP2_207_27082_20130812_162844_inLine +BABEL_OP2_207_27082_20130812_162844_outLine +BABEL_OP2_207_28422_20130905_135311_inLine +BABEL_OP2_207_28422_20130905_135311_outLine +BABEL_OP2_207_28871_20130621_163843_inLine +BABEL_OP2_207_28871_20130621_163843_outLine +BABEL_OP2_207_29230_20131015_133532_inLine +BABEL_OP2_207_29230_20131015_133532_outLine +BABEL_OP2_207_30250_20130720_111643_inLine +BABEL_OP2_207_30250_20130720_111643_outLine +BABEL_OP2_207_31484_20130906_164627_inLine +BABEL_OP2_207_31484_20130906_164627_outLine +BABEL_OP2_207_34019_20131218_205039_inLine +BABEL_OP2_207_34019_20131218_205039_outLine +BABEL_OP2_207_36017_20131003_111732_inLine +BABEL_OP2_207_36017_20131003_111732_outLine +BABEL_OP2_207_37068_20131211_133052_inLine +BABEL_OP2_207_37068_20131211_133052_outLine +BABEL_OP2_207_37499_20131009_162024_inLine +BABEL_OP2_207_37499_20131009_162024_outLine +BABEL_OP2_207_41493_20130628_222817_inLine +BABEL_OP2_207_41493_20130628_222817_outLine +BABEL_OP2_207_41920_20130730_105920_inLine +BABEL_OP2_207_41920_20130730_105920_outLine +BABEL_OP2_207_42600_20130724_152811_inLine +BABEL_OP2_207_42600_20130724_152811_outLine +BABEL_OP2_207_42600_20130724_154332_inLine +BABEL_OP2_207_42600_20130724_154332_outLine +BABEL_OP2_207_44255_20130925_074247_inLine +BABEL_OP2_207_44255_20130925_074247_outLine +BABEL_OP2_207_44678_20131029_142212_inLine +BABEL_OP2_207_44678_20131029_142212_outLine +BABEL_OP2_207_45235_20130918_123528_inLine +BABEL_OP2_207_45235_20130918_123528_outLine +BABEL_OP2_207_45777_20130731_140413_inLine +BABEL_OP2_207_45777_20130731_140413_outLine +BABEL_OP2_207_46041_20130919_111546_inLine +BABEL_OP2_207_46041_20130919_111546_outLine +BABEL_OP2_207_46702_20130627_192620_inLine +BABEL_OP2_207_46702_20130627_192620_outLine +BABEL_OP2_207_48663_20130828_133856_inLine +BABEL_OP2_207_48663_20130828_133856_outLine +BABEL_OP2_207_49775_20130711_130307_inLine +BABEL_OP2_207_49775_20130711_130307_outLine +BABEL_OP2_207_50186_20131207_163954_inLine +BABEL_OP2_207_50186_20131207_163954_outLine +BABEL_OP2_207_50962_20130712_152844_inLine +BABEL_OP2_207_50962_20130712_152844_outLine +BABEL_OP2_207_52070_20131018_160716_inLine +BABEL_OP2_207_52070_20131018_160716_outLine +BABEL_OP2_207_52694_20130819_142518_inLine +BABEL_OP2_207_52694_20130819_142518_outLine +BABEL_OP2_207_52854_20130701_173625_inLine +BABEL_OP2_207_52854_20130701_173625_outLine +BABEL_OP2_207_53419_20130915_212209_inLine +BABEL_OP2_207_53419_20130915_212209_outLine +BABEL_OP2_207_55742_20130628_204255_inLine +BABEL_OP2_207_55742_20130628_204255_outLine +BABEL_OP2_207_56429_20130729_115308_inLine 
+BABEL_OP2_207_56429_20130729_115308_outLine +BABEL_OP2_207_56743_20130731_145617_inLine +BABEL_OP2_207_56743_20130731_145617_outLine +BABEL_OP2_207_57654_20130711_145355_inLine +BABEL_OP2_207_57654_20130711_145355_outLine +BABEL_OP2_207_57654_20130711_150856_inLine +BABEL_OP2_207_57654_20130711_150856_outLine +BABEL_OP2_207_58815_20130917_153637_inLine +BABEL_OP2_207_58815_20130917_153637_outLine +BABEL_OP2_207_59993_20130712_145207_inLine +BABEL_OP2_207_59993_20130712_145207_outLine +BABEL_OP2_207_60418_20130829_155821_inLine +BABEL_OP2_207_60418_20130829_155821_outLine +BABEL_OP2_207_60508_20130801_182520_inLine +BABEL_OP2_207_60508_20130801_182520_outLine +BABEL_OP2_207_62430_20130930_120306_inLine +BABEL_OP2_207_62430_20130930_120306_outLine +BABEL_OP2_207_63445_20130730_154254_inLine +BABEL_OP2_207_63445_20130730_154254_outLine +BABEL_OP2_207_64796_20130627_095719_inLine +BABEL_OP2_207_64796_20130627_095719_outLine +BABEL_OP2_207_64796_20130627_102602_inLine +BABEL_OP2_207_64796_20130627_102602_outLine +BABEL_OP2_207_66519_20130724_134257_inLine +BABEL_OP2_207_66519_20130724_134257_outLine +BABEL_OP2_207_66519_20130724_135210_inLine +BABEL_OP2_207_66519_20130724_135210_outLine +BABEL_OP2_207_67373_20130629_154522_inLine +BABEL_OP2_207_67373_20130629_154522_outLine +BABEL_OP2_207_67794_20130629_150014_inLine +BABEL_OP2_207_67794_20130629_150014_outLine +BABEL_OP2_207_67794_20130629_152744_inLine +BABEL_OP2_207_67794_20130629_152744_outLine +BABEL_OP2_207_67842_20130711_144619_inLine +BABEL_OP2_207_67842_20130711_144619_outLine +BABEL_OP2_207_71333_20130711_155031_inLine +BABEL_OP2_207_71333_20130711_155031_outLine +BABEL_OP2_207_71704_20130701_154358_inLine +BABEL_OP2_207_71704_20130701_154358_outLine +BABEL_OP2_207_74111_20130922_211430_inLine +BABEL_OP2_207_74111_20130922_211430_outLine +BABEL_OP2_207_75366_20131018_141443_inLine +BABEL_OP2_207_75366_20131018_141443_outLine +BABEL_OP2_207_75465_20130919_133102_inLine +BABEL_OP2_207_75465_20130919_133102_outLine +BABEL_OP2_207_76372_20130930_220003_inLine +BABEL_OP2_207_76372_20130930_220003_outLine +BABEL_OP2_207_77139_20130624_231111_inLine +BABEL_OP2_207_77139_20130624_231111_outLine +BABEL_OP2_207_78630_20130802_140131_inLine +BABEL_OP2_207_78630_20130802_140131_outLine +BABEL_OP2_207_78976_20130701_162332_inLine +BABEL_OP2_207_78976_20130701_162332_outLine +BABEL_OP2_207_79028_20131211_173303_inLine +BABEL_OP2_207_79028_20131211_173303_outLine +BABEL_OP2_207_79660_20131011_163724_inLine +BABEL_OP2_207_79660_20131011_163724_outLine +BABEL_OP2_207_80655_20131001_101140_inLine +BABEL_OP2_207_80655_20131001_101140_outLine +BABEL_OP2_207_80721_20130910_121013_inLine +BABEL_OP2_207_80721_20130910_121013_outLine +BABEL_OP2_207_81392_20130905_165515_inLine +BABEL_OP2_207_81392_20130905_165515_outLine +BABEL_OP2_207_83366_20130824_150458_inLine +BABEL_OP2_207_83366_20130824_150458_outLine +BABEL_OP2_207_83545_20131009_133016_inLine +BABEL_OP2_207_83545_20131009_133016_outLine +BABEL_OP2_207_89888_20130730_133532_inLine +BABEL_OP2_207_89888_20130730_133532_outLine +BABEL_OP2_207_90318_20131224_133452_inLine +BABEL_OP2_207_90318_20131224_133452_outLine +BABEL_OP2_207_90935_20130725_162432_inLine +BABEL_OP2_207_90935_20130725_162432_outLine +BABEL_OP2_207_92941_20130722_163301_inLine +BABEL_OP2_207_92941_20130722_163301_outLine +BABEL_OP2_207_95598_20130625_170733_inLine +BABEL_OP2_207_95598_20130625_170733_outLine +BABEL_OP2_207_95966_20130811_204100_inLine +BABEL_OP2_207_95966_20130811_204100_outLine 
+BABEL_OP2_207_96934_20130723_143258_inLine +BABEL_OP2_207_96934_20130723_143258_outLine +BABEL_OP2_207_96985_20130626_084229_inLine +BABEL_OP2_207_96985_20130626_084229_outLine +BABEL_OP2_207_97988_20130909_215057_inLine +BABEL_OP2_207_97988_20130909_215057_outLine +BABEL_OP2_207_98165_20130724_141743_inLine +BABEL_OP2_207_98165_20130724_141743_outLine +BABEL_OP2_207_98506_20130930_135511_inLine +BABEL_OP2_207_98506_20130930_135511_outLine +BABEL_OP2_207_98580_20130809_144219_inLine +BABEL_OP2_207_98580_20130809_144219_outLine +BABEL_OP2_207_98678_20131001_204525_inLine +BABEL_OP2_207_98678_20131001_204525_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list new file mode 100644 index 00000000000..042fde9446d --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list @@ -0,0 +1,64 @@ +BABEL_OP2_207_10416_20130808_151430_inLine +BABEL_OP2_207_10416_20130808_151430_outLine +BABEL_OP2_207_15926_20130905_125437_inLine +BABEL_OP2_207_15926_20130905_125437_outLine +BABEL_OP2_207_19545_20130821_135751_inLine +BABEL_OP2_207_19545_20130821_135751_outLine +BABEL_OP2_207_24033_20130930_123827_inLine +BABEL_OP2_207_24033_20130930_123827_outLine +BABEL_OP2_207_28422_20130905_135311_inLine +BABEL_OP2_207_28422_20130905_135311_outLine +BABEL_OP2_207_30250_20130720_111643_inLine +BABEL_OP2_207_30250_20130720_111643_outLine +BABEL_OP2_207_31484_20130906_164627_inLine +BABEL_OP2_207_31484_20130906_164627_outLine +BABEL_OP2_207_34019_20131218_205039_inLine +BABEL_OP2_207_34019_20131218_205039_outLine +BABEL_OP2_207_42600_20130724_152811_inLine +BABEL_OP2_207_42600_20130724_152811_outLine +BABEL_OP2_207_42600_20130724_154332_inLine +BABEL_OP2_207_42600_20130724_154332_outLine +BABEL_OP2_207_44255_20130925_074247_inLine +BABEL_OP2_207_44255_20130925_074247_outLine +BABEL_OP2_207_44678_20131029_142212_inLine +BABEL_OP2_207_44678_20131029_142212_outLine +BABEL_OP2_207_48663_20130828_133856_inLine +BABEL_OP2_207_48663_20130828_133856_outLine +BABEL_OP2_207_49775_20130711_130307_inLine +BABEL_OP2_207_49775_20130711_130307_outLine +BABEL_OP2_207_50962_20130712_152844_inLine +BABEL_OP2_207_50962_20130712_152844_outLine +BABEL_OP2_207_52070_20131018_160716_inLine +BABEL_OP2_207_52070_20131018_160716_outLine +BABEL_OP2_207_55742_20130628_204255_inLine +BABEL_OP2_207_55742_20130628_204255_outLine +BABEL_OP2_207_57654_20130711_145355_inLine +BABEL_OP2_207_57654_20130711_145355_outLine +BABEL_OP2_207_57654_20130711_150856_inLine +BABEL_OP2_207_57654_20130711_150856_outLine +BABEL_OP2_207_58815_20130917_153637_inLine +BABEL_OP2_207_58815_20130917_153637_outLine +BABEL_OP2_207_59993_20130712_145207_inLine +BABEL_OP2_207_59993_20130712_145207_outLine +BABEL_OP2_207_60508_20130801_182520_inLine +BABEL_OP2_207_60508_20130801_182520_outLine +BABEL_OP2_207_67373_20130629_154522_inLine +BABEL_OP2_207_67373_20130629_154522_outLine +BABEL_OP2_207_71704_20130701_154358_inLine +BABEL_OP2_207_71704_20130701_154358_outLine +BABEL_OP2_207_74111_20130922_211430_inLine +BABEL_OP2_207_74111_20130922_211430_outLine +BABEL_OP2_207_78976_20130701_162332_inLine +BABEL_OP2_207_78976_20130701_162332_outLine +BABEL_OP2_207_80655_20131001_101140_inLine +BABEL_OP2_207_80655_20131001_101140_outLine +BABEL_OP2_207_90935_20130725_162432_inLine +BABEL_OP2_207_90935_20130725_162432_outLine +BABEL_OP2_207_92941_20130722_163301_inLine +BABEL_OP2_207_92941_20130722_163301_outLine +BABEL_OP2_207_95966_20130811_204100_inLine 
+BABEL_OP2_207_95966_20130811_204100_outLine +BABEL_OP2_207_98580_20130809_144219_inLine +BABEL_OP2_207_98580_20130809_144219_outLine +BABEL_OP2_207_98678_20131001_204525_inLine +BABEL_OP2_207_98678_20131001_204525_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list new file mode 100644 index 00000000000..0f3cabb11e7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list @@ -0,0 +1,126 @@ +BABEL_OP2_207_10058_20131001_123723_inLine +BABEL_OP2_207_10058_20131001_123723_outLine +BABEL_OP2_207_11681_20130701_131708_inLine +BABEL_OP2_207_11681_20130701_131708_outLine +BABEL_OP2_207_11723_20131029_192512_inLine +BABEL_OP2_207_11723_20131029_192512_outLine +BABEL_OP2_207_13178_20130828_124504_inLine +BABEL_OP2_207_13178_20130828_124504_outLine +BABEL_OP2_207_13324_20130628_205651_inLine +BABEL_OP2_207_13324_20130628_205651_outLine +BABEL_OP2_207_13490_20130811_183642_inLine +BABEL_OP2_207_13490_20130811_183642_outLine +BABEL_OP2_207_13792_20130725_131748_inLine +BABEL_OP2_207_13792_20130725_131748_outLine +BABEL_OP2_207_14137_20130702_122633_inLine +BABEL_OP2_207_14137_20130702_122633_outLine +BABEL_OP2_207_16839_20130923_202105_inLine +BABEL_OP2_207_16839_20130923_202105_outLine +BABEL_OP2_207_17032_20130906_140931_inLine +BABEL_OP2_207_17032_20130906_140931_outLine +BABEL_OP2_207_17420_20130925_143517_inLine +BABEL_OP2_207_17420_20130925_143517_outLine +BABEL_OP2_207_17440_20130911_132642_inLine +BABEL_OP2_207_17440_20130911_132642_outLine +BABEL_OP2_207_22021_20131220_151707_inLine +BABEL_OP2_207_22021_20131220_151707_outLine +BABEL_OP2_207_26999_20130903_135935_inLine +BABEL_OP2_207_26999_20130903_135935_outLine +BABEL_OP2_207_28945_20130719_160541_inLine +BABEL_OP2_207_28945_20130719_160541_outLine +BABEL_OP2_207_29023_20130702_110704_inLine +BABEL_OP2_207_29023_20130702_110704_outLine +BABEL_OP2_207_29168_20130624_215131_inLine +BABEL_OP2_207_29168_20130624_215131_outLine +BABEL_OP2_207_30576_20131003_141444_inLine +BABEL_OP2_207_30576_20131003_141444_outLine +BABEL_OP2_207_31490_20130626_143343_inLine +BABEL_OP2_207_31490_20130626_143343_outLine +BABEL_OP2_207_31624_20130722_163153_inLine +BABEL_OP2_207_31624_20130722_163153_outLine +BABEL_OP2_207_32727_20130910_153130_inLine +BABEL_OP2_207_32727_20130910_153130_outLine +BABEL_OP2_207_33355_20130626_141603_inLine +BABEL_OP2_207_33355_20130626_141603_outLine +BABEL_OP2_207_34197_20130625_162431_inLine +BABEL_OP2_207_34197_20130625_162431_outLine +BABEL_OP2_207_42497_20130628_234333_inLine +BABEL_OP2_207_42497_20130628_234333_outLine +BABEL_OP2_207_42834_20130828_121531_inLine +BABEL_OP2_207_42834_20130828_121531_outLine +BABEL_OP2_207_44029_20131224_183902_inLine +BABEL_OP2_207_44029_20131224_183902_outLine +BABEL_OP2_207_44619_20130720_150103_inLine +BABEL_OP2_207_44619_20130720_150103_outLine +BABEL_OP2_207_48610_20130627_142410_inLine +BABEL_OP2_207_48610_20130627_142410_outLine +BABEL_OP2_207_50175_20130627_131732_inLine +BABEL_OP2_207_50175_20130627_131732_outLine +BABEL_OP2_207_50565_20130625_145121_inLine +BABEL_OP2_207_50565_20130625_145121_outLine +BABEL_OP2_207_52804_20130729_144756_inLine +BABEL_OP2_207_52804_20130729_144756_outLine +BABEL_OP2_207_53917_20130926_150707_inLine +BABEL_OP2_207_53917_20130926_150707_outLine +BABEL_OP2_207_54953_20130725_154539_inLine +BABEL_OP2_207_54953_20130725_154539_outLine +BABEL_OP2_207_56198_20130702_120906_inLine +BABEL_OP2_207_56198_20130702_120906_outLine 
+BABEL_OP2_207_60661_20130719_154858_inLine +BABEL_OP2_207_60661_20130719_154858_outLine +BABEL_OP2_207_60661_20130719_160027_inLine +BABEL_OP2_207_60661_20130719_160027_outLine +BABEL_OP2_207_62289_20130828_152328_inLine +BABEL_OP2_207_62289_20130828_152328_outLine +BABEL_OP2_207_62800_20130625_222225_inLine +BABEL_OP2_207_62800_20130625_222225_outLine +BABEL_OP2_207_64768_20130722_132745_inLine +BABEL_OP2_207_64768_20130722_132745_outLine +BABEL_OP2_207_69574_20130624_154052_inLine +BABEL_OP2_207_69574_20130624_154052_outLine +BABEL_OP2_207_69574_20130624_162442_inLine +BABEL_OP2_207_69574_20130624_162442_outLine +BABEL_OP2_207_70216_20131212_112351_inLine +BABEL_OP2_207_70216_20131212_112351_outLine +BABEL_OP2_207_70716_20131005_160013_inLine +BABEL_OP2_207_70716_20131005_160013_outLine +BABEL_OP2_207_71038_20130831_112716_inLine +BABEL_OP2_207_71038_20130831_112716_outLine +BABEL_OP2_207_71121_20131212_125525_inLine +BABEL_OP2_207_71121_20131212_125525_outLine +BABEL_OP2_207_74280_20130623_173429_inLine +BABEL_OP2_207_74280_20130623_173429_outLine +BABEL_OP2_207_77744_20130720_130633_inLine +BABEL_OP2_207_77744_20130720_130633_outLine +BABEL_OP2_207_78194_20130622_152343_inLine +BABEL_OP2_207_78194_20130622_152343_outLine +BABEL_OP2_207_78604_20130629_143534_inLine +BABEL_OP2_207_78604_20130629_143534_outLine +BABEL_OP2_207_78943_20130701_150832_inLine +BABEL_OP2_207_78943_20130701_150832_outLine +BABEL_OP2_207_86467_20130621_164129_inLine +BABEL_OP2_207_86467_20130621_164129_outLine +BABEL_OP2_207_86826_20131010_131452_inLine +BABEL_OP2_207_86826_20131010_131452_outLine +BABEL_OP2_207_87074_20130702_114658_inLine +BABEL_OP2_207_87074_20130702_114658_outLine +BABEL_OP2_207_87298_20130722_163007_inLine +BABEL_OP2_207_87298_20130722_163007_outLine +BABEL_OP2_207_87298_20130722_164947_inLine +BABEL_OP2_207_87298_20130722_164947_outLine +BABEL_OP2_207_89650_20131220_191027_inLine +BABEL_OP2_207_89650_20131220_191027_outLine +BABEL_OP2_207_95269_20130725_140512_inLine +BABEL_OP2_207_95269_20130725_140512_outLine +BABEL_OP2_207_97588_20130720_172415_inLine +BABEL_OP2_207_97588_20130720_172415_outLine +BABEL_OP2_207_97731_20130920_141703_inLine +BABEL_OP2_207_97731_20130920_141703_outLine +BABEL_OP2_207_97836_20130930_145119_inLine +BABEL_OP2_207_97836_20130930_145119_outLine +BABEL_OP2_207_97896_20130807_165056_inLine +BABEL_OP2_207_97896_20130807_165056_outLine +BABEL_OP2_207_97911_20131017_134323_inLine +BABEL_OP2_207_97911_20131017_134323_outLine +BABEL_OP2_207_98489_20130712_001025_inLine +BABEL_OP2_207_98489_20130712_001025_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list new file mode 100644 index 00000000000..7fa52da3207 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP2_207_10036_20130724_130953_inLine +BABEL_OP2_207_10036_20130724_130953_outLine +BABEL_OP2_207_10638_20131023_161558_inLine +BABEL_OP2_207_10638_20131023_161558_outLine +BABEL_OP2_207_10647_20130930_130411_inLine +BABEL_OP2_207_10647_20130930_130411_outLine +BABEL_OP2_207_10938_20130723_154630_inLine +BABEL_OP2_207_10938_20130723_154630_outLine +BABEL_OP2_207_12036_20130628_172018_inLine +BABEL_OP2_207_12036_20130628_172018_outLine +BABEL_OP2_207_12242_20130720_122145_inLine +BABEL_OP2_207_12242_20130720_122145_outLine +BABEL_OP2_207_12851_20130624_231520_inLine +BABEL_OP2_207_12851_20130624_231520_outLine +BABEL_OP2_207_13483_20130914_124412_inLine 
+BABEL_OP2_207_13483_20130914_124412_outLine +BABEL_OP2_207_13664_20130624_131414_inLine +BABEL_OP2_207_13664_20130624_131414_outLine +BABEL_OP2_207_13709_20130925_114224_inLine +BABEL_OP2_207_13709_20130925_114224_outLine +BABEL_OP2_207_13776_20131010_175808_inLine +BABEL_OP2_207_13776_20131010_175808_outLine +BABEL_OP2_207_14179_20130905_113236_inLine +BABEL_OP2_207_14179_20130905_113236_outLine +BABEL_OP2_207_14972_20130821_111242_inLine +BABEL_OP2_207_14972_20130821_111242_outLine +BABEL_OP2_207_15024_20130820_131419_inLine +BABEL_OP2_207_15024_20130820_131419_outLine +BABEL_OP2_207_15382_20130827_130728_inLine +BABEL_OP2_207_15382_20130827_130728_outLine +BABEL_OP2_207_15730_20130627_154012_inLine +BABEL_OP2_207_15730_20130627_154012_outLine +BABEL_OP2_207_16149_20130720_115211_inLine +BABEL_OP2_207_16149_20130720_115211_outLine +BABEL_OP2_207_16749_20130830_154859_inLine +BABEL_OP2_207_16749_20130830_154859_outLine +BABEL_OP2_207_17472_20130910_165052_inLine +BABEL_OP2_207_17472_20130910_165052_outLine +BABEL_OP2_207_17496_20130827_154835_inLine +BABEL_OP2_207_17496_20130827_154835_outLine +BABEL_OP2_207_17520_20130820_160316_inLine +BABEL_OP2_207_17520_20130820_160316_outLine +BABEL_OP2_207_17615_20130903_123606_inLine +BABEL_OP2_207_17615_20130903_123606_outLine +BABEL_OP2_207_18078_20130920_135919_inLine +BABEL_OP2_207_18078_20130920_135919_outLine +BABEL_OP2_207_18297_20130828_161347_inLine +BABEL_OP2_207_18297_20130828_161347_outLine +BABEL_OP2_207_18370_20131205_182514_inLine +BABEL_OP2_207_18370_20131205_182514_outLine +BABEL_OP2_207_19134_20130822_145954_inLine +BABEL_OP2_207_19134_20130822_145954_outLine +BABEL_OP2_207_19703_20130720_154219_inLine +BABEL_OP2_207_19703_20130720_154219_outLine +BABEL_OP2_207_19818_20130826_134257_inLine +BABEL_OP2_207_19818_20130826_134257_outLine +BABEL_OP2_207_19877_20130912_151401_inLine +BABEL_OP2_207_19877_20130912_151401_outLine +BABEL_OP2_207_20437_20131030_165858_inLine +BABEL_OP2_207_20437_20131030_165858_outLine +BABEL_OP2_207_20985_20130905_145111_inLine +BABEL_OP2_207_20985_20130905_145111_outLine +BABEL_OP2_207_21004_20130909_140247_inLine +BABEL_OP2_207_21004_20130909_140247_outLine +BABEL_OP2_207_21004_20130909_141426_inLine +BABEL_OP2_207_21004_20130909_141426_outLine +BABEL_OP2_207_21206_20130630_201617_inLine +BABEL_OP2_207_21206_20130630_201617_outLine +BABEL_OP2_207_21327_20130912_132010_inLine +BABEL_OP2_207_21327_20130912_132010_outLine +BABEL_OP2_207_22446_20130725_155758_inLine +BABEL_OP2_207_22446_20130725_155758_outLine +BABEL_OP2_207_23006_20130722_133014_inLine +BABEL_OP2_207_23006_20130722_133014_outLine +BABEL_OP2_207_23046_20130729_122607_inLine +BABEL_OP2_207_23046_20130729_122607_outLine +BABEL_OP2_207_23092_20130911_151410_inLine +BABEL_OP2_207_23092_20130911_151410_outLine +BABEL_OP2_207_24532_20130626_162254_inLine +BABEL_OP2_207_24532_20130626_162254_outLine +BABEL_OP2_207_24586_20130930_115553_inLine +BABEL_OP2_207_24586_20130930_115553_outLine +BABEL_OP2_207_24590_20130807_162732_inLine +BABEL_OP2_207_24590_20130807_162732_outLine +BABEL_OP2_207_24679_20130625_144735_inLine +BABEL_OP2_207_24679_20130625_144735_outLine +BABEL_OP2_207_24982_20130729_152422_inLine +BABEL_OP2_207_24982_20130729_152422_outLine +BABEL_OP2_207_25767_20130628_220921_inLine +BABEL_OP2_207_25767_20130628_220921_outLine +BABEL_OP2_207_26388_20130722_152932_inLine +BABEL_OP2_207_26388_20130722_152932_outLine +BABEL_OP2_207_27590_20130912_155435_inLine +BABEL_OP2_207_27590_20130912_155435_outLine 
+BABEL_OP2_207_28012_20130920_162354_inLine +BABEL_OP2_207_28012_20130920_162354_outLine +BABEL_OP2_207_28303_20130731_132124_inLine +BABEL_OP2_207_28303_20130731_132124_outLine +BABEL_OP2_207_28522_20130906_172331_inLine +BABEL_OP2_207_28522_20130906_172331_outLine +BABEL_OP2_207_28595_20131022_154118_inLine +BABEL_OP2_207_28595_20131022_154118_outLine +BABEL_OP2_207_29404_20130930_154214_inLine +BABEL_OP2_207_29404_20130930_154214_outLine +BABEL_OP2_207_29633_20131001_114745_inLine +BABEL_OP2_207_29633_20131001_114745_outLine +BABEL_OP2_207_30058_20130927_094530_inLine +BABEL_OP2_207_30058_20130927_094530_outLine +BABEL_OP2_207_30180_20130725_150836_inLine +BABEL_OP2_207_30180_20130725_150836_outLine +BABEL_OP2_207_30180_20130725_152116_inLine +BABEL_OP2_207_30180_20130725_152116_outLine +BABEL_OP2_207_30395_20130701_130920_inLine +BABEL_OP2_207_30395_20130701_130920_outLine +BABEL_OP2_207_31039_20131219_232002_inLine +BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine +BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine 
+BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine +BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine +BABEL_OP2_207_59720_20130723_144903_outLine +BABEL_OP2_207_60115_20130905_120839_inLine +BABEL_OP2_207_60115_20130905_120839_outLine +BABEL_OP2_207_60474_20130724_150210_inLine +BABEL_OP2_207_60474_20130724_150210_outLine +BABEL_OP2_207_62734_20130724_141406_inLine +BABEL_OP2_207_62734_20130724_141406_outLine +BABEL_OP2_207_62810_20130628_195519_inLine +BABEL_OP2_207_62810_20130628_195519_outLine +BABEL_OP2_207_63787_20130628_150319_inLine +BABEL_OP2_207_63787_20130628_150319_outLine +BABEL_OP2_207_64065_20130711_144127_inLine +BABEL_OP2_207_64065_20130711_144127_outLine +BABEL_OP2_207_65723_20130628_225606_inLine +BABEL_OP2_207_65723_20130628_225606_outLine +BABEL_OP2_207_65882_20130711_131739_inLine +BABEL_OP2_207_65882_20130711_131739_outLine +BABEL_OP2_207_66001_20130627_130307_inLine +BABEL_OP2_207_66001_20130627_130307_outLine +BABEL_OP2_207_66916_20130625_141125_inLine +BABEL_OP2_207_66916_20130625_141125_outLine +BABEL_OP2_207_67283_20130626_165836_inLine +BABEL_OP2_207_67283_20130626_165836_outLine +BABEL_OP2_207_67659_20130730_103326_inLine +BABEL_OP2_207_67659_20130730_103326_outLine +BABEL_OP2_207_67659_20130730_104313_inLine +BABEL_OP2_207_67659_20130730_104313_outLine +BABEL_OP2_207_67726_20131212_115926_inLine +BABEL_OP2_207_67726_20131212_115926_outLine +BABEL_OP2_207_68924_20130824_111816_inLine +BABEL_OP2_207_68924_20130824_111816_outLine +BABEL_OP2_207_69636_20130903_113702_inLine +BABEL_OP2_207_69636_20130903_113702_outLine +BABEL_OP2_207_69992_20130628_145720_inLine +BABEL_OP2_207_69992_20130628_145720_outLine +BABEL_OP2_207_69992_20130628_151110_inLine +BABEL_OP2_207_69992_20130628_151110_outLine 
+BABEL_OP2_207_70452_20130719_143347_inLine +BABEL_OP2_207_70452_20130719_143347_outLine +BABEL_OP2_207_70794_20130622_150717_inLine +BABEL_OP2_207_70794_20130622_150717_outLine +BABEL_OP2_207_71404_20130712_141658_inLine +BABEL_OP2_207_71404_20130712_141658_outLine +BABEL_OP2_207_72587_20130826_152730_inLine +BABEL_OP2_207_72587_20130826_152730_outLine +BABEL_OP2_207_73022_20130924_132328_inLine +BABEL_OP2_207_73022_20130924_132328_outLine +BABEL_OP2_207_73591_20130625_194125_inLine +BABEL_OP2_207_73591_20130625_194125_outLine +BABEL_OP2_207_73814_20130822_124306_inLine +BABEL_OP2_207_73814_20130822_124306_outLine +BABEL_OP2_207_73990_20131029_162659_inLine +BABEL_OP2_207_73990_20131029_162659_outLine +BABEL_OP2_207_74667_20130808_161304_inLine +BABEL_OP2_207_74667_20130808_161304_outLine +BABEL_OP2_207_75064_20130720_134326_inLine +BABEL_OP2_207_75064_20130720_134326_outLine +BABEL_OP2_207_75505_20130627_155926_inLine +BABEL_OP2_207_75505_20130627_155926_outLine +BABEL_OP2_207_77146_20130625_205452_inLine +BABEL_OP2_207_77146_20130625_205452_outLine +BABEL_OP2_207_77803_20130626_144156_inLine +BABEL_OP2_207_77803_20130626_144156_outLine +BABEL_OP2_207_77990_20130701_144426_inLine +BABEL_OP2_207_77990_20130701_144426_outLine +BABEL_OP2_207_78482_20130919_144242_inLine +BABEL_OP2_207_78482_20130919_144242_outLine +BABEL_OP2_207_79080_20130922_214849_inLine +BABEL_OP2_207_79080_20130922_214849_outLine +BABEL_OP2_207_79367_20130626_150601_inLine +BABEL_OP2_207_79367_20130626_150601_outLine +BABEL_OP2_207_79451_20130712_135228_inLine +BABEL_OP2_207_79451_20130712_135228_outLine +BABEL_OP2_207_80439_20130722_161436_inLine +BABEL_OP2_207_80439_20130722_161436_outLine +BABEL_OP2_207_80559_20130712_144234_inLine +BABEL_OP2_207_80559_20130712_144234_outLine +BABEL_OP2_207_81971_20130623_113232_inLine +BABEL_OP2_207_81971_20130623_113232_outLine +BABEL_OP2_207_82425_20130626_153351_inLine +BABEL_OP2_207_82425_20130626_153351_outLine +BABEL_OP2_207_84547_20130626_230549_inLine +BABEL_OP2_207_84547_20130626_230549_outLine +BABEL_OP2_207_84611_20130630_210848_inLine +BABEL_OP2_207_84611_20130630_210848_outLine +BABEL_OP2_207_84768_20130627_204526_inLine +BABEL_OP2_207_84768_20130627_204526_outLine +BABEL_OP2_207_84805_20130922_111910_inLine +BABEL_OP2_207_84805_20130922_111910_outLine +BABEL_OP2_207_85010_20131031_114820_inLine +BABEL_OP2_207_85010_20131031_114820_outLine +BABEL_OP2_207_85340_20130731_141136_inLine +BABEL_OP2_207_85340_20130731_141136_outLine +BABEL_OP2_207_86191_20130720_132952_inLine +BABEL_OP2_207_86191_20130720_132952_outLine +BABEL_OP2_207_86628_20131011_145244_inLine +BABEL_OP2_207_86628_20131011_145244_outLine +BABEL_OP2_207_86713_20130924_095726_inLine +BABEL_OP2_207_86713_20130924_095726_outLine +BABEL_OP2_207_86722_20130723_173932_inLine +BABEL_OP2_207_86722_20130723_173932_outLine +BABEL_OP2_207_87489_20130925_122043_inLine +BABEL_OP2_207_87489_20130925_122043_outLine +BABEL_OP2_207_87777_20130827_113252_inLine +BABEL_OP2_207_87777_20130827_113252_outLine +BABEL_OP2_207_87884_20130911_154713_inLine +BABEL_OP2_207_87884_20130911_154713_outLine +BABEL_OP2_207_87921_20130909_222741_inLine +BABEL_OP2_207_87921_20130909_222741_outLine +BABEL_OP2_207_88776_20130628_223035_inLine +BABEL_OP2_207_88776_20130628_223035_outLine +BABEL_OP2_207_89059_20130830_150700_inLine +BABEL_OP2_207_89059_20130830_150700_outLine +BABEL_OP2_207_89877_20130822_133155_inLine +BABEL_OP2_207_89877_20130822_133155_outLine +BABEL_OP2_207_90572_20130927_112514_inLine 
+BABEL_OP2_207_90572_20130927_112514_outLine +BABEL_OP2_207_91125_20130622_154739_inLine +BABEL_OP2_207_91125_20130622_154739_outLine +BABEL_OP2_207_91383_20131017_164250_inLine +BABEL_OP2_207_91383_20131017_164250_outLine +BABEL_OP2_207_91760_20131008_175549_inLine +BABEL_OP2_207_91760_20131008_175549_outLine +BABEL_OP2_207_91888_20131002_140054_inLine +BABEL_OP2_207_91888_20131002_140054_outLine +BABEL_OP2_207_92736_20130913_142730_inLine +BABEL_OP2_207_92736_20130913_142730_outLine +BABEL_OP2_207_93475_20130712_141154_inLine +BABEL_OP2_207_93475_20130712_141154_outLine +BABEL_OP2_207_94262_20130912_223931_inLine +BABEL_OP2_207_94262_20130912_223931_outLine +BABEL_OP2_207_94869_20130627_162540_inLine +BABEL_OP2_207_94869_20130627_162540_outLine +BABEL_OP2_207_95077_20130910_113448_inLine +BABEL_OP2_207_95077_20130910_113448_outLine +BABEL_OP2_207_95231_20131029_145824_inLine +BABEL_OP2_207_95231_20131029_145824_outLine +BABEL_OP2_207_95663_20130626_085943_inLine +BABEL_OP2_207_95663_20130626_085943_outLine +BABEL_OP2_207_96190_20130730_105000_inLine +BABEL_OP2_207_96190_20130730_105000_outLine +BABEL_OP2_207_96525_20130919_151001_inLine +BABEL_OP2_207_96525_20130919_151001_outLine +BABEL_OP2_207_96690_20130808_133431_inLine +BABEL_OP2_207_96690_20130808_133431_outLine +BABEL_OP2_207_96808_20131007_222455_inLine +BABEL_OP2_207_96808_20131007_222455_outLine +BABEL_OP2_207_96820_20130815_171850_inLine +BABEL_OP2_207_96820_20130815_171850_outLine +BABEL_OP2_207_96820_20130815_172511_inLine +BABEL_OP2_207_96820_20130815_172511_outLine +BABEL_OP2_207_96910_20130723_132125_inLine +BABEL_OP2_207_96910_20130723_132125_outLine +BABEL_OP2_207_97220_20131015_210228_inLine +BABEL_OP2_207_97220_20131015_210228_outLine +BABEL_OP2_207_97557_20130824_125158_inLine +BABEL_OP2_207_97557_20130824_125158_outLine +BABEL_OP2_207_98390_20130630_121753_inLine +BABEL_OP2_207_98390_20130630_121753_outLine +BABEL_OP2_207_98565_20131220_143328_inLine +BABEL_OP2_207_98565_20131220_143328_outLine +BABEL_OP2_207_99289_20130930_212352_inLine +BABEL_OP2_207_99289_20130930_212352_outLine +BABEL_OP2_207_99998_20130730_104201_inLine +BABEL_OP2_207_99998_20130730_104201_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/training.list b/egs/babel/s5d/conf/lists/207-tokpisin/training.list new file mode 100644 index 00000000000..265ad40a321 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/training.list @@ -0,0 +1,506 @@ +BABEL_OP2_207_10036_20130724_130953_inLine +BABEL_OP2_207_10036_20130724_130953_outLine +BABEL_OP2_207_10058_20131001_123723_inLine +BABEL_OP2_207_10058_20131001_123723_outLine +BABEL_OP2_207_10638_20131023_161558_inLine +BABEL_OP2_207_10638_20131023_161558_outLine +BABEL_OP2_207_10647_20130930_130411_inLine +BABEL_OP2_207_10647_20130930_130411_outLine +BABEL_OP2_207_10938_20130723_154630_inLine +BABEL_OP2_207_10938_20130723_154630_outLine +BABEL_OP2_207_11681_20130701_131708_inLine +BABEL_OP2_207_11681_20130701_131708_outLine +BABEL_OP2_207_11723_20131029_192512_inLine +BABEL_OP2_207_11723_20131029_192512_outLine +BABEL_OP2_207_12036_20130628_172018_inLine +BABEL_OP2_207_12036_20130628_172018_outLine +BABEL_OP2_207_12242_20130720_122145_inLine +BABEL_OP2_207_12242_20130720_122145_outLine +BABEL_OP2_207_12851_20130624_231520_inLine +BABEL_OP2_207_12851_20130624_231520_outLine +BABEL_OP2_207_13178_20130828_124504_inLine +BABEL_OP2_207_13178_20130828_124504_outLine +BABEL_OP2_207_13324_20130628_205651_inLine +BABEL_OP2_207_13324_20130628_205651_outLine +BABEL_OP2_207_13483_20130914_124412_inLine 
+BABEL_OP2_207_13483_20130914_124412_outLine +BABEL_OP2_207_13490_20130811_183642_inLine +BABEL_OP2_207_13490_20130811_183642_outLine +BABEL_OP2_207_13664_20130624_131414_inLine +BABEL_OP2_207_13664_20130624_131414_outLine +BABEL_OP2_207_13709_20130925_114224_inLine +BABEL_OP2_207_13709_20130925_114224_outLine +BABEL_OP2_207_13776_20131010_175808_inLine +BABEL_OP2_207_13776_20131010_175808_outLine +BABEL_OP2_207_13792_20130725_131748_inLine +BABEL_OP2_207_13792_20130725_131748_outLine +BABEL_OP2_207_14137_20130702_122633_inLine +BABEL_OP2_207_14137_20130702_122633_outLine +BABEL_OP2_207_14179_20130905_113236_inLine +BABEL_OP2_207_14179_20130905_113236_outLine +BABEL_OP2_207_14972_20130821_111242_inLine +BABEL_OP2_207_14972_20130821_111242_outLine +BABEL_OP2_207_15024_20130820_131419_inLine +BABEL_OP2_207_15024_20130820_131419_outLine +BABEL_OP2_207_15382_20130827_130728_inLine +BABEL_OP2_207_15382_20130827_130728_outLine +BABEL_OP2_207_15730_20130627_154012_inLine +BABEL_OP2_207_15730_20130627_154012_outLine +BABEL_OP2_207_16149_20130720_115211_inLine +BABEL_OP2_207_16149_20130720_115211_outLine +BABEL_OP2_207_16749_20130830_154859_inLine +BABEL_OP2_207_16749_20130830_154859_outLine +BABEL_OP2_207_16839_20130923_202105_inLine +BABEL_OP2_207_16839_20130923_202105_outLine +BABEL_OP2_207_17032_20130906_140931_inLine +BABEL_OP2_207_17032_20130906_140931_outLine +BABEL_OP2_207_17420_20130925_143517_inLine +BABEL_OP2_207_17420_20130925_143517_outLine +BABEL_OP2_207_17440_20130911_132642_inLine +BABEL_OP2_207_17440_20130911_132642_outLine +BABEL_OP2_207_17472_20130910_165052_inLine +BABEL_OP2_207_17472_20130910_165052_outLine +BABEL_OP2_207_17496_20130827_154835_inLine +BABEL_OP2_207_17496_20130827_154835_outLine +BABEL_OP2_207_17520_20130820_160316_inLine +BABEL_OP2_207_17520_20130820_160316_outLine +BABEL_OP2_207_17615_20130903_123606_inLine +BABEL_OP2_207_17615_20130903_123606_outLine +BABEL_OP2_207_18078_20130920_135919_inLine +BABEL_OP2_207_18078_20130920_135919_outLine +BABEL_OP2_207_18297_20130828_161347_inLine +BABEL_OP2_207_18297_20130828_161347_outLine +BABEL_OP2_207_18370_20131205_182514_inLine +BABEL_OP2_207_18370_20131205_182514_outLine +BABEL_OP2_207_19134_20130822_145954_inLine +BABEL_OP2_207_19134_20130822_145954_outLine +BABEL_OP2_207_19703_20130720_154219_inLine +BABEL_OP2_207_19703_20130720_154219_outLine +BABEL_OP2_207_19818_20130826_134257_inLine +BABEL_OP2_207_19818_20130826_134257_outLine +BABEL_OP2_207_19877_20130912_151401_inLine +BABEL_OP2_207_19877_20130912_151401_outLine +BABEL_OP2_207_20437_20131030_165858_inLine +BABEL_OP2_207_20437_20131030_165858_outLine +BABEL_OP2_207_20985_20130905_145111_inLine +BABEL_OP2_207_20985_20130905_145111_outLine +BABEL_OP2_207_21004_20130909_140247_inLine +BABEL_OP2_207_21004_20130909_140247_outLine +BABEL_OP2_207_21004_20130909_141426_inLine +BABEL_OP2_207_21004_20130909_141426_outLine +BABEL_OP2_207_21206_20130630_201617_inLine +BABEL_OP2_207_21206_20130630_201617_outLine +BABEL_OP2_207_21327_20130912_132010_inLine +BABEL_OP2_207_21327_20130912_132010_outLine +BABEL_OP2_207_22021_20131220_151707_inLine +BABEL_OP2_207_22021_20131220_151707_outLine +BABEL_OP2_207_22446_20130725_155758_inLine +BABEL_OP2_207_22446_20130725_155758_outLine +BABEL_OP2_207_23006_20130722_133014_inLine +BABEL_OP2_207_23006_20130722_133014_outLine +BABEL_OP2_207_23046_20130729_122607_inLine +BABEL_OP2_207_23046_20130729_122607_outLine +BABEL_OP2_207_23092_20130911_151410_inLine +BABEL_OP2_207_23092_20130911_151410_outLine 
+BABEL_OP2_207_24532_20130626_162254_inLine +BABEL_OP2_207_24532_20130626_162254_outLine +BABEL_OP2_207_24586_20130930_115553_inLine +BABEL_OP2_207_24586_20130930_115553_outLine +BABEL_OP2_207_24590_20130807_162732_inLine +BABEL_OP2_207_24590_20130807_162732_outLine +BABEL_OP2_207_24679_20130625_144735_inLine +BABEL_OP2_207_24679_20130625_144735_outLine +BABEL_OP2_207_24982_20130729_152422_inLine +BABEL_OP2_207_24982_20130729_152422_outLine +BABEL_OP2_207_25767_20130628_220921_inLine +BABEL_OP2_207_25767_20130628_220921_outLine +BABEL_OP2_207_26388_20130722_152932_inLine +BABEL_OP2_207_26388_20130722_152932_outLine +BABEL_OP2_207_26999_20130903_135935_inLine +BABEL_OP2_207_26999_20130903_135935_outLine +BABEL_OP2_207_27590_20130912_155435_inLine +BABEL_OP2_207_27590_20130912_155435_outLine +BABEL_OP2_207_28012_20130920_162354_inLine +BABEL_OP2_207_28012_20130920_162354_outLine +BABEL_OP2_207_28303_20130731_132124_inLine +BABEL_OP2_207_28303_20130731_132124_outLine +BABEL_OP2_207_28522_20130906_172331_inLine +BABEL_OP2_207_28522_20130906_172331_outLine +BABEL_OP2_207_28595_20131022_154118_inLine +BABEL_OP2_207_28595_20131022_154118_outLine +BABEL_OP2_207_28945_20130719_160541_inLine +BABEL_OP2_207_28945_20130719_160541_outLine +BABEL_OP2_207_29023_20130702_110704_inLine +BABEL_OP2_207_29023_20130702_110704_outLine +BABEL_OP2_207_29168_20130624_215131_inLine +BABEL_OP2_207_29168_20130624_215131_outLine +BABEL_OP2_207_29404_20130930_154214_inLine +BABEL_OP2_207_29404_20130930_154214_outLine +BABEL_OP2_207_29633_20131001_114745_inLine +BABEL_OP2_207_29633_20131001_114745_outLine +BABEL_OP2_207_30058_20130927_094530_inLine +BABEL_OP2_207_30058_20130927_094530_outLine +BABEL_OP2_207_30180_20130725_150836_inLine +BABEL_OP2_207_30180_20130725_150836_outLine +BABEL_OP2_207_30180_20130725_152116_inLine +BABEL_OP2_207_30180_20130725_152116_outLine +BABEL_OP2_207_30395_20130701_130920_inLine +BABEL_OP2_207_30395_20130701_130920_outLine +BABEL_OP2_207_30576_20131003_141444_inLine +BABEL_OP2_207_30576_20131003_141444_outLine +BABEL_OP2_207_31039_20131219_232002_inLine +BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_31490_20130626_143343_inLine +BABEL_OP2_207_31490_20130626_143343_outLine +BABEL_OP2_207_31624_20130722_163153_inLine +BABEL_OP2_207_31624_20130722_163153_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_32727_20130910_153130_inLine +BABEL_OP2_207_32727_20130910_153130_outLine +BABEL_OP2_207_33355_20130626_141603_inLine +BABEL_OP2_207_33355_20130626_141603_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34197_20130625_162431_inLine +BABEL_OP2_207_34197_20130625_162431_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine 
+BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_42497_20130628_234333_inLine +BABEL_OP2_207_42497_20130628_234333_outLine +BABEL_OP2_207_42834_20130828_121531_inLine +BABEL_OP2_207_42834_20130828_121531_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine +BABEL_OP2_207_44029_20131224_183902_inLine +BABEL_OP2_207_44029_20131224_183902_outLine +BABEL_OP2_207_44619_20130720_150103_inLine +BABEL_OP2_207_44619_20130720_150103_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48610_20130627_142410_inLine +BABEL_OP2_207_48610_20130627_142410_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine +BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50175_20130627_131732_inLine +BABEL_OP2_207_50175_20130627_131732_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_50565_20130625_145121_inLine +BABEL_OP2_207_50565_20130625_145121_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52804_20130729_144756_inLine +BABEL_OP2_207_52804_20130729_144756_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine +BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53917_20130926_150707_inLine +BABEL_OP2_207_53917_20130926_150707_outLine 
+BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_54953_20130725_154539_inLine +BABEL_OP2_207_54953_20130725_154539_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56198_20130702_120906_inLine +BABEL_OP2_207_56198_20130702_120906_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine +BABEL_OP2_207_59720_20130723_144903_outLine +BABEL_OP2_207_60115_20130905_120839_inLine +BABEL_OP2_207_60115_20130905_120839_outLine +BABEL_OP2_207_60474_20130724_150210_inLine +BABEL_OP2_207_60474_20130724_150210_outLine +BABEL_OP2_207_60661_20130719_154858_inLine +BABEL_OP2_207_60661_20130719_154858_outLine +BABEL_OP2_207_60661_20130719_160027_inLine +BABEL_OP2_207_60661_20130719_160027_outLine +BABEL_OP2_207_62289_20130828_152328_inLine +BABEL_OP2_207_62289_20130828_152328_outLine +BABEL_OP2_207_62734_20130724_141406_inLine +BABEL_OP2_207_62734_20130724_141406_outLine +BABEL_OP2_207_62800_20130625_222225_inLine +BABEL_OP2_207_62800_20130625_222225_outLine +BABEL_OP2_207_62810_20130628_195519_inLine +BABEL_OP2_207_62810_20130628_195519_outLine +BABEL_OP2_207_63787_20130628_150319_inLine +BABEL_OP2_207_63787_20130628_150319_outLine +BABEL_OP2_207_64065_20130711_144127_inLine +BABEL_OP2_207_64065_20130711_144127_outLine +BABEL_OP2_207_64768_20130722_132745_inLine +BABEL_OP2_207_64768_20130722_132745_outLine +BABEL_OP2_207_65723_20130628_225606_inLine +BABEL_OP2_207_65723_20130628_225606_outLine +BABEL_OP2_207_65882_20130711_131739_inLine +BABEL_OP2_207_65882_20130711_131739_outLine +BABEL_OP2_207_66001_20130627_130307_inLine +BABEL_OP2_207_66001_20130627_130307_outLine +BABEL_OP2_207_66916_20130625_141125_inLine +BABEL_OP2_207_66916_20130625_141125_outLine +BABEL_OP2_207_67283_20130626_165836_inLine +BABEL_OP2_207_67283_20130626_165836_outLine +BABEL_OP2_207_67659_20130730_103326_inLine +BABEL_OP2_207_67659_20130730_103326_outLine +BABEL_OP2_207_67659_20130730_104313_inLine +BABEL_OP2_207_67659_20130730_104313_outLine +BABEL_OP2_207_67726_20131212_115926_inLine +BABEL_OP2_207_67726_20131212_115926_outLine +BABEL_OP2_207_68924_20130824_111816_inLine +BABEL_OP2_207_68924_20130824_111816_outLine +BABEL_OP2_207_69574_20130624_154052_inLine +BABEL_OP2_207_69574_20130624_154052_outLine +BABEL_OP2_207_69574_20130624_162442_inLine +BABEL_OP2_207_69574_20130624_162442_outLine +BABEL_OP2_207_69636_20130903_113702_inLine +BABEL_OP2_207_69636_20130903_113702_outLine +BABEL_OP2_207_69992_20130628_145720_inLine +BABEL_OP2_207_69992_20130628_145720_outLine +BABEL_OP2_207_69992_20130628_151110_inLine +BABEL_OP2_207_69992_20130628_151110_outLine +BABEL_OP2_207_70216_20131212_112351_inLine +BABEL_OP2_207_70216_20131212_112351_outLine +BABEL_OP2_207_70452_20130719_143347_inLine +BABEL_OP2_207_70452_20130719_143347_outLine +BABEL_OP2_207_70716_20131005_160013_inLine +BABEL_OP2_207_70716_20131005_160013_outLine +BABEL_OP2_207_70794_20130622_150717_inLine 
+BABEL_OP2_207_70794_20130622_150717_outLine +BABEL_OP2_207_71038_20130831_112716_inLine +BABEL_OP2_207_71038_20130831_112716_outLine +BABEL_OP2_207_71121_20131212_125525_inLine +BABEL_OP2_207_71121_20131212_125525_outLine +BABEL_OP2_207_71404_20130712_141658_inLine +BABEL_OP2_207_71404_20130712_141658_outLine +BABEL_OP2_207_72587_20130826_152730_inLine +BABEL_OP2_207_72587_20130826_152730_outLine +BABEL_OP2_207_73022_20130924_132328_inLine +BABEL_OP2_207_73022_20130924_132328_outLine +BABEL_OP2_207_73591_20130625_194125_inLine +BABEL_OP2_207_73591_20130625_194125_outLine +BABEL_OP2_207_73814_20130822_124306_inLine +BABEL_OP2_207_73814_20130822_124306_outLine +BABEL_OP2_207_73990_20131029_162659_inLine +BABEL_OP2_207_73990_20131029_162659_outLine +BABEL_OP2_207_74280_20130623_173429_inLine +BABEL_OP2_207_74280_20130623_173429_outLine +BABEL_OP2_207_74667_20130808_161304_inLine +BABEL_OP2_207_74667_20130808_161304_outLine +BABEL_OP2_207_75064_20130720_134326_inLine +BABEL_OP2_207_75064_20130720_134326_outLine +BABEL_OP2_207_75505_20130627_155926_inLine +BABEL_OP2_207_75505_20130627_155926_outLine +BABEL_OP2_207_77146_20130625_205452_inLine +BABEL_OP2_207_77146_20130625_205452_outLine +BABEL_OP2_207_77744_20130720_130633_inLine +BABEL_OP2_207_77744_20130720_130633_outLine +BABEL_OP2_207_77803_20130626_144156_inLine +BABEL_OP2_207_77803_20130626_144156_outLine +BABEL_OP2_207_77990_20130701_144426_inLine +BABEL_OP2_207_77990_20130701_144426_outLine +BABEL_OP2_207_78194_20130622_152343_inLine +BABEL_OP2_207_78194_20130622_152343_outLine +BABEL_OP2_207_78482_20130919_144242_inLine +BABEL_OP2_207_78482_20130919_144242_outLine +BABEL_OP2_207_78604_20130629_143534_inLine +BABEL_OP2_207_78604_20130629_143534_outLine +BABEL_OP2_207_78943_20130701_150832_inLine +BABEL_OP2_207_78943_20130701_150832_outLine +BABEL_OP2_207_79080_20130922_214849_inLine +BABEL_OP2_207_79080_20130922_214849_outLine +BABEL_OP2_207_79367_20130626_150601_inLine +BABEL_OP2_207_79367_20130626_150601_outLine +BABEL_OP2_207_79451_20130712_135228_inLine +BABEL_OP2_207_79451_20130712_135228_outLine +BABEL_OP2_207_80439_20130722_161436_inLine +BABEL_OP2_207_80439_20130722_161436_outLine +BABEL_OP2_207_80559_20130712_144234_inLine +BABEL_OP2_207_80559_20130712_144234_outLine +BABEL_OP2_207_81971_20130623_113232_inLine +BABEL_OP2_207_81971_20130623_113232_outLine +BABEL_OP2_207_82425_20130626_153351_inLine +BABEL_OP2_207_82425_20130626_153351_outLine +BABEL_OP2_207_84547_20130626_230549_inLine +BABEL_OP2_207_84547_20130626_230549_outLine +BABEL_OP2_207_84611_20130630_210848_inLine +BABEL_OP2_207_84611_20130630_210848_outLine +BABEL_OP2_207_84768_20130627_204526_inLine +BABEL_OP2_207_84768_20130627_204526_outLine +BABEL_OP2_207_84805_20130922_111910_inLine +BABEL_OP2_207_84805_20130922_111910_outLine +BABEL_OP2_207_85010_20131031_114820_inLine +BABEL_OP2_207_85010_20131031_114820_outLine +BABEL_OP2_207_85340_20130731_141136_inLine +BABEL_OP2_207_85340_20130731_141136_outLine +BABEL_OP2_207_86191_20130720_132952_inLine +BABEL_OP2_207_86191_20130720_132952_outLine +BABEL_OP2_207_86467_20130621_164129_inLine +BABEL_OP2_207_86467_20130621_164129_outLine +BABEL_OP2_207_86628_20131011_145244_inLine +BABEL_OP2_207_86628_20131011_145244_outLine +BABEL_OP2_207_86713_20130924_095726_inLine +BABEL_OP2_207_86713_20130924_095726_outLine +BABEL_OP2_207_86722_20130723_173932_inLine +BABEL_OP2_207_86722_20130723_173932_outLine +BABEL_OP2_207_86826_20131010_131452_inLine +BABEL_OP2_207_86826_20131010_131452_outLine 
+BABEL_OP2_207_87074_20130702_114658_inLine +BABEL_OP2_207_87074_20130702_114658_outLine +BABEL_OP2_207_87298_20130722_163007_inLine +BABEL_OP2_207_87298_20130722_163007_outLine +BABEL_OP2_207_87298_20130722_164947_inLine +BABEL_OP2_207_87298_20130722_164947_outLine +BABEL_OP2_207_87489_20130925_122043_inLine +BABEL_OP2_207_87489_20130925_122043_outLine +BABEL_OP2_207_87777_20130827_113252_inLine +BABEL_OP2_207_87777_20130827_113252_outLine +BABEL_OP2_207_87884_20130911_154713_inLine +BABEL_OP2_207_87884_20130911_154713_outLine +BABEL_OP2_207_87921_20130909_222741_inLine +BABEL_OP2_207_87921_20130909_222741_outLine +BABEL_OP2_207_88776_20130628_223035_inLine +BABEL_OP2_207_88776_20130628_223035_outLine +BABEL_OP2_207_89059_20130830_150700_inLine +BABEL_OP2_207_89059_20130830_150700_outLine +BABEL_OP2_207_89650_20131220_191027_inLine +BABEL_OP2_207_89650_20131220_191027_outLine +BABEL_OP2_207_89877_20130822_133155_inLine +BABEL_OP2_207_89877_20130822_133155_outLine +BABEL_OP2_207_90572_20130927_112514_inLine +BABEL_OP2_207_90572_20130927_112514_outLine +BABEL_OP2_207_91125_20130622_154739_inLine +BABEL_OP2_207_91125_20130622_154739_outLine +BABEL_OP2_207_91383_20131017_164250_inLine +BABEL_OP2_207_91383_20131017_164250_outLine +BABEL_OP2_207_91760_20131008_175549_inLine +BABEL_OP2_207_91760_20131008_175549_outLine +BABEL_OP2_207_91888_20131002_140054_inLine +BABEL_OP2_207_91888_20131002_140054_outLine +BABEL_OP2_207_92736_20130913_142730_inLine +BABEL_OP2_207_92736_20130913_142730_outLine +BABEL_OP2_207_93475_20130712_141154_inLine +BABEL_OP2_207_93475_20130712_141154_outLine +BABEL_OP2_207_94262_20130912_223931_inLine +BABEL_OP2_207_94262_20130912_223931_outLine +BABEL_OP2_207_94869_20130627_162540_inLine +BABEL_OP2_207_94869_20130627_162540_outLine +BABEL_OP2_207_95077_20130910_113448_inLine +BABEL_OP2_207_95077_20130910_113448_outLine +BABEL_OP2_207_95231_20131029_145824_inLine +BABEL_OP2_207_95231_20131029_145824_outLine +BABEL_OP2_207_95269_20130725_140512_inLine +BABEL_OP2_207_95269_20130725_140512_outLine +BABEL_OP2_207_95663_20130626_085943_inLine +BABEL_OP2_207_95663_20130626_085943_outLine +BABEL_OP2_207_96190_20130730_105000_inLine +BABEL_OP2_207_96190_20130730_105000_outLine +BABEL_OP2_207_96525_20130919_151001_inLine +BABEL_OP2_207_96525_20130919_151001_outLine +BABEL_OP2_207_96690_20130808_133431_inLine +BABEL_OP2_207_96690_20130808_133431_outLine +BABEL_OP2_207_96808_20131007_222455_inLine +BABEL_OP2_207_96808_20131007_222455_outLine +BABEL_OP2_207_96820_20130815_171850_inLine +BABEL_OP2_207_96820_20130815_171850_outLine +BABEL_OP2_207_96820_20130815_172511_inLine +BABEL_OP2_207_96820_20130815_172511_outLine +BABEL_OP2_207_96910_20130723_132125_inLine +BABEL_OP2_207_96910_20130723_132125_outLine +BABEL_OP2_207_97220_20131015_210228_inLine +BABEL_OP2_207_97220_20131015_210228_outLine +BABEL_OP2_207_97557_20130824_125158_inLine +BABEL_OP2_207_97557_20130824_125158_outLine +BABEL_OP2_207_97588_20130720_172415_inLine +BABEL_OP2_207_97588_20130720_172415_outLine +BABEL_OP2_207_97731_20130920_141703_inLine +BABEL_OP2_207_97731_20130920_141703_outLine +BABEL_OP2_207_97836_20130930_145119_inLine +BABEL_OP2_207_97836_20130930_145119_outLine +BABEL_OP2_207_97896_20130807_165056_inLine +BABEL_OP2_207_97896_20130807_165056_outLine +BABEL_OP2_207_97911_20131017_134323_inLine +BABEL_OP2_207_97911_20131017_134323_outLine +BABEL_OP2_207_98390_20130630_121753_inLine +BABEL_OP2_207_98390_20130630_121753_outLine +BABEL_OP2_207_98489_20130712_001025_inLine 
+BABEL_OP2_207_98489_20130712_001025_outLine +BABEL_OP2_207_98565_20131220_143328_inLine +BABEL_OP2_207_98565_20131220_143328_outLine +BABEL_OP2_207_99289_20130930_212352_inLine +BABEL_OP2_207_99289_20130930_212352_outLine +BABEL_OP2_207_99998_20130730_104201_inLine +BABEL_OP2_207_99998_20130730_104201_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list new file mode 100644 index 00000000000..bd95fc6c89a --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list @@ -0,0 +1,539 @@ +BABEL_OP2_207_11096_20131010_155716_inLine +BABEL_OP2_207_11096_20131010_155716_outLine +BABEL_OP2_207_12635_20130926_134703_inLine +BABEL_OP2_207_12635_20130926_134703_outLine +BABEL_OP2_207_13189_20130924_113930_inLine +BABEL_OP2_207_13189_20130924_113930_outLine +BABEL_OP2_207_14097_20131211_145352_inLine +BABEL_OP2_207_14097_20131211_145352_outLine +BABEL_OP2_207_15324_20130824_114737_inLine +BABEL_OP2_207_15324_20130824_114737_outLine +BABEL_OP2_207_15324_20130824_115222_inLine +BABEL_OP2_207_15324_20130824_115222_outLine +BABEL_OP2_207_15324_20130824_120315_inLine +BABEL_OP2_207_15324_20130824_120315_outLine +BABEL_OP2_207_16787_20130807_141736_inLine +BABEL_OP2_207_16787_20130807_141736_outLine +BABEL_OP2_207_17165_20130811_161522_inLine +BABEL_OP2_207_17165_20130811_161522_outLine +BABEL_OP2_207_17582_20131014_140754_inLine +BABEL_OP2_207_17582_20131014_140754_outLine +BABEL_OP2_207_17881_20130927_103059_inLine +BABEL_OP2_207_17881_20130927_103059_outLine +BABEL_OP2_207_17890_20130905_165333_inLine +BABEL_OP2_207_17890_20130905_165333_outLine +BABEL_OP2_207_17914_20130926_134141_inLine +BABEL_OP2_207_17914_20130926_134141_outLine +BABEL_OP2_207_18118_20130912_105508_inLine +BABEL_OP2_207_18118_20130912_105508_outLine +BABEL_OP2_207_18380_20130811_091120_inLine +BABEL_OP2_207_18380_20130811_091120_outLine +BABEL_OP2_207_18766_20131007_145032_inLine +BABEL_OP2_207_18766_20131007_145032_outLine +BABEL_OP2_207_19120_20131001_112430_inLine +BABEL_OP2_207_19120_20131001_113821_inLine +BABEL_OP2_207_19130_20130915_130323_inLine +BABEL_OP2_207_19130_20130915_130323_outLine +BABEL_OP2_207_19130_20130915_170627_inLine +BABEL_OP2_207_19130_20130915_170627_outLine +BABEL_OP2_207_19444_20131027_115915_inLine +BABEL_OP2_207_19444_20131027_115915_outLine +BABEL_OP2_207_19621_20130820_123522_inLine +BABEL_OP2_207_19621_20130820_123522_outLine +BABEL_OP2_207_19663_20130808_130208_inLine +BABEL_OP2_207_19663_20130808_130208_outLine +BABEL_OP2_207_19832_20131023_131334_inLine +BABEL_OP2_207_19832_20131023_131334_outLine +BABEL_OP2_207_20738_20130925_150141_inLine +BABEL_OP2_207_20738_20130925_150141_outLine +BABEL_OP2_207_20768_20130918_153000_inLine +BABEL_OP2_207_20768_20130918_153000_outLine +BABEL_OP2_207_21393_20131009_171742_inLine +BABEL_OP2_207_21393_20131009_172913_inLine +BABEL_OP2_207_21435_20130930_113048_inLine +BABEL_OP2_207_21435_20130930_113048_outLine +BABEL_OP2_207_23355_20131028_195808_inLine +BABEL_OP2_207_23355_20131028_195808_outLine +BABEL_OP2_207_23395_20130819_172407_inLine +BABEL_OP2_207_23395_20130819_172407_outLine +BABEL_OP2_207_24017_20130920_143300_inLine +BABEL_OP2_207_24017_20130920_143300_outLine +BABEL_OP2_207_24231_20131004_142046_inLine +BABEL_OP2_207_24231_20131004_142046_outLine +BABEL_OP2_207_24241_20131022_163927_inLine +BABEL_OP2_207_24241_20131022_163927_outLine +BABEL_OP2_207_24587_20131028_181902_inLine +BABEL_OP2_207_24587_20131028_181902_outLine 
+BABEL_OP2_207_25068_20131128_155214_inLine +BABEL_OP2_207_25068_20131128_155214_outLine +BABEL_OP2_207_25198_20131009_144048_inLine +BABEL_OP2_207_25198_20131009_144048_outLine +BABEL_OP2_207_26206_20130905_123052_inLine +BABEL_OP2_207_26206_20130905_123052_outLine +BABEL_OP2_207_26398_20131007_122710_inLine +BABEL_OP2_207_26398_20131007_122710_outLine +BABEL_OP2_207_27042_20130913_145438_inLine +BABEL_OP2_207_27042_20130913_145438_outLine +BABEL_OP2_207_28538_20130809_210336_inLine +BABEL_OP2_207_28538_20130809_210336_outLine +BABEL_OP2_207_28585_20130921_125721_inLine +BABEL_OP2_207_28585_20130921_125721_outLine +BABEL_OP2_207_29021_20131002_153001_inLine +BABEL_OP2_207_29021_20131002_153001_outLine +BABEL_OP2_207_29208_20130808_130335_inLine +BABEL_OP2_207_29208_20130808_130335_outLine +BABEL_OP2_207_29643_20131010_214342_inLine +BABEL_OP2_207_29643_20131010_214342_outLine +BABEL_OP2_207_30497_20131001_130218_inLine +BABEL_OP2_207_30497_20131001_130218_outLine +BABEL_OP2_207_30869_20130920_162014_inLine +BABEL_OP2_207_30869_20130920_162014_outLine +BABEL_OP2_207_31182_20130917_210449_inLine +BABEL_OP2_207_31182_20130917_210449_outLine +BABEL_OP2_207_31184_20130809_153124_inLine +BABEL_OP2_207_31184_20130809_153124_outLine +BABEL_OP2_207_31583_20130916_202055_inLine +BABEL_OP2_207_31583_20130916_202055_outLine +BABEL_OP2_207_31628_20130829_135440_inLine +BABEL_OP2_207_31628_20130829_135440_outLine +BABEL_OP2_207_31979_20130808_164711_inLine +BABEL_OP2_207_31979_20130808_164711_outLine +BABEL_OP2_207_31979_20130808_165705_inLine +BABEL_OP2_207_31979_20130808_165705_outLine +BABEL_OP2_207_32301_20130905_171450_inLine +BABEL_OP2_207_32301_20130905_171450_outLine +BABEL_OP2_207_32861_20131001_201155_inLine +BABEL_OP2_207_32861_20131001_201155_outLine +BABEL_OP2_207_32872_20131007_132753_inLine +BABEL_OP2_207_32872_20131007_132753_outLine +BABEL_OP2_207_32914_20130828_143138_inLine +BABEL_OP2_207_32914_20130828_143138_outLine +BABEL_OP2_207_33251_20130826_113656_inLine +BABEL_OP2_207_33251_20130826_113656_outLine +BABEL_OP2_207_33635_20130810_084448_inLine +BABEL_OP2_207_33635_20130810_084448_outLine +BABEL_OP2_207_34336_20130723_154022_inLine +BABEL_OP2_207_34336_20130723_154022_outLine +BABEL_OP2_207_34903_20130826_111451_inLine +BABEL_OP2_207_34903_20130826_111451_outLine +BABEL_OP2_207_34903_20130826_112452_inLine +BABEL_OP2_207_34903_20130826_112452_outLine +BABEL_OP2_207_35202_20130904_140235_inLine +BABEL_OP2_207_35202_20130904_140235_outLine +BABEL_OP2_207_36059_20130828_142450_inLine +BABEL_OP2_207_36059_20130828_142450_outLine +BABEL_OP2_207_36147_20131128_144158_inLine +BABEL_OP2_207_36147_20131128_144158_outLine +BABEL_OP2_207_36219_20130801_142236_inLine +BABEL_OP2_207_36219_20130801_142236_outLine +BABEL_OP2_207_36990_20130813_203843_inLine +BABEL_OP2_207_36990_20130813_203843_outLine +BABEL_OP2_207_36990_20130813_205054_inLine +BABEL_OP2_207_36990_20130813_205054_outLine +BABEL_OP2_207_37064_20130802_163007_inLine +BABEL_OP2_207_37064_20130802_163007_outLine +BABEL_OP2_207_37229_20131014_133555_inLine +BABEL_OP2_207_37229_20131014_133555_outLine +BABEL_OP2_207_37281_20130809_155629_inLine +BABEL_OP2_207_37281_20130809_155629_outLine +BABEL_OP2_207_37598_20130822_115445_inLine +BABEL_OP2_207_37598_20130822_115445_outLine +BABEL_OP2_207_38076_20130828_114052_inLine +BABEL_OP2_207_38076_20130828_114052_outLine +BABEL_OP2_207_38750_20130912_115957_inLine +BABEL_OP2_207_38750_20130912_115957_outLine +BABEL_OP2_207_38979_20130925_142422_inLine 
+BABEL_OP2_207_38979_20130925_142422_outLine +BABEL_OP2_207_39059_20130924_141830_inLine +BABEL_OP2_207_39059_20130924_141830_outLine +BABEL_OP2_207_39159_20130802_184611_inLine +BABEL_OP2_207_39159_20130802_184611_outLine +BABEL_OP2_207_39680_20130924_150026_inLine +BABEL_OP2_207_39680_20130924_150026_outLine +BABEL_OP2_207_41097_20130826_120511_inLine +BABEL_OP2_207_41097_20130826_120511_outLine +BABEL_OP2_207_41233_20130919_151406_inLine +BABEL_OP2_207_41233_20130919_151406_outLine +BABEL_OP2_207_41692_20131008_115554_inLine +BABEL_OP2_207_41692_20131008_115554_outLine +BABEL_OP2_207_42155_20130820_153344_inLine +BABEL_OP2_207_42155_20130820_153344_outLine +BABEL_OP2_207_42155_20130820_155002_inLine +BABEL_OP2_207_42155_20130820_155002_outLine +BABEL_OP2_207_42243_20130730_171007_inLine +BABEL_OP2_207_42243_20130730_171620_inLine +BABEL_OP2_207_42526_20130903_162134_inLine +BABEL_OP2_207_42526_20130903_162134_outLine +BABEL_OP2_207_42526_20130903_163434_inLine +BABEL_OP2_207_42526_20130903_163434_outLine +BABEL_OP2_207_42718_20130828_165932_inLine +BABEL_OP2_207_42718_20130828_165932_outLine +BABEL_OP2_207_42848_20131010_143925_inLine +BABEL_OP2_207_42848_20131010_143925_outLine +BABEL_OP2_207_42883_20131008_131439_inLine +BABEL_OP2_207_42883_20131008_131439_outLine +BABEL_OP2_207_43074_20131213_105423_inLine +BABEL_OP2_207_43074_20131213_105423_outLine +BABEL_OP2_207_43285_20130905_162602_inLine +BABEL_OP2_207_43285_20130905_162602_outLine +BABEL_OP2_207_43388_20130809_194529_inLine +BABEL_OP2_207_43388_20130809_194529_outLine +BABEL_OP2_207_43789_20130809_213917_inLine +BABEL_OP2_207_43789_20130809_213917_outLine +BABEL_OP2_207_43990_20131027_190409_inLine +BABEL_OP2_207_43990_20131027_190409_outLine +BABEL_OP2_207_44290_20131002_160104_inLine +BABEL_OP2_207_44290_20131002_160104_outLine +BABEL_OP2_207_44847_20130827_155200_inLine +BABEL_OP2_207_44847_20130827_155200_outLine +BABEL_OP2_207_44868_20130904_135956_inLine +BABEL_OP2_207_44868_20130904_135956_outLine +BABEL_OP2_207_45106_20130823_154724_inLine +BABEL_OP2_207_45106_20130823_154724_outLine +BABEL_OP2_207_46315_20130905_150622_inLine +BABEL_OP2_207_46315_20130905_150622_outLine +BABEL_OP2_207_47799_20131023_123730_inLine +BABEL_OP2_207_47799_20131023_123730_outLine +BABEL_OP2_207_47877_20130902_143454_inLine +BABEL_OP2_207_47877_20130902_143454_outLine +BABEL_OP2_207_48200_20130921_155444_inLine +BABEL_OP2_207_48200_20130921_155444_outLine +BABEL_OP2_207_48789_20130812_134605_inLine +BABEL_OP2_207_48789_20130812_134605_outLine +BABEL_OP2_207_49118_20130920_121936_inLine +BABEL_OP2_207_49118_20130920_121936_outLine +BABEL_OP2_207_49197_20130807_131817_inLine +BABEL_OP2_207_49197_20130807_131817_outLine +BABEL_OP2_207_49812_20130922_204620_inLine +BABEL_OP2_207_49812_20130922_204620_outLine +BABEL_OP2_207_49902_20130724_154629_inLine +BABEL_OP2_207_49902_20130724_154629_outLine +BABEL_OP2_207_50630_20130905_150725_inLine +BABEL_OP2_207_50630_20130905_150725_outLine +BABEL_OP2_207_50745_20130930_091255_inLine +BABEL_OP2_207_50745_20130930_091255_outLine +BABEL_OP2_207_50810_20130625_080815_outLine +BABEL_OP2_207_50940_20131212_122606_inLine +BABEL_OP2_207_50940_20131212_122606_outLine +BABEL_OP2_207_50958_20130808_153539_inLine +BABEL_OP2_207_50958_20130808_155452_inLine +BABEL_OP2_207_51414_20131008_124320_inLine +BABEL_OP2_207_51414_20131008_124320_outLine +BABEL_OP2_207_51540_20130920_151858_inLine +BABEL_OP2_207_51540_20130920_151858_outLine +BABEL_OP2_207_52222_20131101_145127_inLine 
+BABEL_OP2_207_52222_20131101_145127_outLine +BABEL_OP2_207_52442_20130814_204040_inLine +BABEL_OP2_207_52442_20130814_204040_outLine +BABEL_OP2_207_52483_20131023_121543_inLine +BABEL_OP2_207_52483_20131023_121543_outLine +BABEL_OP2_207_53072_20131007_135601_inLine +BABEL_OP2_207_53072_20131007_135601_outLine +BABEL_OP2_207_53665_20131001_124434_inLine +BABEL_OP2_207_53665_20131001_124434_outLine +BABEL_OP2_207_54634_20131106_133052_inLine +BABEL_OP2_207_54634_20131106_133052_outLine +BABEL_OP2_207_56023_20130922_114453_inLine +BABEL_OP2_207_56023_20130922_114453_outLine +BABEL_OP2_207_56213_20130911_155753_inLine +BABEL_OP2_207_56213_20130911_155753_outLine +BABEL_OP2_207_56306_20130903_120057_inLine +BABEL_OP2_207_56306_20130903_120057_outLine +BABEL_OP2_207_56345_20131030_214035_inLine +BABEL_OP2_207_56345_20131030_214035_outLine +BABEL_OP2_207_56677_20130911_181638_inLine +BABEL_OP2_207_56677_20130911_181638_outLine +BABEL_OP2_207_56720_20130910_114920_inLine +BABEL_OP2_207_56720_20130910_114920_outLine +BABEL_OP2_207_57065_20130903_113823_inLine +BABEL_OP2_207_57065_20130903_113823_outLine +BABEL_OP2_207_57219_20131016_161014_inLine +BABEL_OP2_207_57219_20131016_161014_outLine +BABEL_OP2_207_57464_20131002_141306_inLine +BABEL_OP2_207_57464_20131002_141306_outLine +BABEL_OP2_207_57566_20130921_125810_inLine +BABEL_OP2_207_57566_20130921_125810_outLine +BABEL_OP2_207_57609_20130819_123817_inLine +BABEL_OP2_207_57609_20130819_123817_outLine +BABEL_OP2_207_57678_20130802_172845_inLine +BABEL_OP2_207_57678_20130802_172845_outLine +BABEL_OP2_207_57919_20131130_111652_inLine +BABEL_OP2_207_57919_20131130_111652_outLine +BABEL_OP2_207_58026_20131017_142517_inLine +BABEL_OP2_207_58026_20131017_142517_outLine +BABEL_OP2_207_58717_20130821_131155_inLine +BABEL_OP2_207_58717_20130821_131155_outLine +BABEL_OP2_207_59928_20130701_151227_inLine +BABEL_OP2_207_60310_20130915_115206_inLine +BABEL_OP2_207_60310_20130915_115206_outLine +BABEL_OP2_207_61040_20130915_174923_inLine +BABEL_OP2_207_61040_20130915_174923_outLine +BABEL_OP2_207_61225_20130624_075839_inLine +BABEL_OP2_207_61225_20130624_075839_outLine +BABEL_OP2_207_61225_20130816_102014_inLine +BABEL_OP2_207_61225_20130816_102014_outLine +BABEL_OP2_207_61435_20130920_151632_inLine +BABEL_OP2_207_61435_20130920_151632_outLine +BABEL_OP2_207_61888_20130925_153044_inLine +BABEL_OP2_207_61888_20130925_153044_outLine +BABEL_OP2_207_61971_20131002_122934_inLine +BABEL_OP2_207_61971_20131002_122934_outLine +BABEL_OP2_207_61971_20131002_124937_inLine +BABEL_OP2_207_61971_20131002_124937_outLine +BABEL_OP2_207_62286_20130808_152914_inLine +BABEL_OP2_207_62286_20130808_152914_outLine +BABEL_OP2_207_62456_20131008_120833_inLine +BABEL_OP2_207_62456_20131008_120833_outLine +BABEL_OP2_207_62835_20130813_200412_inLine +BABEL_OP2_207_62835_20130813_200412_outLine +BABEL_OP2_207_63220_20130826_161151_inLine +BABEL_OP2_207_63220_20130826_161151_outLine +BABEL_OP2_207_63309_20131218_175444_inLine +BABEL_OP2_207_63309_20131218_175444_outLine +BABEL_OP2_207_63425_20130829_145909_inLine +BABEL_OP2_207_63425_20130829_145909_outLine +BABEL_OP2_207_63523_20130829_130857_inLine +BABEL_OP2_207_63523_20130829_130857_outLine +BABEL_OP2_207_63523_20130829_131711_inLine +BABEL_OP2_207_63523_20130829_131711_outLine +BABEL_OP2_207_63730_20131015_214600_inLine +BABEL_OP2_207_63730_20131015_214600_outLine +BABEL_OP2_207_63938_20130926_154144_inLine +BABEL_OP2_207_63938_20130926_154144_outLine +BABEL_OP2_207_63938_20130926_155144_inLine 
+BABEL_OP2_207_63938_20130926_155144_outLine +BABEL_OP2_207_64014_20130926_150824_inLine +BABEL_OP2_207_64014_20130926_150824_outLine +BABEL_OP2_207_64259_20131102_110911_inLine +BABEL_OP2_207_64259_20131102_110911_outLine +BABEL_OP2_207_64638_20130829_133013_inLine +BABEL_OP2_207_64638_20130829_133013_outLine +BABEL_OP2_207_64902_20130930_143110_inLine +BABEL_OP2_207_64902_20130930_143110_outLine +BABEL_OP2_207_65064_20130820_141717_inLine +BABEL_OP2_207_65064_20130820_141717_outLine +BABEL_OP2_207_65477_20130807_163701_inLine +BABEL_OP2_207_65640_20131002_143110_inLine +BABEL_OP2_207_65640_20131002_143110_outLine +BABEL_OP2_207_66026_20130911_163013_inLine +BABEL_OP2_207_66026_20130911_163013_outLine +BABEL_OP2_207_66959_20130910_082006_inLine +BABEL_OP2_207_66959_20130910_082006_outLine +BABEL_OP2_207_66959_20130910_082705_inLine +BABEL_OP2_207_66959_20130910_082705_outLine +BABEL_OP2_207_66959_20130910_083542_inLine +BABEL_OP2_207_66959_20130910_083542_outLine +BABEL_OP2_207_66975_20131203_124359_inLine +BABEL_OP2_207_66975_20131203_124359_outLine +BABEL_OP2_207_67085_20131004_122616_inLine +BABEL_OP2_207_67085_20131004_122616_outLine +BABEL_OP2_207_67552_20130904_171052_inLine +BABEL_OP2_207_67552_20130904_171052_outLine +BABEL_OP2_207_67964_20131003_163118_inLine +BABEL_OP2_207_67964_20131003_163118_outLine +BABEL_OP2_207_68306_20130906_161631_inLine +BABEL_OP2_207_68306_20130906_161631_outLine +BABEL_OP2_207_69107_20130821_115813_inLine +BABEL_OP2_207_69107_20130821_115813_outLine +BABEL_OP2_207_69107_20130821_120807_inLine +BABEL_OP2_207_69107_20130821_120807_outLine +BABEL_OP2_207_69153_20130912_183854_inLine +BABEL_OP2_207_69153_20130912_183854_outLine +BABEL_OP2_207_69885_20130907_114201_inLine +BABEL_OP2_207_69885_20130907_114201_outLine +BABEL_OP2_207_69982_20131018_120252_inLine +BABEL_OP2_207_69982_20131018_120252_outLine +BABEL_OP2_207_70182_20131014_163540_inLine +BABEL_OP2_207_70182_20131014_163540_outLine +BABEL_OP2_207_70343_20130907_114751_inLine +BABEL_OP2_207_70343_20130907_114751_outLine +BABEL_OP2_207_70460_20130925_151332_inLine +BABEL_OP2_207_70460_20130925_151332_outLine +BABEL_OP2_207_70460_20130925_152713_inLine +BABEL_OP2_207_70460_20130925_152713_outLine +BABEL_OP2_207_70526_20130908_193512_inLine +BABEL_OP2_207_70526_20130908_193512_outLine +BABEL_OP2_207_70986_20131030_190232_inLine +BABEL_OP2_207_70986_20131030_190232_outLine +BABEL_OP2_207_71189_20130930_121030_inLine +BABEL_OP2_207_71189_20130930_121030_outLine +BABEL_OP2_207_71460_20131128_152217_inLine +BABEL_OP2_207_71460_20131128_152217_outLine +BABEL_OP2_207_72007_20130906_152449_inLine +BABEL_OP2_207_72007_20130906_152449_outLine +BABEL_OP2_207_72349_20131002_145602_inLine +BABEL_OP2_207_72349_20131002_145602_outLine +BABEL_OP2_207_73301_20130801_133004_inLine +BABEL_OP2_207_73301_20130801_133004_outLine +BABEL_OP2_207_73485_20130907_132923_inLine +BABEL_OP2_207_73485_20130907_132923_outLine +BABEL_OP2_207_73757_20130813_005856_inLine +BABEL_OP2_207_73757_20130813_005856_outLine +BABEL_OP2_207_73757_20130813_011142_inLine +BABEL_OP2_207_73757_20130813_011142_outLine +BABEL_OP2_207_75342_20130906_143544_inLine +BABEL_OP2_207_75342_20130906_143544_outLine +BABEL_OP2_207_75460_20131014_160822_inLine +BABEL_OP2_207_75460_20131014_160822_outLine +BABEL_OP2_207_76793_20131028_174027_inLine +BABEL_OP2_207_76793_20131028_174027_outLine +BABEL_OP2_207_76970_20131018_142728_inLine +BABEL_OP2_207_76970_20131018_142728_outLine +BABEL_OP2_207_77242_20131015_210438_inLine 
+BABEL_OP2_207_77242_20131015_210438_outLine +BABEL_OP2_207_78016_20130725_161812_outLine +BABEL_OP2_207_78116_20130906_165511_inLine +BABEL_OP2_207_78116_20130906_165511_outLine +BABEL_OP2_207_78360_20130926_154542_inLine +BABEL_OP2_207_78360_20130926_154542_outLine +BABEL_OP2_207_78544_20130829_140559_inLine +BABEL_OP2_207_78544_20130829_140559_outLine +BABEL_OP2_207_79139_20130811_111254_inLine +BABEL_OP2_207_79139_20130811_111254_outLine +BABEL_OP2_207_80622_20130824_120649_inLine +BABEL_OP2_207_80622_20130824_120649_outLine +BABEL_OP2_207_80897_20130824_111625_inLine +BABEL_OP2_207_80897_20130824_111625_outLine +BABEL_OP2_207_81229_20130807_135935_inLine +BABEL_OP2_207_81229_20130807_135935_outLine +BABEL_OP2_207_81810_20130831_144019_inLine +BABEL_OP2_207_81810_20130831_144019_outLine +BABEL_OP2_207_81810_20130831_145233_inLine +BABEL_OP2_207_81810_20130831_145233_outLine +BABEL_OP2_207_82089_20130809_131053_inLine +BABEL_OP2_207_82089_20130809_131053_outLine +BABEL_OP2_207_82224_20130923_132931_inLine +BABEL_OP2_207_82361_20131001_152932_inLine +BABEL_OP2_207_82361_20131001_152932_outLine +BABEL_OP2_207_82473_20130702_072644_inLine +BABEL_OP2_207_82966_20130918_130322_inLine +BABEL_OP2_207_82966_20130918_130322_outLine +BABEL_OP2_207_83062_20131002_115950_inLine +BABEL_OP2_207_83062_20131002_115950_outLine +BABEL_OP2_207_83929_20130621_172038_outLine +BABEL_OP2_207_83935_20130906_160858_inLine +BABEL_OP2_207_83935_20130906_160858_outLine +BABEL_OP2_207_84055_20130926_130321_inLine +BABEL_OP2_207_84055_20130926_130321_outLine +BABEL_OP2_207_84055_20130926_131535_inLine +BABEL_OP2_207_84055_20130926_131535_outLine +BABEL_OP2_207_84061_20130725_161035_inLine +BABEL_OP2_207_84061_20130725_161035_outLine +BABEL_OP2_207_84327_20130907_112232_inLine +BABEL_OP2_207_84327_20130907_112232_outLine +BABEL_OP2_207_84339_20130908_213808_inLine +BABEL_OP2_207_84339_20130908_213808_outLine +BABEL_OP2_207_84370_20131017_114407_inLine +BABEL_OP2_207_84370_20131017_114407_outLine +BABEL_OP2_207_84458_20130911_150603_inLine +BABEL_OP2_207_84458_20130911_150603_outLine +BABEL_OP2_207_84469_20130911_152956_inLine +BABEL_OP2_207_84469_20130911_152956_outLine +BABEL_OP2_207_84709_20131025_164240_inLine +BABEL_OP2_207_84709_20131025_164240_outLine +BABEL_OP2_207_84737_20130924_104520_inLine +BABEL_OP2_207_84737_20130924_104520_outLine +BABEL_OP2_207_84838_20130918_142125_inLine +BABEL_OP2_207_84838_20130918_142125_outLine +BABEL_OP2_207_85254_20131016_163511_inLine +BABEL_OP2_207_85254_20131016_163511_outLine +BABEL_OP2_207_85325_20130908_204430_inLine +BABEL_OP2_207_85325_20130908_204430_outLine +BABEL_OP2_207_86597_20131015_223953_inLine +BABEL_OP2_207_86597_20131015_223953_outLine +BABEL_OP2_207_86888_20130823_120853_inLine +BABEL_OP2_207_86888_20130823_120853_outLine +BABEL_OP2_207_86888_20130823_122304_inLine +BABEL_OP2_207_86888_20130823_122304_outLine +BABEL_OP2_207_87545_20131004_134332_inLine +BABEL_OP2_207_87545_20131004_134332_outLine +BABEL_OP2_207_87889_20130828_123340_inLine +BABEL_OP2_207_87889_20130828_123340_outLine +BABEL_OP2_207_88372_20130927_123913_inLine +BABEL_OP2_207_88372_20130927_123913_outLine +BABEL_OP2_207_88550_20131002_133933_inLine +BABEL_OP2_207_88550_20131002_133933_outLine +BABEL_OP2_207_88601_20130812_143956_inLine +BABEL_OP2_207_88601_20130812_143956_outLine +BABEL_OP2_207_88669_20130823_134613_inLine +BABEL_OP2_207_88669_20130823_134613_outLine +BABEL_OP2_207_89358_20130820_133904_inLine +BABEL_OP2_207_89358_20130820_133904_outLine 
+BABEL_OP2_207_89794_20130828_091302_inLine +BABEL_OP2_207_89794_20130828_091302_outLine +BABEL_OP2_207_90080_20131003_143629_inLine +BABEL_OP2_207_90080_20131003_143629_outLine +BABEL_OP2_207_90440_20131027_175417_inLine +BABEL_OP2_207_90440_20131027_175417_outLine +BABEL_OP2_207_90709_20130627_182820_inLine +BABEL_OP2_207_90709_20130627_182820_outLine +BABEL_OP2_207_90739_20130807_151133_inLine +BABEL_OP2_207_90739_20130807_151133_outLine +BABEL_OP2_207_90760_20131016_111829_inLine +BABEL_OP2_207_90760_20131016_111829_outLine +BABEL_OP2_207_91189_20131011_125932_inLine +BABEL_OP2_207_91189_20131011_125932_outLine +BABEL_OP2_207_91372_20130909_134637_inLine +BABEL_OP2_207_91372_20130909_134637_outLine +BABEL_OP2_207_91930_20131001_222834_inLine +BABEL_OP2_207_91930_20131001_222834_outLine +BABEL_OP2_207_91930_20131001_223632_inLine +BABEL_OP2_207_91930_20131001_223632_outLine +BABEL_OP2_207_92077_20131007_163003_inLine +BABEL_OP2_207_92077_20131007_163003_outLine +BABEL_OP2_207_92176_20130813_133457_inLine +BABEL_OP2_207_92176_20130813_133457_outLine +BABEL_OP2_207_92557_20130924_134800_inLine +BABEL_OP2_207_92557_20130924_134800_outLine +BABEL_OP2_207_92643_20131007_150231_inLine +BABEL_OP2_207_92643_20131007_150231_outLine +BABEL_OP2_207_92698_20130812_235059_inLine +BABEL_OP2_207_92698_20130812_235059_outLine +BABEL_OP2_207_92757_20130902_145657_inLine +BABEL_OP2_207_92757_20130902_145657_outLine +BABEL_OP2_207_92757_20130902_151025_inLine +BABEL_OP2_207_92757_20130902_151025_outLine +BABEL_OP2_207_92757_20130902_152031_inLine +BABEL_OP2_207_92757_20130902_152031_outLine +BABEL_OP2_207_93469_20131004_145605_inLine +BABEL_OP2_207_93469_20131004_145605_outLine +BABEL_OP2_207_94002_20130813_140301_inLine +BABEL_OP2_207_94002_20130813_140301_outLine +BABEL_OP2_207_94025_20130904_125944_inLine +BABEL_OP2_207_94025_20130904_125944_outLine +BABEL_OP2_207_94025_20130904_130959_inLine +BABEL_OP2_207_94025_20130904_130959_outLine +BABEL_OP2_207_94166_20130925_152248_inLine +BABEL_OP2_207_94166_20130925_152248_outLine +BABEL_OP2_207_94237_20131004_202859_inLine +BABEL_OP2_207_94237_20131004_202859_outLine +BABEL_OP2_207_94409_20130809_220412_inLine +BABEL_OP2_207_94409_20130809_220412_outLine +BABEL_OP2_207_94465_20130909_125729_inLine +BABEL_OP2_207_94465_20130909_125729_outLine +BABEL_OP2_207_94465_20130909_130933_inLine +BABEL_OP2_207_94465_20130909_130933_outLine +BABEL_OP2_207_94745_20130829_131647_inLine +BABEL_OP2_207_94745_20130829_131647_outLine +BABEL_OP2_207_94803_20131101_171456_inLine +BABEL_OP2_207_94803_20131101_171456_outLine +BABEL_OP2_207_95670_20130801_184732_inLine +BABEL_OP2_207_95670_20130801_184732_outLine +BABEL_OP2_207_95670_20130801_185813_inLine +BABEL_OP2_207_95670_20130801_185813_outLine +BABEL_OP2_207_95903_20130927_143755_inLine +BABEL_OP2_207_95903_20130927_143755_outLine +BABEL_OP2_207_96088_20131002_131712_inLine +BABEL_OP2_207_96088_20131002_131712_outLine +BABEL_OP2_207_96205_20130820_122740_inLine +BABEL_OP2_207_96205_20130820_122740_outLine +BABEL_OP2_207_96405_20130802_164853_inLine +BABEL_OP2_207_96405_20130802_164853_outLine +BABEL_OP2_207_96504_20130719_153914_inLine +BABEL_OP2_207_96504_20130719_155023_inLine +BABEL_OP2_207_96504_20130802_132920_inLine +BABEL_OP2_207_96584_20130926_074218_inLine +BABEL_OP2_207_96584_20130926_074218_outLine +BABEL_OP2_207_97097_20131001_132614_inLine +BABEL_OP2_207_97097_20131001_132614_outLine +BABEL_OP2_207_99887_20130924_102355_inLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/dev.list 
b/egs/babel/s5d/conf/lists/301-cebuano/dev.list new file mode 100644 index 00000000000..ecf3753ee7d --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/dev.list @@ -0,0 +1,134 @@ +BABEL_OP2_301_13792_20131111_122219_inLine +BABEL_OP2_301_13792_20131111_122219_outLine +BABEL_OP2_301_14141_20140118_202248_inLine +BABEL_OP2_301_14141_20140118_202248_outLine +BABEL_OP2_301_15262_20131105_213812_inLine +BABEL_OP2_301_15262_20131105_213812_outLine +BABEL_OP2_301_15262_20131105_230709_inLine +BABEL_OP2_301_15262_20131105_230709_outLine +BABEL_OP2_301_15638_20131210_131327_inLine +BABEL_OP2_301_15638_20131210_131327_outLine +BABEL_OP2_301_17127_20140106_175906_inLine +BABEL_OP2_301_17127_20140106_175906_outLine +BABEL_OP2_301_17881_20140122_201653_inLine +BABEL_OP2_301_17881_20140122_201653_outLine +BABEL_OP2_301_18078_20131226_153406_inLine +BABEL_OP2_301_18078_20131226_153406_outLine +BABEL_OP2_301_21109_20140102_180619_inLine +BABEL_OP2_301_21109_20140102_180619_outLine +BABEL_OP2_301_22280_20140206_202952_inLine +BABEL_OP2_301_22280_20140206_202952_outLine +BABEL_OP2_301_22466_20131015_174457_inLine +BABEL_OP2_301_22466_20131015_174457_outLine +BABEL_OP2_301_22612_20131217_202720_inLine +BABEL_OP2_301_22612_20131217_202720_outLine +BABEL_OP2_301_23505_20131023_135517_inLine +BABEL_OP2_301_23505_20131023_135517_outLine +BABEL_OP2_301_24241_20140214_170629_inLine +BABEL_OP2_301_24241_20140214_170629_outLine +BABEL_OP2_301_27082_20131209_203149_inLine +BABEL_OP2_301_27082_20131209_203149_outLine +BABEL_OP2_301_29685_20131203_182746_inLine +BABEL_OP2_301_29685_20131203_182746_outLine +BABEL_OP2_301_29685_20131203_184526_inLine +BABEL_OP2_301_29685_20131203_184526_outLine +BABEL_OP2_301_36059_20140118_204512_inLine +BABEL_OP2_301_36059_20140118_204512_outLine +BABEL_OP2_301_37281_20131205_190107_inLine +BABEL_OP2_301_37281_20131205_190107_outLine +BABEL_OP2_301_38340_20131128_145618_inLine +BABEL_OP2_301_38340_20131128_145618_outLine +BABEL_OP2_301_40713_20131126_193850_inLine +BABEL_OP2_301_40713_20131126_193850_outLine +BABEL_OP2_301_41958_20131127_145018_inLine +BABEL_OP2_301_41958_20131127_145018_outLine +BABEL_OP2_301_43239_20140102_190746_inLine +BABEL_OP2_301_43239_20140102_190746_outLine +BABEL_OP2_301_43646_20131019_165638_inLine +BABEL_OP2_301_43646_20131019_165638_outLine +BABEL_OP2_301_46008_20140126_192930_inLine +BABEL_OP2_301_46008_20140126_192930_outLine +BABEL_OP2_301_46333_20131027_181031_inLine +BABEL_OP2_301_46333_20131027_181031_outLine +BABEL_OP2_301_48789_20131209_181711_inLine +BABEL_OP2_301_48789_20131209_181711_outLine +BABEL_OP2_301_49902_20131127_180426_inLine +BABEL_OP2_301_49902_20131127_180426_outLine +BABEL_OP2_301_50565_20131025_202729_inLine +BABEL_OP2_301_50565_20131025_202729_outLine +BABEL_OP2_301_51530_20140125_195307_inLine +BABEL_OP2_301_51530_20140125_195307_outLine +BABEL_OP2_301_51955_20131125_182037_inLine +BABEL_OP2_301_51955_20131125_182037_outLine +BABEL_OP2_301_52301_20131107_133036_inLine +BABEL_OP2_301_52301_20131107_133036_outLine +BABEL_OP2_301_52301_20131107_135543_inLine +BABEL_OP2_301_52301_20131107_135543_outLine +BABEL_OP2_301_54744_20131202_184432_inLine +BABEL_OP2_301_54744_20131202_184432_outLine +BABEL_OP2_301_56370_20131101_175739_inLine +BABEL_OP2_301_56370_20131101_175739_outLine +BABEL_OP2_301_60299_20140202_130806_inLine +BABEL_OP2_301_60299_20140202_130806_outLine +BABEL_OP2_301_62362_20140129_154002_inLine +BABEL_OP2_301_62362_20140129_154002_outLine +BABEL_OP2_301_63425_20131213_184303_inLine 
+BABEL_OP2_301_63425_20131213_184303_outLine +BABEL_OP2_301_64759_20131103_154236_inLine +BABEL_OP2_301_64759_20131103_154236_outLine +BABEL_OP2_301_64870_20131226_133240_inLine +BABEL_OP2_301_64870_20131226_133240_outLine +BABEL_OP2_301_65252_20140126_190555_inLine +BABEL_OP2_301_65252_20140126_190555_outLine +BABEL_OP2_301_66026_20131216_194850_inLine +BABEL_OP2_301_66026_20131216_194850_outLine +BABEL_OP2_301_67085_20140126_181613_inLine +BABEL_OP2_301_67085_20140126_181613_outLine +BABEL_OP2_301_68306_20131212_171648_inLine +BABEL_OP2_301_68306_20131212_171648_outLine +BABEL_OP2_301_71404_20131112_205323_inLine +BABEL_OP2_301_71404_20131112_205323_outLine +BABEL_OP2_301_71404_20131112_211451_inLine +BABEL_OP2_301_71404_20131112_211451_outLine +BABEL_OP2_301_74226_20131213_195309_inLine +BABEL_OP2_301_74226_20131213_195309_outLine +BABEL_OP2_301_74455_20140115_152935_inLine +BABEL_OP2_301_74455_20140115_152935_outLine +BABEL_OP2_301_78194_20131015_181857_inLine +BABEL_OP2_301_78194_20131015_181857_outLine +BABEL_OP2_301_78194_20131015_183910_inLine +BABEL_OP2_301_78194_20131015_183910_outLine +BABEL_OP2_301_78360_20140110_190526_inLine +BABEL_OP2_301_78360_20140110_190526_outLine +BABEL_OP2_301_79660_20140201_160331_inLine +BABEL_OP2_301_79660_20140201_160331_outLine +BABEL_OP2_301_79820_20131127_235459_inLine +BABEL_OP2_301_79820_20131127_235459_outLine +BABEL_OP2_301_80897_20140206_142309_inLine +BABEL_OP2_301_80897_20140206_142309_outLine +BABEL_OP2_301_81427_20131126_151401_inLine +BABEL_OP2_301_81427_20131126_151401_outLine +BABEL_OP2_301_84611_20131125_193454_inLine +BABEL_OP2_301_84611_20131125_193454_outLine +BABEL_OP2_301_84709_20140220_141332_inLine +BABEL_OP2_301_84709_20140220_141332_outLine +BABEL_OP2_301_85179_20131227_172225_inLine +BABEL_OP2_301_85179_20131227_172225_outLine +BABEL_OP2_301_86467_20131112_182159_inLine +BABEL_OP2_301_86467_20131112_182159_outLine +BABEL_OP2_301_86467_20131112_193636_inLine +BABEL_OP2_301_86467_20131112_193636_outLine +BABEL_OP2_301_88550_20140128_150822_inLine +BABEL_OP2_301_88550_20140128_150822_outLine +BABEL_OP2_301_88873_20131202_130910_inLine +BABEL_OP2_301_88873_20131202_130910_outLine +BABEL_OP2_301_92792_20140123_104047_inLine +BABEL_OP2_301_92792_20140123_104047_outLine +BABEL_OP2_301_96985_20131021_164130_inLine +BABEL_OP2_301_96985_20131021_164130_outLine +BABEL_OP2_301_98489_20131123_232017_inLine +BABEL_OP2_301_98489_20131123_232017_outLine +BABEL_OP2_301_98489_20131123_233440_inLine +BABEL_OP2_301_98489_20131123_233440_outLine +BABEL_OP2_301_99516_20131022_111915_inLine +BABEL_OP2_301_99516_20131022_111915_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/eval.list b/egs/babel/s5d/conf/lists/301-cebuano/eval.list new file mode 100644 index 00000000000..6958122726d --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/eval.list @@ -0,0 +1,190 @@ +BABEL_OP2_301_10019_20131127_165625_inLine +BABEL_OP2_301_10019_20131127_165625_outLine +BABEL_OP2_301_10416_20131203_193332_inLine +BABEL_OP2_301_10416_20131203_193332_outLine +BABEL_OP2_301_12767_20131122_204245_inLine +BABEL_OP2_301_12767_20131122_204245_outLine +BABEL_OP2_301_13427_20131226_153605_inLine +BABEL_OP2_301_13427_20131226_153605_outLine +BABEL_OP2_301_13490_20131209_200441_inLine +BABEL_OP2_301_13490_20131209_200441_outLine +BABEL_OP2_301_14440_20131217_152957_inLine +BABEL_OP2_301_14440_20131217_152957_outLine +BABEL_OP2_301_14537_20140126_192700_inLine +BABEL_OP2_301_14537_20140126_192700_outLine +BABEL_OP2_301_16056_20131112_135620_inLine 
+BABEL_OP2_301_16056_20131112_135620_outLine +BABEL_OP2_301_16056_20131112_140413_inLine +BABEL_OP2_301_16056_20131112_140413_outLine +BABEL_OP2_301_16184_20131018_004611_inLine +BABEL_OP2_301_16184_20131018_004611_outLine +BABEL_OP2_301_16839_20140106_195749_inLine +BABEL_OP2_301_16839_20140106_195749_outLine +BABEL_OP2_301_17165_20131203_150708_inLine +BABEL_OP2_301_17165_20131203_150708_outLine +BABEL_OP2_301_18766_20140127_140851_inLine +BABEL_OP2_301_18766_20140127_140851_outLine +BABEL_OP2_301_19782_20131220_143639_inLine +BABEL_OP2_301_19782_20131220_143639_outLine +BABEL_OP2_301_19832_20140214_144414_inLine +BABEL_OP2_301_19832_20140214_144414_outLine +BABEL_OP2_301_20800_20131119_233324_inLine +BABEL_OP2_301_20800_20131119_233324_outLine +BABEL_OP2_301_22641_20131112_223928_inLine +BABEL_OP2_301_22641_20131112_223928_outLine +BABEL_OP2_301_23196_20140224_145440_inLine +BABEL_OP2_301_23196_20140224_145440_outLine +BABEL_OP2_301_23628_20131121_202709_inLine +BABEL_OP2_301_23628_20131121_202709_outLine +BABEL_OP2_301_26074_20140214_150738_inLine +BABEL_OP2_301_26074_20140214_150738_outLine +BABEL_OP2_301_28585_20140103_174051_inLine +BABEL_OP2_301_28585_20140103_174051_outLine +BABEL_OP2_301_29777_20131227_175745_inLine +BABEL_OP2_301_29777_20131227_175745_outLine +BABEL_OP2_301_32914_20140102_183534_inLine +BABEL_OP2_301_32914_20140102_183534_outLine +BABEL_OP2_301_33992_20140128_153304_inLine +BABEL_OP2_301_33992_20140128_153304_outLine +BABEL_OP2_301_35069_20140104_210141_inLine +BABEL_OP2_301_35069_20140104_210141_outLine +BABEL_OP2_301_36219_20131125_140227_inLine +BABEL_OP2_301_36219_20131125_140227_outLine +BABEL_OP2_301_36219_20131125_141324_inLine +BABEL_OP2_301_36219_20131125_141324_outLine +BABEL_OP2_301_36341_20131024_131700_inLine +BABEL_OP2_301_36341_20131024_131700_outLine +BABEL_OP2_301_36341_20131025_165924_inLine +BABEL_OP2_301_36341_20131025_165924_outLine +BABEL_OP2_301_37499_20140129_153724_inLine +BABEL_OP2_301_37499_20140129_153724_outLine +BABEL_OP2_301_40740_20140106_203616_inLine +BABEL_OP2_301_40740_20140106_203616_outLine +BABEL_OP2_301_41493_20131025_161722_inLine +BABEL_OP2_301_41493_20131025_161722_outLine +BABEL_OP2_301_41920_20131110_141258_inLine +BABEL_OP2_301_41920_20131110_141258_outLine +BABEL_OP2_301_41920_20131110_142621_inLine +BABEL_OP2_301_41920_20131110_142621_outLine +BABEL_OP2_301_42600_20131125_184712_inLine +BABEL_OP2_301_42600_20131125_184712_outLine +BABEL_OP2_301_42600_20131125_185254_inLine +BABEL_OP2_301_42600_20131125_185254_outLine +BABEL_OP2_301_43789_20131205_204932_inLine +BABEL_OP2_301_43789_20131205_204932_outLine +BABEL_OP2_301_45777_20131129_214116_inLine +BABEL_OP2_301_45777_20131129_214116_outLine +BABEL_OP2_301_47877_20140109_182631_inLine +BABEL_OP2_301_47877_20140109_182631_outLine +BABEL_OP2_301_48399_20131115_184608_inLine +BABEL_OP2_301_48399_20131115_184608_outLine +BABEL_OP2_301_48422_20140104_203017_inLine +BABEL_OP2_301_48422_20140104_203017_outLine +BABEL_OP2_301_49287_20140110_233951_inLine +BABEL_OP2_301_49287_20140110_233951_outLine +BABEL_OP2_301_49502_20131025_191447_inLine +BABEL_OP2_301_49502_20131025_191447_outLine +BABEL_OP2_301_49812_20140108_153912_inLine +BABEL_OP2_301_49812_20140108_153912_outLine +BABEL_OP2_301_51417_20140104_191034_inLine +BABEL_OP2_301_51417_20140104_191034_outLine +BABEL_OP2_301_52447_20140128_140241_inLine +BABEL_OP2_301_52447_20140128_140241_outLine +BABEL_OP2_301_58145_20140205_195241_inLine +BABEL_OP2_301_58145_20140205_195241_outLine 
+BABEL_OP2_301_58815_20131219_183200_inLine +BABEL_OP2_301_58815_20131219_183200_outLine +BABEL_OP2_301_58915_20140204_180046_inLine +BABEL_OP2_301_58915_20140204_180046_outLine +BABEL_OP2_301_60508_20131023_221321_inLine +BABEL_OP2_301_60508_20131023_221321_outLine +BABEL_OP2_301_61348_20131210_184944_inLine +BABEL_OP2_301_61348_20131210_184944_outLine +BABEL_OP2_301_61357_20140205_203135_inLine +BABEL_OP2_301_61357_20140205_203135_outLine +BABEL_OP2_301_61678_20131130_195119_inLine +BABEL_OP2_301_61678_20131130_195119_outLine +BABEL_OP2_301_61684_20140224_141104_inLine +BABEL_OP2_301_61684_20140224_141104_outLine +BABEL_OP2_301_62434_20131027_204412_inLine +BABEL_OP2_301_62434_20131027_204412_outLine +BABEL_OP2_301_62835_20131205_201607_inLine +BABEL_OP2_301_62835_20131205_201607_outLine +BABEL_OP2_301_62852_20131112_145306_inLine +BABEL_OP2_301_62852_20131112_145306_outLine +BABEL_OP2_301_63445_20131017_163305_inLine +BABEL_OP2_301_63445_20131017_163305_outLine +BABEL_OP2_301_63481_20131018_205953_inLine +BABEL_OP2_301_63481_20131018_205953_outLine +BABEL_OP2_301_63523_20140127_032850_inLine +BABEL_OP2_301_63523_20140127_032850_outLine +BABEL_OP2_301_65268_20140224_143314_inLine +BABEL_OP2_301_65268_20140224_143314_outLine +BABEL_OP2_301_66967_20131119_230046_inLine +BABEL_OP2_301_66967_20131119_230046_outLine +BABEL_OP2_301_67592_20131223_194021_inLine +BABEL_OP2_301_67592_20131223_194021_outLine +BABEL_OP2_301_69885_20140126_142648_inLine +BABEL_OP2_301_69885_20140126_142648_outLine +BABEL_OP2_301_71282_20140115_180924_inLine +BABEL_OP2_301_71282_20140115_180924_outLine +BABEL_OP2_301_71333_20131126_155505_inLine +BABEL_OP2_301_71333_20131126_155505_outLine +BABEL_OP2_301_73622_20131030_201514_inLine +BABEL_OP2_301_73622_20131030_201514_outLine +BABEL_OP2_301_75359_20140127_022948_inLine +BABEL_OP2_301_75359_20140127_022948_outLine +BABEL_OP2_301_75460_20140130_145829_inLine +BABEL_OP2_301_75460_20140130_145829_outLine +BABEL_OP2_301_76218_20131205_183037_inLine +BABEL_OP2_301_76218_20131205_183037_outLine +BABEL_OP2_301_77139_20131112_164236_inLine +BABEL_OP2_301_77139_20131112_164236_outLine +BABEL_OP2_301_78454_20140206_205852_inLine +BABEL_OP2_301_78454_20140206_205852_outLine +BABEL_OP2_301_78630_20131125_133236_inLine +BABEL_OP2_301_78630_20131125_133236_outLine +BABEL_OP2_301_79590_20131204_214240_inLine +BABEL_OP2_301_79590_20131204_214240_outLine +BABEL_OP2_301_80881_20131106_185321_inLine +BABEL_OP2_301_80881_20131106_185321_outLine +BABEL_OP2_301_83775_20131124_022216_inLine +BABEL_OP2_301_83775_20131124_022216_outLine +BABEL_OP2_301_84370_20140204_202527_inLine +BABEL_OP2_301_84370_20140204_202527_outLine +BABEL_OP2_301_85439_20140126_191119_inLine +BABEL_OP2_301_85439_20140126_191119_outLine +BABEL_OP2_301_86748_20140112_204921_inLine +BABEL_OP2_301_86748_20140112_204921_outLine +BABEL_OP2_301_87693_20131204_010154_inLine +BABEL_OP2_301_87693_20131204_010154_outLine +BABEL_OP2_301_88601_20131208_212307_inLine +BABEL_OP2_301_88601_20131208_212307_outLine +BABEL_OP2_301_88686_20131023_165851_inLine +BABEL_OP2_301_88686_20131023_165851_outLine +BABEL_OP2_301_89457_20131206_124818_inLine +BABEL_OP2_301_89457_20131206_124818_outLine +BABEL_OP2_301_90777_20131126_025413_inLine +BABEL_OP2_301_90777_20131126_025413_outLine +BABEL_OP2_301_92060_20140126_194852_inLine +BABEL_OP2_301_92060_20140126_194852_outLine +BABEL_OP2_301_92281_20140214_190838_inLine +BABEL_OP2_301_92281_20140214_190838_outLine +BABEL_OP2_301_92509_20131019_131304_inLine 
+BABEL_OP2_301_92509_20131019_131304_outLine +BABEL_OP2_301_92698_20131203_135210_inLine +BABEL_OP2_301_92698_20131203_135210_outLine +BABEL_OP2_301_93604_20140125_212930_inLine +BABEL_OP2_301_93604_20140125_212930_outLine +BABEL_OP2_301_94587_20131213_182558_inLine +BABEL_OP2_301_94587_20131213_182558_outLine +BABEL_OP2_301_95598_20131020_194214_inLine +BABEL_OP2_301_95598_20131020_194214_outLine +BABEL_OP2_301_95966_20131205_151956_inLine +BABEL_OP2_301_95966_20131205_151956_outLine +BABEL_OP2_301_96088_20140128_155726_inLine +BABEL_OP2_301_96088_20140128_155726_outLine +BABEL_OP2_301_96808_20140127_174411_inLine +BABEL_OP2_301_96808_20140127_174411_outLine +BABEL_OP2_301_98580_20131204_210023_inLine +BABEL_OP2_301_98580_20131204_210023_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list new file mode 100644 index 00000000000..31455174b8e --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list @@ -0,0 +1,62 @@ +BABEL_OP2_301_13427_20131226_153605_inLine +BABEL_OP2_301_13427_20131226_153605_outLine +BABEL_OP2_301_18766_20140127_140851_inLine +BABEL_OP2_301_18766_20140127_140851_outLine +BABEL_OP2_301_19832_20140214_144414_inLine +BABEL_OP2_301_19832_20140214_144414_outLine +BABEL_OP2_301_23628_20131121_202709_inLine +BABEL_OP2_301_23628_20131121_202709_outLine +BABEL_OP2_301_26074_20140214_150738_inLine +BABEL_OP2_301_26074_20140214_150738_outLine +BABEL_OP2_301_28585_20140103_174051_inLine +BABEL_OP2_301_28585_20140103_174051_outLine +BABEL_OP2_301_33992_20140128_153304_inLine +BABEL_OP2_301_33992_20140128_153304_outLine +BABEL_OP2_301_42600_20131125_184712_inLine +BABEL_OP2_301_42600_20131125_184712_outLine +BABEL_OP2_301_42600_20131125_185254_inLine +BABEL_OP2_301_42600_20131125_185254_outLine +BABEL_OP2_301_60508_20131023_221321_inLine +BABEL_OP2_301_60508_20131023_221321_outLine +BABEL_OP2_301_61357_20140205_203135_inLine +BABEL_OP2_301_61357_20140205_203135_outLine +BABEL_OP2_301_62434_20131027_204412_inLine +BABEL_OP2_301_62434_20131027_204412_outLine +BABEL_OP2_301_62835_20131205_201607_inLine +BABEL_OP2_301_62835_20131205_201607_outLine +BABEL_OP2_301_62852_20131112_145306_inLine +BABEL_OP2_301_62852_20131112_145306_outLine +BABEL_OP2_301_63481_20131018_205953_inLine +BABEL_OP2_301_63481_20131018_205953_outLine +BABEL_OP2_301_63523_20140127_032850_inLine +BABEL_OP2_301_63523_20140127_032850_outLine +BABEL_OP2_301_71282_20140115_180924_inLine +BABEL_OP2_301_71282_20140115_180924_outLine +BABEL_OP2_301_71333_20131126_155505_inLine +BABEL_OP2_301_71333_20131126_155505_outLine +BABEL_OP2_301_75359_20140127_022948_inLine +BABEL_OP2_301_75359_20140127_022948_outLine +BABEL_OP2_301_75460_20140130_145829_inLine +BABEL_OP2_301_75460_20140130_145829_outLine +BABEL_OP2_301_78630_20131125_133236_inLine +BABEL_OP2_301_78630_20131125_133236_outLine +BABEL_OP2_301_83775_20131124_022216_inLine +BABEL_OP2_301_83775_20131124_022216_outLine +BABEL_OP2_301_86748_20140112_204921_inLine +BABEL_OP2_301_86748_20140112_204921_outLine +BABEL_OP2_301_88601_20131208_212307_inLine +BABEL_OP2_301_88601_20131208_212307_outLine +BABEL_OP2_301_92060_20140126_194852_inLine +BABEL_OP2_301_92060_20140126_194852_outLine +BABEL_OP2_301_92281_20140214_190838_inLine +BABEL_OP2_301_92281_20140214_190838_outLine +BABEL_OP2_301_93604_20140125_212930_inLine +BABEL_OP2_301_93604_20140125_212930_outLine +BABEL_OP2_301_94587_20131213_182558_inLine +BABEL_OP2_301_94587_20131213_182558_outLine +BABEL_OP2_301_95966_20131205_151956_inLine 
+BABEL_OP2_301_95966_20131205_151956_outLine +BABEL_OP2_301_96808_20140127_174411_inLine +BABEL_OP2_301_96808_20140127_174411_outLine +BABEL_OP2_301_98580_20131204_210023_inLine +BABEL_OP2_301_98580_20131204_210023_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list new file mode 100644 index 00000000000..8347770b847 --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list @@ -0,0 +1,126 @@ +BABEL_OP2_301_10482_20131213_185259_inLine +BABEL_OP2_301_10482_20131213_185259_outLine +BABEL_OP2_301_11681_20131121_134611_inLine +BABEL_OP2_301_11681_20131121_134611_outLine +BABEL_OP2_301_12220_20131205_210711_inLine +BABEL_OP2_301_12220_20131205_210711_outLine +BABEL_OP2_301_14229_20131129_210206_inLine +BABEL_OP2_301_14229_20131129_210206_outLine +BABEL_OP2_301_14807_20140214_134654_inLine +BABEL_OP2_301_14807_20140214_134654_outLine +BABEL_OP2_301_15163_20131203_221053_inLine +BABEL_OP2_301_15163_20131203_221053_outLine +BABEL_OP2_301_17113_20140202_140244_inLine +BABEL_OP2_301_17113_20140202_140244_outLine +BABEL_OP2_301_18380_20131208_205543_inLine +BABEL_OP2_301_18380_20131208_205543_outLine +BABEL_OP2_301_20437_20140223_171247_inLine +BABEL_OP2_301_20437_20140223_171247_outLine +BABEL_OP2_301_22216_20131024_101416_inLine +BABEL_OP2_301_22216_20131024_101416_outLine +BABEL_OP2_301_28595_20140214_164503_inLine +BABEL_OP2_301_28595_20140214_164503_outLine +BABEL_OP2_301_28945_20131123_183004_inLine +BABEL_OP2_301_28945_20131123_183004_outLine +BABEL_OP2_301_32708_20131122_134009_inLine +BABEL_OP2_301_32708_20131122_134009_outLine +BABEL_OP2_301_32708_20131122_134900_inLine +BABEL_OP2_301_32708_20131122_134900_outLine +BABEL_OP2_301_33175_20131019_231650_inLine +BABEL_OP2_301_33175_20131019_231650_outLine +BABEL_OP2_301_33216_20140131_183344_inLine +BABEL_OP2_301_33216_20140131_183344_outLine +BABEL_OP2_301_33355_20131021_130538_inLine +BABEL_OP2_301_33355_20131021_130538_outLine +BABEL_OP2_301_34106_20131020_192105_inLine +BABEL_OP2_301_34106_20131020_192105_outLine +BABEL_OP2_301_34811_20131204_195646_inLine +BABEL_OP2_301_34811_20131204_195646_outLine +BABEL_OP2_301_37228_20140109_190716_inLine +BABEL_OP2_301_37228_20140109_190716_outLine +BABEL_OP2_301_38554_20131024_134203_inLine +BABEL_OP2_301_38554_20131024_134203_outLine +BABEL_OP2_301_39680_20140115_193747_inLine +BABEL_OP2_301_39680_20140115_193747_outLine +BABEL_OP2_301_41680_20131016_202751_inLine +BABEL_OP2_301_41680_20131016_202751_outLine +BABEL_OP2_301_43388_20131203_204504_inLine +BABEL_OP2_301_43388_20131203_204504_outLine +BABEL_OP2_301_45559_20140127_145550_inLine +BABEL_OP2_301_45559_20140127_145550_outLine +BABEL_OP2_301_45560_20131104_171401_inLine +BABEL_OP2_301_45560_20131104_171401_outLine +BABEL_OP2_301_46066_20140110_170456_inLine +BABEL_OP2_301_46066_20140110_170456_outLine +BABEL_OP2_301_46268_20131021_142020_inLine +BABEL_OP2_301_46268_20131021_142020_outLine +BABEL_OP2_301_50810_20131025_174542_inLine +BABEL_OP2_301_50810_20131025_174542_outLine +BABEL_OP2_301_52265_20140216_163445_inLine +BABEL_OP2_301_52265_20140216_163445_outLine +BABEL_OP2_301_54162_20131210_170602_inLine +BABEL_OP2_301_54162_20131210_170602_outLine +BABEL_OP2_301_54953_20131127_005926_inLine +BABEL_OP2_301_54953_20131127_005926_outLine +BABEL_OP2_301_55818_20131110_111534_inLine +BABEL_OP2_301_55818_20131110_111534_outLine +BABEL_OP2_301_55818_20131110_121457_inLine +BABEL_OP2_301_55818_20131110_121457_outLine +BABEL_OP2_301_56306_20140108_175350_inLine 
+BABEL_OP2_301_56306_20140108_175350_outLine +BABEL_OP2_301_64902_20140123_130547_inLine +BABEL_OP2_301_64902_20140123_130547_outLine +BABEL_OP2_301_65298_20140115_174724_inLine +BABEL_OP2_301_65298_20140115_174724_outLine +BABEL_OP2_301_67213_20140220_182122_inLine +BABEL_OP2_301_67213_20140220_182122_outLine +BABEL_OP2_301_67622_20131023_150210_inLine +BABEL_OP2_301_67622_20131023_150210_outLine +BABEL_OP2_301_68924_20131210_145459_inLine +BABEL_OP2_301_68924_20131210_145459_outLine +BABEL_OP2_301_69746_20140108_182845_inLine +BABEL_OP2_301_69746_20140108_182845_outLine +BABEL_OP2_301_71263_20140205_210654_inLine +BABEL_OP2_301_71263_20140205_210654_outLine +BABEL_OP2_301_72733_20140126_155036_inLine +BABEL_OP2_301_72733_20140126_155036_outLine +BABEL_OP2_301_73042_20131114_135827_inLine +BABEL_OP2_301_73042_20131114_135827_outLine +BABEL_OP2_301_73591_20131016_200144_inLine +BABEL_OP2_301_73591_20131016_200144_outLine +BABEL_OP2_301_73591_20131016_201810_inLine +BABEL_OP2_301_73591_20131016_201810_outLine +BABEL_OP2_301_75869_20140122_141000_inLine +BABEL_OP2_301_75869_20140122_141000_outLine +BABEL_OP2_301_78482_20131227_163840_inLine +BABEL_OP2_301_78482_20131227_163840_outLine +BABEL_OP2_301_81810_20131214_030628_inLine +BABEL_OP2_301_81810_20131214_030628_outLine +BABEL_OP2_301_81854_20140127_151841_inLine +BABEL_OP2_301_81854_20140127_151841_outLine +BABEL_OP2_301_84547_20131025_143053_inLine +BABEL_OP2_301_84547_20131025_143053_outLine +BABEL_OP2_301_85248_20140115_144605_inLine +BABEL_OP2_301_85248_20140115_144605_outLine +BABEL_OP2_301_87545_20140125_194128_inLine +BABEL_OP2_301_87545_20140125_194128_outLine +BABEL_OP2_301_91372_20140126_145526_inLine +BABEL_OP2_301_91372_20140126_145526_outLine +BABEL_OP2_301_91463_20140206_144651_inLine +BABEL_OP2_301_91463_20140206_144651_outLine +BABEL_OP2_301_91884_20140118_220510_inLine +BABEL_OP2_301_91884_20140118_220510_outLine +BABEL_OP2_301_93475_20131119_183619_inLine +BABEL_OP2_301_93475_20131119_183619_outLine +BABEL_OP2_301_93515_20140125_212344_inLine +BABEL_OP2_301_93515_20140125_212344_outLine +BABEL_OP2_301_94409_20131204_145545_inLine +BABEL_OP2_301_94409_20131204_145545_outLine +BABEL_OP2_301_95399_20131206_150920_inLine +BABEL_OP2_301_95399_20131206_150920_outLine +BABEL_OP2_301_96190_20131122_024403_inLine +BABEL_OP2_301_96190_20131122_024403_outLine +BABEL_OP2_301_99202_20131226_202006_inLine +BABEL_OP2_301_99202_20131226_202006_outLine +BABEL_OP2_301_99955_20140110_162703_inLine +BABEL_OP2_301_99955_20140110_162703_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list new file mode 100644 index 00000000000..690d88bbe06 --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list @@ -0,0 +1,376 @@ +BABEL_OP2_301_10647_20140122_182555_inLine +BABEL_OP2_301_10647_20140122_182555_outLine +BABEL_OP2_301_11581_20140214_131627_inLine +BABEL_OP2_301_11581_20140214_131627_outLine +BABEL_OP2_301_11673_20131025_130227_inLine +BABEL_OP2_301_11673_20131025_130227_outLine +BABEL_OP2_301_12846_20140130_151205_inLine +BABEL_OP2_301_12846_20140130_151205_outLine +BABEL_OP2_301_13184_20140106_154520_inLine +BABEL_OP2_301_13184_20140106_154520_outLine +BABEL_OP2_301_13776_20140129_150321_inLine +BABEL_OP2_301_13776_20140129_150321_outLine +BABEL_OP2_301_14137_20131129_173610_inLine +BABEL_OP2_301_14137_20131129_173610_outLine +BABEL_OP2_301_14729_20140120_213036_inLine +BABEL_OP2_301_14729_20140120_213036_outLine 
+BABEL_OP2_301_14929_20131204_221803_inLine +BABEL_OP2_301_14929_20131204_221803_outLine +BABEL_OP2_301_15466_20140220_163311_inLine +BABEL_OP2_301_15466_20140220_163311_outLine +BABEL_OP2_301_15617_20140216_200848_inLine +BABEL_OP2_301_15617_20140216_200848_outLine +BABEL_OP2_301_15730_20131103_191134_inLine +BABEL_OP2_301_15730_20131103_191134_outLine +BABEL_OP2_301_16149_20131122_182440_inLine +BABEL_OP2_301_16149_20131122_182440_outLine +BABEL_OP2_301_16749_20140109_145017_inLine +BABEL_OP2_301_16749_20140109_145017_outLine +BABEL_OP2_301_18118_20140214_195210_inLine +BABEL_OP2_301_18118_20140214_195210_outLine +BABEL_OP2_301_19589_20140126_182029_inLine +BABEL_OP2_301_19589_20140126_182029_outLine +BABEL_OP2_301_20133_20131017_002355_inLine +BABEL_OP2_301_20133_20131017_002355_outLine +BABEL_OP2_301_20922_20140103_201925_inLine +BABEL_OP2_301_20922_20140103_201925_outLine +BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34629_20140224_174043_inLine +BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34860_20140224_170732_inLine 
+BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39848_20131204_163640_inLine +BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine +BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine 
+BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine +BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine +BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine +BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine 
+BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine +BABEL_OP2_301_76155_20131203_185301_inLine +BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine +BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine 
+BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91478_20140224_170543_inLine +BABEL_OP2_301_91478_20140224_170543_outLine +BABEL_OP2_301_91760_20140127_183930_inLine +BABEL_OP2_301_91760_20140127_183930_outLine +BABEL_OP2_301_91891_20131213_192340_inLine +BABEL_OP2_301_91891_20131213_192340_outLine +BABEL_OP2_301_91944_20131114_123915_inLine +BABEL_OP2_301_91944_20131114_123915_outLine +BABEL_OP2_301_92809_20131124_142340_inLine +BABEL_OP2_301_92809_20131124_142340_outLine +BABEL_OP2_301_92809_20131124_143817_inLine +BABEL_OP2_301_92809_20131124_143817_outLine +BABEL_OP2_301_92942_20140206_180304_inLine +BABEL_OP2_301_92942_20140206_180304_outLine +BABEL_OP2_301_93153_20131114_144733_inLine +BABEL_OP2_301_93153_20131114_144733_outLine +BABEL_OP2_301_93153_20131114_151704_inLine +BABEL_OP2_301_93153_20131114_151704_outLine +BABEL_OP2_301_93964_20131130_172431_inLine +BABEL_OP2_301_93964_20131130_172431_outLine +BABEL_OP2_301_94978_20140119_185149_inLine +BABEL_OP2_301_94978_20140119_185149_outLine +BABEL_OP2_301_95338_20140127_192317_inLine +BABEL_OP2_301_95338_20140127_192317_outLine +BABEL_OP2_301_95583_20131029_002312_inLine +BABEL_OP2_301_95583_20131029_002312_outLine +BABEL_OP2_301_95663_20131025_134113_inLine +BABEL_OP2_301_95663_20131025_134113_outLine +BABEL_OP2_301_95935_20140103_190515_inLine +BABEL_OP2_301_95935_20140103_190515_outLine +BABEL_OP2_301_96324_20131026_023101_inLine +BABEL_OP2_301_96324_20131026_023101_outLine +BABEL_OP2_301_96376_20140126_140015_inLine +BABEL_OP2_301_96376_20140126_140015_outLine +BABEL_OP2_301_96910_20131124_183403_inLine +BABEL_OP2_301_96910_20131124_183403_outLine +BABEL_OP2_301_97136_20140120_235804_inLine +BABEL_OP2_301_97136_20140120_235804_outLine +BABEL_OP2_301_97588_20131025_185012_inLine +BABEL_OP2_301_97588_20131025_185012_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/training.list b/egs/babel/s5d/conf/lists/301-cebuano/training.list new file mode 100644 index 00000000000..e6ea8dcfeff --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/training.list @@ -0,0 +1,502 @@ +BABEL_OP2_301_10482_20131213_185259_inLine +BABEL_OP2_301_10482_20131213_185259_outLine +BABEL_OP2_301_10647_20140122_182555_inLine +BABEL_OP2_301_10647_20140122_182555_outLine +BABEL_OP2_301_11581_20140214_131627_inLine +BABEL_OP2_301_11581_20140214_131627_outLine +BABEL_OP2_301_11673_20131025_130227_inLine +BABEL_OP2_301_11673_20131025_130227_outLine +BABEL_OP2_301_11681_20131121_134611_inLine +BABEL_OP2_301_11681_20131121_134611_outLine +BABEL_OP2_301_12220_20131205_210711_inLine +BABEL_OP2_301_12220_20131205_210711_outLine +BABEL_OP2_301_12846_20140130_151205_inLine +BABEL_OP2_301_12846_20140130_151205_outLine +BABEL_OP2_301_13184_20140106_154520_inLine +BABEL_OP2_301_13184_20140106_154520_outLine +BABEL_OP2_301_13776_20140129_150321_inLine +BABEL_OP2_301_13776_20140129_150321_outLine +BABEL_OP2_301_14137_20131129_173610_inLine +BABEL_OP2_301_14137_20131129_173610_outLine +BABEL_OP2_301_14229_20131129_210206_inLine +BABEL_OP2_301_14229_20131129_210206_outLine +BABEL_OP2_301_14729_20140120_213036_inLine +BABEL_OP2_301_14729_20140120_213036_outLine +BABEL_OP2_301_14807_20140214_134654_inLine +BABEL_OP2_301_14807_20140214_134654_outLine +BABEL_OP2_301_14929_20131204_221803_inLine +BABEL_OP2_301_14929_20131204_221803_outLine +BABEL_OP2_301_15163_20131203_221053_inLine +BABEL_OP2_301_15163_20131203_221053_outLine 
+BABEL_OP2_301_15466_20140220_163311_inLine +BABEL_OP2_301_15466_20140220_163311_outLine +BABEL_OP2_301_15617_20140216_200848_inLine +BABEL_OP2_301_15617_20140216_200848_outLine +BABEL_OP2_301_15730_20131103_191134_inLine +BABEL_OP2_301_15730_20131103_191134_outLine +BABEL_OP2_301_16149_20131122_182440_inLine +BABEL_OP2_301_16149_20131122_182440_outLine +BABEL_OP2_301_16749_20140109_145017_inLine +BABEL_OP2_301_16749_20140109_145017_outLine +BABEL_OP2_301_17113_20140202_140244_inLine +BABEL_OP2_301_17113_20140202_140244_outLine +BABEL_OP2_301_18118_20140214_195210_inLine +BABEL_OP2_301_18118_20140214_195210_outLine +BABEL_OP2_301_18380_20131208_205543_inLine +BABEL_OP2_301_18380_20131208_205543_outLine +BABEL_OP2_301_19589_20140126_182029_inLine +BABEL_OP2_301_19589_20140126_182029_outLine +BABEL_OP2_301_20133_20131017_002355_inLine +BABEL_OP2_301_20133_20131017_002355_outLine +BABEL_OP2_301_20437_20140223_171247_inLine +BABEL_OP2_301_20437_20140223_171247_outLine +BABEL_OP2_301_20922_20140103_201925_inLine +BABEL_OP2_301_20922_20140103_201925_outLine +BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22216_20131024_101416_inLine +BABEL_OP2_301_22216_20131024_101416_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28595_20140214_164503_inLine +BABEL_OP2_301_28595_20140214_164503_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_28945_20131123_183004_inLine +BABEL_OP2_301_28945_20131123_183004_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32708_20131122_134009_inLine +BABEL_OP2_301_32708_20131122_134009_outLine +BABEL_OP2_301_32708_20131122_134900_inLine 
+BABEL_OP2_301_32708_20131122_134900_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33175_20131019_231650_inLine +BABEL_OP2_301_33175_20131019_231650_outLine +BABEL_OP2_301_33216_20140131_183344_inLine +BABEL_OP2_301_33216_20140131_183344_outLine +BABEL_OP2_301_33355_20131021_130538_inLine +BABEL_OP2_301_33355_20131021_130538_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34106_20131020_192105_inLine +BABEL_OP2_301_34106_20131020_192105_outLine +BABEL_OP2_301_34629_20140224_174043_inLine +BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34811_20131204_195646_inLine +BABEL_OP2_301_34811_20131204_195646_outLine +BABEL_OP2_301_34860_20140224_170732_inLine +BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37228_20140109_190716_inLine +BABEL_OP2_301_37228_20140109_190716_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_38554_20131024_134203_inLine +BABEL_OP2_301_38554_20131024_134203_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39680_20140115_193747_inLine +BABEL_OP2_301_39680_20140115_193747_outLine +BABEL_OP2_301_39848_20131204_163640_inLine +BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41680_20131016_202751_inLine +BABEL_OP2_301_41680_20131016_202751_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_43388_20131203_204504_inLine +BABEL_OP2_301_43388_20131203_204504_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45559_20140127_145550_inLine +BABEL_OP2_301_45559_20140127_145550_outLine +BABEL_OP2_301_45560_20131104_171401_inLine +BABEL_OP2_301_45560_20131104_171401_outLine 
+BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46066_20140110_170456_inLine +BABEL_OP2_301_46066_20140110_170456_outLine +BABEL_OP2_301_46268_20131021_142020_inLine +BABEL_OP2_301_46268_20131021_142020_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_50810_20131025_174542_inLine +BABEL_OP2_301_50810_20131025_174542_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine +BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52265_20140216_163445_inLine +BABEL_OP2_301_52265_20140216_163445_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54162_20131210_170602_inLine +BABEL_OP2_301_54162_20131210_170602_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine +BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_54953_20131127_005926_inLine +BABEL_OP2_301_54953_20131127_005926_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_55818_20131110_111534_inLine +BABEL_OP2_301_55818_20131110_111534_outLine +BABEL_OP2_301_55818_20131110_121457_inLine +BABEL_OP2_301_55818_20131110_121457_outLine +BABEL_OP2_301_56306_20140108_175350_inLine +BABEL_OP2_301_56306_20140108_175350_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine +BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine 
+BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine +BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_64902_20140123_130547_inLine +BABEL_OP2_301_64902_20140123_130547_outLine +BABEL_OP2_301_65298_20140115_174724_inLine +BABEL_OP2_301_65298_20140115_174724_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_67213_20140220_182122_inLine +BABEL_OP2_301_67213_20140220_182122_outLine +BABEL_OP2_301_67622_20131023_150210_inLine +BABEL_OP2_301_67622_20131023_150210_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_68924_20131210_145459_inLine +BABEL_OP2_301_68924_20131210_145459_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69746_20140108_182845_inLine +BABEL_OP2_301_69746_20140108_182845_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine +BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_71263_20140205_210654_inLine +BABEL_OP2_301_71263_20140205_210654_outLine +BABEL_OP2_301_72733_20140126_155036_inLine +BABEL_OP2_301_72733_20140126_155036_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73042_20131114_135827_inLine +BABEL_OP2_301_73042_20131114_135827_outLine 
+BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73591_20131016_200144_inLine +BABEL_OP2_301_73591_20131016_200144_outLine +BABEL_OP2_301_73591_20131016_201810_inLine +BABEL_OP2_301_73591_20131016_201810_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75869_20140122_141000_inLine +BABEL_OP2_301_75869_20140122_141000_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine +BABEL_OP2_301_76155_20131203_185301_inLine +BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78482_20131227_163840_inLine +BABEL_OP2_301_78482_20131227_163840_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_81810_20131214_030628_inLine +BABEL_OP2_301_81810_20131214_030628_outLine +BABEL_OP2_301_81854_20140127_151841_inLine +BABEL_OP2_301_81854_20140127_151841_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_84547_20131025_143053_inLine +BABEL_OP2_301_84547_20131025_143053_outLine +BABEL_OP2_301_85248_20140115_144605_inLine +BABEL_OP2_301_85248_20140115_144605_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_87545_20140125_194128_inLine +BABEL_OP2_301_87545_20140125_194128_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine 
+BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine +BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91372_20140126_145526_inLine +BABEL_OP2_301_91372_20140126_145526_outLine +BABEL_OP2_301_91463_20140206_144651_inLine +BABEL_OP2_301_91463_20140206_144651_outLine +BABEL_OP2_301_91478_20140224_170543_inLine +BABEL_OP2_301_91478_20140224_170543_outLine +BABEL_OP2_301_91760_20140127_183930_inLine +BABEL_OP2_301_91760_20140127_183930_outLine +BABEL_OP2_301_91884_20140118_220510_inLine +BABEL_OP2_301_91884_20140118_220510_outLine +BABEL_OP2_301_91891_20131213_192340_inLine +BABEL_OP2_301_91891_20131213_192340_outLine +BABEL_OP2_301_91944_20131114_123915_inLine +BABEL_OP2_301_91944_20131114_123915_outLine +BABEL_OP2_301_92809_20131124_142340_inLine +BABEL_OP2_301_92809_20131124_142340_outLine +BABEL_OP2_301_92809_20131124_143817_inLine +BABEL_OP2_301_92809_20131124_143817_outLine +BABEL_OP2_301_92942_20140206_180304_inLine +BABEL_OP2_301_92942_20140206_180304_outLine +BABEL_OP2_301_93153_20131114_144733_inLine +BABEL_OP2_301_93153_20131114_144733_outLine +BABEL_OP2_301_93153_20131114_151704_inLine +BABEL_OP2_301_93153_20131114_151704_outLine +BABEL_OP2_301_93475_20131119_183619_inLine +BABEL_OP2_301_93475_20131119_183619_outLine +BABEL_OP2_301_93515_20140125_212344_inLine +BABEL_OP2_301_93515_20140125_212344_outLine +BABEL_OP2_301_93964_20131130_172431_inLine +BABEL_OP2_301_93964_20131130_172431_outLine +BABEL_OP2_301_94409_20131204_145545_inLine +BABEL_OP2_301_94409_20131204_145545_outLine +BABEL_OP2_301_94978_20140119_185149_inLine +BABEL_OP2_301_94978_20140119_185149_outLine +BABEL_OP2_301_95338_20140127_192317_inLine +BABEL_OP2_301_95338_20140127_192317_outLine +BABEL_OP2_301_95399_20131206_150920_inLine +BABEL_OP2_301_95399_20131206_150920_outLine +BABEL_OP2_301_95583_20131029_002312_inLine +BABEL_OP2_301_95583_20131029_002312_outLine +BABEL_OP2_301_95663_20131025_134113_inLine +BABEL_OP2_301_95663_20131025_134113_outLine +BABEL_OP2_301_95935_20140103_190515_inLine +BABEL_OP2_301_95935_20140103_190515_outLine +BABEL_OP2_301_96190_20131122_024403_inLine +BABEL_OP2_301_96190_20131122_024403_outLine +BABEL_OP2_301_96324_20131026_023101_inLine +BABEL_OP2_301_96324_20131026_023101_outLine +BABEL_OP2_301_96376_20140126_140015_inLine +BABEL_OP2_301_96376_20140126_140015_outLine +BABEL_OP2_301_96910_20131124_183403_inLine +BABEL_OP2_301_96910_20131124_183403_outLine +BABEL_OP2_301_97136_20140120_235804_inLine +BABEL_OP2_301_97136_20140120_235804_outLine +BABEL_OP2_301_97588_20131025_185012_inLine +BABEL_OP2_301_97588_20131025_185012_outLine +BABEL_OP2_301_99202_20131226_202006_inLine +BABEL_OP2_301_99202_20131226_202006_outLine +BABEL_OP2_301_99955_20140110_162703_inLine +BABEL_OP2_301_99955_20140110_162703_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list
new file mode 100644
index 00000000000..f0033cd47ec
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list
@@ -0,0 +1,548 @@
+BABEL_OP2_301_10188_20131015_200722_inLine +BABEL_OP2_301_10188_20131015_200722_outLine +BABEL_OP2_301_10188_20131015_201921_inLine +BABEL_OP2_301_10188_20131015_201921_outLine +BABEL_OP2_301_10974_20131226_183511_inLine +BABEL_OP2_301_10974_20131226_183511_outLine +BABEL_OP2_301_11096_20140129_200046_inLine +BABEL_OP2_301_11096_20140129_200046_outLine +BABEL_OP2_301_11663_20140206_183134_inLine +BABEL_OP2_301_11663_20140206_183134_outLine +BABEL_OP2_301_12851_20131019_233929_inLine +BABEL_OP2_301_12851_20131019_233929_outLine +BABEL_OP2_301_12851_20131026_182349_inLine +BABEL_OP2_301_12851_20131026_182349_outLine +BABEL_OP2_301_13030_20131128_165148_inLine +BABEL_OP2_301_13030_20131128_165148_outLine +BABEL_OP2_301_13040_20131113_202409_outLine +BABEL_OP2_301_13744_20131020_151336_inLine +BABEL_OP2_301_13744_20131020_160305_inLine +BABEL_OP2_301_13909_20140201_154926_inLine +BABEL_OP2_301_13909_20140201_154926_outLine +BABEL_OP2_301_14228_20131223_203905_outLine +BABEL_OP2_301_15024_20131206_182911_inLine +BABEL_OP2_301_15024_20131206_182911_outLine +BABEL_OP2_301_15535_20131212_140356_inLine +BABEL_OP2_301_15535_20131212_140356_outLine +BABEL_OP2_301_15749_20131226_201016_inLine +BABEL_OP2_301_15749_20131226_201016_outLine +BABEL_OP2_301_15902_20131116_134056_inLine +BABEL_OP2_301_15902_20131116_134056_outLine +BABEL_OP2_301_17567_20131223_202218_inLine +BABEL_OP2_301_17567_20131223_202218_outLine +BABEL_OP2_301_17573_20131221_234136_inLine +BABEL_OP2_301_17573_20131221_234136_outLine +BABEL_OP2_301_17751_20140201_163439_inLine +BABEL_OP2_301_17751_20140201_163439_outLine +BABEL_OP2_301_17914_20140114_191137_inLine +BABEL_OP2_301_17914_20140114_191137_outLine +BABEL_OP2_301_18863_20140103_143408_inLine +BABEL_OP2_301_18863_20140103_145207_inLine +BABEL_OP2_301_18939_20131108_215217_outLine +BABEL_OP2_301_19120_20140129_153621_inLine +BABEL_OP2_301_19120_20140129_153621_outLine +BABEL_OP2_301_19545_20131223_225812_inLine +BABEL_OP2_301_19545_20131223_225812_outLine +BABEL_OP2_301_19703_20131203_181434_inLine +BABEL_OP2_301_19703_20131203_181434_outLine +BABEL_OP2_301_20682_20131220_204542_inLine +BABEL_OP2_301_20682_20131220_204542_outLine +BABEL_OP2_301_20738_20140115_135411_inLine +BABEL_OP2_301_20738_20140115_135411_outLine +BABEL_OP2_301_21426_20140216_181528_outLine +BABEL_OP2_301_21426_20140216_182606_outLine +BABEL_OP2_301_21581_20131128_151038_inLine +BABEL_OP2_301_21581_20131128_151038_outLine +BABEL_OP2_301_21794_20131203_210336_inLine +BABEL_OP2_301_21794_20131203_210336_outLine +BABEL_OP2_301_21794_20131203_211241_inLine +BABEL_OP2_301_21794_20131203_211241_outLine +BABEL_OP2_301_21794_20131203_212201_inLine +BABEL_OP2_301_21794_20131203_212201_outLine +BABEL_OP2_301_22170_20140119_235310_inLine +BABEL_OP2_301_22170_20140119_235310_outLine +BABEL_OP2_301_23151_20140115_191742_inLine +BABEL_OP2_301_23151_20140115_191742_outLine +BABEL_OP2_301_23260_20140123_165218_inLine +BABEL_OP2_301_23260_20140123_165218_outLine +BABEL_OP2_301_23681_20140129_150558_inLine +BABEL_OP2_301_23681_20140129_150558_outLine +BABEL_OP2_301_23983_20140125_164849_inLine +BABEL_OP2_301_23983_20140125_164849_outLine +BABEL_OP2_301_24033_20140108_160013_inLine +BABEL_OP2_301_24033_20140108_160013_outLine +BABEL_OP2_301_24470_20140206_191002_inLine
+BABEL_OP2_301_24470_20140206_191002_outLine +BABEL_OP2_301_25085_20140204_170633_inLine +BABEL_OP2_301_25085_20140204_170633_outLine +BABEL_OP2_301_25220_20140202_012113_inLine +BABEL_OP2_301_25220_20140202_012113_outLine +BABEL_OP2_301_25698_20140202_155327_inLine +BABEL_OP2_301_25698_20140202_155327_outLine +BABEL_OP2_301_26398_20140125_202344_inLine +BABEL_OP2_301_26398_20140125_202344_outLine +BABEL_OP2_301_26574_20131226_194917_inLine +BABEL_OP2_301_26574_20131226_194917_outLine +BABEL_OP2_301_27203_20140205_212839_inLine +BABEL_OP2_301_27203_20140205_212839_outLine +BABEL_OP2_301_27478_20140120_183015_inLine +BABEL_OP2_301_27478_20140120_183015_outLine +BABEL_OP2_301_28190_20140103_204548_inLine +BABEL_OP2_301_28190_20140103_204548_outLine +BABEL_OP2_301_28190_20140103_211418_inLine +BABEL_OP2_301_28190_20140103_211418_outLine +BABEL_OP2_301_28538_20131206_201510_inLine +BABEL_OP2_301_28538_20131206_201510_outLine +BABEL_OP2_301_28775_20131117_184047_inLine +BABEL_OP2_301_28775_20131117_184047_outLine +BABEL_OP2_301_28775_20131117_184742_inLine +BABEL_OP2_301_28775_20131117_184742_outLine +BABEL_OP2_301_28775_20131117_190311_inLine +BABEL_OP2_301_28775_20131117_190311_outLine +BABEL_OP2_301_29072_20131212_183347_inLine +BABEL_OP2_301_29072_20131212_183347_outLine +BABEL_OP2_301_29076_20140207_194512_inLine +BABEL_OP2_301_29076_20140207_194512_outLine +BABEL_OP2_301_29352_20140131_181124_inLine +BABEL_OP2_301_29352_20140131_181124_outLine +BABEL_OP2_301_29633_20140121_164509_inLine +BABEL_OP2_301_29633_20140121_164509_outLine +BABEL_OP2_301_29643_20140129_200354_outLine +BABEL_OP2_301_29765_20140131_185401_inLine +BABEL_OP2_301_29765_20140131_185401_outLine +BABEL_OP2_301_30253_20131217_200910_inLine +BABEL_OP2_301_30253_20131217_200910_outLine +BABEL_OP2_301_30280_20140210_171213_inLine +BABEL_OP2_301_30280_20140210_171213_outLine +BABEL_OP2_301_30497_20140123_162323_inLine +BABEL_OP2_301_30497_20140123_162323_outLine +BABEL_OP2_301_31109_20131225_234903_inLine +BABEL_OP2_301_31109_20131225_234903_outLine +BABEL_OP2_301_31182_20140102_125318_inLine +BABEL_OP2_301_31182_20140102_125318_outLine +BABEL_OP2_301_31182_20140102_130533_inLine +BABEL_OP2_301_31182_20140102_130533_outLine +BABEL_OP2_301_31184_20131208_184700_inLine +BABEL_OP2_301_31184_20131208_184700_outLine +BABEL_OP2_301_31484_20131210_183412_inLine +BABEL_OP2_301_31484_20131210_183412_outLine +BABEL_OP2_301_31583_20131220_145426_inLine +BABEL_OP2_301_31583_20131220_145426_outLine +BABEL_OP2_301_32048_20140107_184712_inLine +BABEL_OP2_301_32048_20140107_184712_outLine +BABEL_OP2_301_32861_20140110_193920_outLine +BABEL_OP2_301_32872_20140126_181540_inLine +BABEL_OP2_301_32872_20140126_181540_outLine +BABEL_OP2_301_32959_20131218_155238_inLine +BABEL_OP2_301_32959_20131218_155238_outLine +BABEL_OP2_301_33229_20140112_190633_inLine +BABEL_OP2_301_33229_20140112_190633_outLine +BABEL_OP2_301_33251_20140206_154015_inLine +BABEL_OP2_301_33251_20140206_154015_outLine +BABEL_OP2_301_33659_20140223_185752_outLine +BABEL_OP2_301_34336_20131125_162020_inLine +BABEL_OP2_301_34336_20131125_162020_outLine +BABEL_OP2_301_34336_20131125_163318_inLine +BABEL_OP2_301_34336_20131125_163318_outLine +BABEL_OP2_301_34477_20131129_201317_inLine +BABEL_OP2_301_34477_20131129_201317_outLine +BABEL_OP2_301_34688_20131107_151905_inLine +BABEL_OP2_301_34713_20140216_184756_outLine +BABEL_OP2_301_34899_20140201_183710_inLine +BABEL_OP2_301_34899_20140201_183710_outLine +BABEL_OP2_301_36017_20140123_220745_inLine 
+BABEL_OP2_301_36017_20140123_220745_outLine +BABEL_OP2_301_36894_20131113_201325_inLine +BABEL_OP2_301_38750_20131218_210138_inLine +BABEL_OP2_301_38750_20131218_210138_outLine +BABEL_OP2_301_39059_20140115_160435_inLine +BABEL_OP2_301_39059_20140115_160435_outLine +BABEL_OP2_301_39059_20140115_161237_inLine +BABEL_OP2_301_39059_20140115_161237_outLine +BABEL_OP2_301_39893_20140201_164926_inLine +BABEL_OP2_301_39893_20140201_164926_outLine +BABEL_OP2_301_41097_20131218_194351_inLine +BABEL_OP2_301_41097_20131218_194351_outLine +BABEL_OP2_301_41100_20131130_204102_inLine +BABEL_OP2_301_41100_20131130_204102_outLine +BABEL_OP2_301_41100_20131130_204814_inLine +BABEL_OP2_301_41100_20131130_204814_outLine +BABEL_OP2_301_41109_20140107_200127_inLine +BABEL_OP2_301_41109_20140107_200127_outLine +BABEL_OP2_301_41272_20140126_163911_inLine +BABEL_OP2_301_41272_20140126_163911_outLine +BABEL_OP2_301_41442_20131220_182530_inLine +BABEL_OP2_301_41442_20131220_182530_outLine +BABEL_OP2_301_41442_20131220_183940_inLine +BABEL_OP2_301_41442_20131220_183940_outLine +BABEL_OP2_301_42231_20131213_161445_inLine +BABEL_OP2_301_42231_20131213_161445_outLine +BABEL_OP2_301_42243_20131124_191210_inLine +BABEL_OP2_301_42243_20131124_191210_outLine +BABEL_OP2_301_42718_20140126_222724_inLine +BABEL_OP2_301_42718_20140126_222724_outLine +BABEL_OP2_301_43074_20140213_170948_inLine +BABEL_OP2_301_43074_20140213_170948_outLine +BABEL_OP2_301_43157_20140214_155422_inLine +BABEL_OP2_301_43157_20140214_155422_outLine +BABEL_OP2_301_43588_20140128_173254_inLine +BABEL_OP2_301_43588_20140128_173254_outLine +BABEL_OP2_301_43588_20140128_174720_inLine +BABEL_OP2_301_43588_20140128_174720_outLine +BABEL_OP2_301_43990_20140220_141338_inLine +BABEL_OP2_301_43990_20140220_141338_outLine +BABEL_OP2_301_44255_20140115_001546_inLine +BABEL_OP2_301_44255_20140115_001546_outLine +BABEL_OP2_301_44290_20140126_145048_inLine +BABEL_OP2_301_44290_20140126_145048_outLine +BABEL_OP2_301_44531_20140118_212803_inLine +BABEL_OP2_301_44531_20140118_212803_outLine +BABEL_OP2_301_44847_20131214_204251_inLine +BABEL_OP2_301_44847_20131214_204251_outLine +BABEL_OP2_301_44847_20131214_230118_inLine +BABEL_OP2_301_44847_20131214_230118_outLine +BABEL_OP2_301_45697_20140214_220139_inLine +BABEL_OP2_301_45697_20140214_220139_outLine +BABEL_OP2_301_46169_20131220_162551_inLine +BABEL_OP2_301_46169_20131220_162551_outLine +BABEL_OP2_301_46202_20140224_155801_inLine +BABEL_OP2_301_46202_20140224_155801_outLine +BABEL_OP2_301_46315_20131211_204949_inLine +BABEL_OP2_301_46315_20131211_204949_outLine +BABEL_OP2_301_46625_20131026_225140_outLine +BABEL_OP2_301_46974_20131211_200449_inLine +BABEL_OP2_301_46974_20131211_200449_outLine +BABEL_OP2_301_47637_20140213_164701_inLine +BABEL_OP2_301_47637_20140213_164701_outLine +BABEL_OP2_301_47799_20140216_165643_inLine +BABEL_OP2_301_47799_20140216_165643_outLine +BABEL_OP2_301_48016_20140205_174755_inLine +BABEL_OP2_301_48016_20140205_174755_outLine +BABEL_OP2_301_48299_20140224_163951_inLine +BABEL_OP2_301_48299_20140224_163951_outLine +BABEL_OP2_301_48610_20131113_182547_outLine +BABEL_OP2_301_48663_20140126_210156_inLine +BABEL_OP2_301_48663_20140126_210156_outLine +BABEL_OP2_301_48758_20140122_144530_inLine +BABEL_OP2_301_48758_20140122_144530_outLine +BABEL_OP2_301_48758_20140122_155747_inLine +BABEL_OP2_301_48758_20140122_155747_outLine +BABEL_OP2_301_48907_20140127_134337_inLine +BABEL_OP2_301_48907_20140127_134337_outLine +BABEL_OP2_301_49637_20131030_211145_inLine 
+BABEL_OP2_301_49767_20140131_135142_inLine +BABEL_OP2_301_49767_20140131_135142_outLine +BABEL_OP2_301_50779_20140207_191951_inLine +BABEL_OP2_301_50779_20140207_191951_outLine +BABEL_OP2_301_50940_20140220_201041_inLine +BABEL_OP2_301_50940_20140220_201041_outLine +BABEL_OP2_301_51858_20140220_170150_inLine +BABEL_OP2_301_51858_20140220_170150_outLine +BABEL_OP2_301_52222_20140224_160657_inLine +BABEL_OP2_301_52222_20140224_160657_outLine +BABEL_OP2_301_52483_20140214_142008_inLine +BABEL_OP2_301_52483_20140214_142008_outLine +BABEL_OP2_301_52854_20131015_224412_inLine +BABEL_OP2_301_52854_20131015_224412_outLine +BABEL_OP2_301_52854_20131015_225109_inLine +BABEL_OP2_301_52854_20131015_225109_outLine +BABEL_OP2_301_52854_20131015_230437_inLine +BABEL_OP2_301_52854_20131015_230437_outLine +BABEL_OP2_301_53072_20140128_162233_inLine +BABEL_OP2_301_53072_20140128_162233_outLine +BABEL_OP2_301_53415_20140119_182758_inLine +BABEL_OP2_301_53415_20140119_182758_outLine +BABEL_OP2_301_53419_20131222_184412_inLine +BABEL_OP2_301_53419_20131222_184412_outLine +BABEL_OP2_301_53492_20140122_223158_inLine +BABEL_OP2_301_53492_20140122_223158_outLine +BABEL_OP2_301_53492_20140122_223724_inLine +BABEL_OP2_301_53492_20140122_223724_outLine +BABEL_OP2_301_54040_20140102_113546_inLine +BABEL_OP2_301_54040_20140102_113546_outLine +BABEL_OP2_301_54066_20140214_153112_inLine +BABEL_OP2_301_54066_20140214_153112_outLine +BABEL_OP2_301_54405_20131227_152052_inLine +BABEL_OP2_301_54405_20131227_152052_outLine +BABEL_OP2_301_54634_20140225_214816_inLine +BABEL_OP2_301_54634_20140225_214816_outLine +BABEL_OP2_301_54923_20140201_161814_inLine +BABEL_OP2_301_54923_20140201_161814_outLine +BABEL_OP2_301_55013_20140214_165830_inLine +BABEL_OP2_301_55013_20140214_165830_outLine +BABEL_OP2_301_56019_20140117_192119_inLine +BABEL_OP2_301_56019_20140117_192119_outLine +BABEL_OP2_301_56090_20131016_191346_inLine +BABEL_OP2_301_56090_20131016_191346_outLine +BABEL_OP2_301_56213_20131216_202911_inLine +BABEL_OP2_301_56213_20131216_202911_outLine +BABEL_OP2_301_56345_20140223_203712_inLine +BABEL_OP2_301_56345_20140223_203712_outLine +BABEL_OP2_301_56429_20131112_172026_inLine +BABEL_OP2_301_56429_20131112_172026_outLine +BABEL_OP2_301_56465_20140205_174245_inLine +BABEL_OP2_301_56465_20140205_174245_outLine +BABEL_OP2_301_56468_20140225_204233_inLine +BABEL_OP2_301_56468_20140225_204233_outLine +BABEL_OP2_301_56677_20131217_201344_inLine +BABEL_OP2_301_56677_20131217_201344_outLine +BABEL_OP2_301_56684_20140105_193720_inLine +BABEL_OP2_301_56684_20140105_193720_outLine +BABEL_OP2_301_57067_20140107_192621_inLine +BABEL_OP2_301_57067_20140107_192621_outLine +BABEL_OP2_301_57219_20140205_155125_outLine +BABEL_OP2_301_57219_20140205_160417_outLine +BABEL_OP2_301_57529_20131217_195013_inLine +BABEL_OP2_301_57529_20131217_195013_outLine +BABEL_OP2_301_57609_20131224_152505_inLine +BABEL_OP2_301_57609_20131224_152505_outLine +BABEL_OP2_301_57650_20140114_203646_inLine +BABEL_OP2_301_57650_20140114_203646_outLine +BABEL_OP2_301_57654_20131123_151724_inLine +BABEL_OP2_301_57654_20131123_151724_outLine +BABEL_OP2_301_57654_20131123_152356_inLine +BABEL_OP2_301_57654_20131123_152356_outLine +BABEL_OP2_301_57654_20131123_154603_inLine +BABEL_OP2_301_57654_20131123_154603_outLine +BABEL_OP2_301_58717_20131223_213724_inLine +BABEL_OP2_301_58717_20131223_213724_outLine +BABEL_OP2_301_59028_20140201_153656_inLine +BABEL_OP2_301_59028_20140201_153656_outLine +BABEL_OP2_301_59645_20131224_162758_inLine 
+BABEL_OP2_301_59645_20131224_162758_outLine +BABEL_OP2_301_60307_20140213_205247_inLine +BABEL_OP2_301_60307_20140213_205247_outLine +BABEL_OP2_301_61011_20131020_212453_inLine +BABEL_OP2_301_61011_20131020_212453_outLine +BABEL_OP2_301_62155_20140121_235400_inLine +BABEL_OP2_301_62155_20140121_235400_outLine +BABEL_OP2_301_62430_20140123_160035_inLine +BABEL_OP2_301_62430_20140123_160035_outLine +BABEL_OP2_301_63094_20140129_205122_inLine +BABEL_OP2_301_63094_20140129_205122_outLine +BABEL_OP2_301_63220_20131218_184307_inLine +BABEL_OP2_301_63220_20131218_184307_outLine +BABEL_OP2_301_63511_20140214_161858_inLine +BABEL_OP2_301_63511_20140214_161858_outLine +BABEL_OP2_301_63670_20131216_201258_inLine +BABEL_OP2_301_63670_20131216_201258_outLine +BABEL_OP2_301_63730_20140204_182322_inLine +BABEL_OP2_301_63730_20140204_182322_outLine +BABEL_OP2_301_63757_20140206_214404_inLine +BABEL_OP2_301_63757_20140206_214404_outLine +BABEL_OP2_301_64014_20140114_133546_inLine +BABEL_OP2_301_64014_20140114_133546_outLine +BABEL_OP2_301_64259_20140225_211407_inLine +BABEL_OP2_301_64259_20140225_211407_outLine +BABEL_OP2_301_64398_20131213_201128_inLine +BABEL_OP2_301_64398_20131213_201128_outLine +BABEL_OP2_301_65064_20140207_185319_inLine +BABEL_OP2_301_65064_20140207_185319_outLine +BABEL_OP2_301_65370_20140201_200500_inLine +BABEL_OP2_301_65370_20140201_200500_outLine +BABEL_OP2_301_65640_20140123_140233_inLine +BABEL_OP2_301_65640_20140123_140233_outLine +BABEL_OP2_301_66001_20131115_220236_inLine +BABEL_OP2_301_66001_20131115_220236_outLine +BABEL_OP2_301_66519_20131128_144732_inLine +BABEL_OP2_301_66519_20131128_144732_outLine +BABEL_OP2_301_66519_20131128_150056_inLine +BABEL_OP2_301_66519_20131128_150056_outLine +BABEL_OP2_301_67283_20131023_173705_inLine +BABEL_OP2_301_67283_20131023_173705_outLine +BABEL_OP2_301_67389_20140219_142647_inLine +BABEL_OP2_301_67389_20140219_142647_outLine +BABEL_OP2_301_67401_20140207_182426_inLine +BABEL_OP2_301_67401_20140207_182426_outLine +BABEL_OP2_301_68385_20131016_193158_inLine +BABEL_OP2_301_69153_20131216_202419_inLine +BABEL_OP2_301_69153_20131216_202419_outLine +BABEL_OP2_301_69578_20131201_211250_inLine +BABEL_OP2_301_69578_20131201_211250_outLine +BABEL_OP2_301_69578_20131201_212353_inLine +BABEL_OP2_301_69578_20131201_212353_outLine +BABEL_OP2_301_70221_20131223_190148_inLine +BABEL_OP2_301_70221_20131223_190148_outLine +BABEL_OP2_301_70343_20131212_181613_inLine +BABEL_OP2_301_70343_20131212_181613_outLine +BABEL_OP2_301_70526_20140127_161237_inLine +BABEL_OP2_301_70526_20140127_161237_outLine +BABEL_OP2_301_70986_20140223_164925_inLine +BABEL_OP2_301_70986_20140223_164925_outLine +BABEL_OP2_301_72110_20131218_192930_inLine +BABEL_OP2_301_72110_20131218_192930_outLine +BABEL_OP2_301_72110_20131220_163212_inLine +BABEL_OP2_301_72110_20131220_163212_outLine +BABEL_OP2_301_73301_20131208_194427_inLine +BABEL_OP2_301_73301_20131208_194427_outLine +BABEL_OP2_301_73408_20140213_184704_inLine +BABEL_OP2_301_73408_20140213_184704_outLine +BABEL_OP2_301_73822_20140216_175714_inLine +BABEL_OP2_301_73822_20140216_175714_outLine +BABEL_OP2_301_73837_20131124_173546_inLine +BABEL_OP2_301_74728_20140214_203632_outLine +BABEL_OP2_301_75064_20131129_123930_inLine +BABEL_OP2_301_75064_20131129_123930_outLine +BABEL_OP2_301_75064_20131129_124541_inLine +BABEL_OP2_301_75064_20131129_124541_outLine +BABEL_OP2_301_75342_20131217_201144_inLine +BABEL_OP2_301_75342_20131217_201144_outLine +BABEL_OP2_301_75366_20140131_192045_inLine 
+BABEL_OP2_301_75366_20140131_192045_outLine +BABEL_OP2_301_75465_20131221_182948_inLine +BABEL_OP2_301_75465_20131221_182948_outLine +BABEL_OP2_301_77242_20140204_192041_inLine +BABEL_OP2_301_77242_20140204_192041_outLine +BABEL_OP2_301_77803_20131024_201026_inLine +BABEL_OP2_301_77803_20131024_201026_outLine +BABEL_OP2_301_79107_20140204_212236_inLine +BABEL_OP2_301_79107_20140204_212236_outLine +BABEL_OP2_301_79139_20131203_165343_outLine +BABEL_OP2_301_79429_20140220_203629_inLine +BABEL_OP2_301_79429_20140220_203629_outLine +BABEL_OP2_301_79858_20131024_220616_outLine +BABEL_OP2_301_80306_20131203_161230_inLine +BABEL_OP2_301_80306_20131203_161230_outLine +BABEL_OP2_301_80306_20131203_162810_inLine +BABEL_OP2_301_80306_20131203_162810_outLine +BABEL_OP2_301_80439_20131202_210809_inLine +BABEL_OP2_301_80439_20131202_210809_outLine +BABEL_OP2_301_80655_20140123_205823_inLine +BABEL_OP2_301_80655_20140123_205823_outLine +BABEL_OP2_301_80721_20131225_182955_inLine +BABEL_OP2_301_80721_20131225_182955_outLine +BABEL_OP2_301_81213_20131114_184213_outLine +BABEL_OP2_301_81213_20131114_190753_outLine +BABEL_OP2_301_81404_20131206_140303_outLine +BABEL_OP2_301_81971_20131029_141333_outLine +BABEL_OP2_301_82030_20140126_162146_inLine +BABEL_OP2_301_82030_20140126_162146_outLine +BABEL_OP2_301_82140_20131201_202210_inLine +BABEL_OP2_301_82140_20131201_202210_outLine +BABEL_OP2_301_82224_20140108_145115_inLine +BABEL_OP2_301_82224_20140108_145115_outLine +BABEL_OP2_301_82361_20140123_185800_outLine +BABEL_OP2_301_82966_20131229_171324_inLine +BABEL_OP2_301_82966_20131229_171324_outLine +BABEL_OP2_301_83062_20140123_143457_inLine +BABEL_OP2_301_83062_20140123_143457_outLine +BABEL_OP2_301_83935_20131216_140532_inLine +BABEL_OP2_301_83935_20131216_140532_outLine +BABEL_OP2_301_84327_20131217_123632_inLine +BABEL_OP2_301_84327_20131217_123632_outLine +BABEL_OP2_301_84823_20131218_180840_inLine +BABEL_OP2_301_84823_20131218_180840_outLine +BABEL_OP2_301_85246_20140216_194331_inLine +BABEL_OP2_301_85246_20140216_194331_outLine +BABEL_OP2_301_85254_20140131_161411_inLine +BABEL_OP2_301_85254_20140131_161411_outLine +BABEL_OP2_301_85254_20140131_162620_inLine +BABEL_OP2_301_85254_20140131_162620_outLine +BABEL_OP2_301_86557_20131019_195730_outLine +BABEL_OP2_301_86597_20140204_185521_inLine +BABEL_OP2_301_86597_20140204_185521_outLine +BABEL_OP2_301_86715_20140201_181648_inLine +BABEL_OP2_301_86715_20140201_181648_outLine +BABEL_OP2_301_86826_20140129_155917_inLine +BABEL_OP2_301_86826_20140129_155917_outLine +BABEL_OP2_301_87280_20131220_194114_inLine +BABEL_OP2_301_87280_20131220_194114_outLine +BABEL_OP2_301_87731_20140220_185807_inLine +BABEL_OP2_301_87731_20140220_185807_outLine +BABEL_OP2_301_87777_20140208_173157_inLine +BABEL_OP2_301_87777_20140208_173157_outLine +BABEL_OP2_301_89045_20131025_163532_inLine +BABEL_OP2_301_89045_20131025_163532_outLine +BABEL_OP2_301_90347_20140206_160505_inLine +BABEL_OP2_301_90347_20140206_160505_outLine +BABEL_OP2_301_90417_20140202_164404_inLine +BABEL_OP2_301_90417_20140202_164404_outLine +BABEL_OP2_301_90760_20140204_173413_inLine +BABEL_OP2_301_90760_20140204_173413_outLine +BABEL_OP2_301_91189_20140130_134130_inLine +BABEL_OP2_301_91189_20140130_134130_outLine +BABEL_OP2_301_91581_20131218_193124_inLine +BABEL_OP2_301_91581_20131218_193124_outLine +BABEL_OP2_301_91593_20140201_174423_inLine +BABEL_OP2_301_91593_20140201_174423_outLine +BABEL_OP2_301_91888_20140128_153319_inLine +BABEL_OP2_301_91888_20140128_153319_outLine 
+BABEL_OP2_301_92077_20140127_141441_inLine +BABEL_OP2_301_92077_20140127_141441_outLine +BABEL_OP2_301_92356_20140112_181929_inLine +BABEL_OP2_301_92356_20140112_181929_outLine +BABEL_OP2_301_92557_20140115_150859_inLine +BABEL_OP2_301_92557_20140115_150859_outLine +BABEL_OP2_301_92643_20140127_134733_inLine +BABEL_OP2_301_92643_20140127_134733_outLine +BABEL_OP2_301_93007_20140201_215259_inLine +BABEL_OP2_301_93007_20140201_215259_outLine +BABEL_OP2_301_93222_20140224_152228_inLine +BABEL_OP2_301_93222_20140224_152228_outLine +BABEL_OP2_301_93681_20131129_223439_inLine +BABEL_OP2_301_93681_20131129_223439_outLine +BABEL_OP2_301_93858_20140202_152245_inLine +BABEL_OP2_301_93858_20140202_152245_outLine +BABEL_OP2_301_94044_20140225_200641_inLine +BABEL_OP2_301_94044_20140225_200641_outLine +BABEL_OP2_301_94141_20140214_174210_inLine +BABEL_OP2_301_94141_20140214_174210_outLine +BABEL_OP2_301_94141_20140214_174838_inLine +BABEL_OP2_301_94141_20140214_174838_outLine +BABEL_OP2_301_94166_20140114_223757_inLine +BABEL_OP2_301_94166_20140114_223757_outLine +BABEL_OP2_301_94237_20140125_154005_inLine +BABEL_OP2_301_94237_20140125_154005_outLine +BABEL_OP2_301_94487_20140214_181548_inLine +BABEL_OP2_301_94487_20140214_181548_outLine +BABEL_OP2_301_94969_20140216_191950_inLine +BABEL_OP2_301_94969_20140216_191950_outLine +BABEL_OP2_301_95467_20140204_202122_inLine +BABEL_OP2_301_95467_20140204_202122_outLine +BABEL_OP2_301_95490_20131019_201427_inLine +BABEL_OP2_301_95571_20140225_185558_inLine +BABEL_OP2_301_95571_20140225_185558_outLine +BABEL_OP2_301_95670_20131119_163101_inLine +BABEL_OP2_301_95670_20131119_163101_outLine +BABEL_OP2_301_95670_20131119_163931_inLine +BABEL_OP2_301_95670_20131119_163931_outLine +BABEL_OP2_301_96205_20131208_194017_inLine +BABEL_OP2_301_96205_20131208_194017_outLine +BABEL_OP2_301_96205_20131208_195213_inLine +BABEL_OP2_301_96205_20131208_195213_outLine +BABEL_OP2_301_96446_20131030_214504_inLine +BABEL_OP2_301_96446_20131030_214504_outLine +BABEL_OP2_301_96584_20140114_144108_inLine +BABEL_OP2_301_96584_20140114_144108_outLine +BABEL_OP2_301_96934_20131202_185517_inLine +BABEL_OP2_301_96934_20131202_185517_outLine +BABEL_OP2_301_96940_20140223_150250_inLine +BABEL_OP2_301_96940_20140223_150250_outLine +BABEL_OP2_301_97097_20140122_232217_inLine +BABEL_OP2_301_97097_20140122_232217_outLine +BABEL_OP2_301_97220_20140204_184737_inLine +BABEL_OP2_301_97220_20140204_184737_outLine +BABEL_OP2_301_97604_20140112_103548_inLine +BABEL_OP2_301_97604_20140112_103548_outLine +BABEL_OP2_301_97849_20140123_174235_inLine +BABEL_OP2_301_97849_20140123_174235_outLine +BABEL_OP2_301_97911_20140131_152336_inLine +BABEL_OP2_301_97911_20140131_152336_outLine +BABEL_OP2_301_97911_20140131_153328_inLine +BABEL_OP2_301_97911_20140131_153328_outLine +BABEL_OP2_301_97988_20131219_190252_inLine +BABEL_OP2_301_97988_20131219_190252_outLine +BABEL_OP2_301_97988_20140114_012737_inLine +BABEL_OP2_301_97988_20140114_012737_outLine +BABEL_OP2_301_98192_20140205_153043_inLine +BABEL_OP2_301_98192_20140205_153043_outLine +BABEL_OP2_301_98506_20140122_174742_inLine +BABEL_OP2_301_98506_20140122_174742_outLine +BABEL_OP2_301_98678_20140122_124908_inLine +BABEL_OP2_301_98678_20140122_124908_outLine +BABEL_OP2_301_99401_20131024_225414_inLine +BABEL_OP2_301_99401_20131024_225414_outLine +BABEL_OP2_301_99732_20131220_214613_inLine +BABEL_OP2_301_99732_20131220_214613_outLine +BABEL_OP2_301_99813_20131216_151916_inLine +BABEL_OP2_301_99813_20131216_151916_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/dev.list b/egs/babel/s5d/conf/lists/302-kazakh/dev.list
new file mode 100644
index 00000000000..31a554efeef
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/dev.list
@@ -0,0 +1,140 @@
+BABEL_OP2_302_10002_20140316_215637_inLine +BABEL_OP2_302_10002_20140316_215637_outLine +BABEL_OP2_302_10188_20131030_194100_inLine +BABEL_OP2_302_10188_20131030_194100_outLine +BABEL_OP2_302_11673_20131104_223908_inLine +BABEL_OP2_302_11673_20131104_223908_outLine +BABEL_OP2_302_13324_20131115_220718_inLine +BABEL_OP2_302_13324_20131115_220718_outLine +BABEL_OP2_302_17440_20140218_204311_inLine +BABEL_OP2_302_17440_20140218_204311_outLine +BABEL_OP2_302_17573_20140312_030325_inLine +BABEL_OP2_302_17573_20140312_030325_outLine +BABEL_OP2_302_17914_20140126_234956_inLine +BABEL_OP2_302_17914_20140126_234956_outLine +BABEL_OP2_302_17923_20131116_222221_inLine +BABEL_OP2_302_17923_20131116_222221_outLine +BABEL_OP2_302_18939_20131111_213325_inLine +BABEL_OP2_302_18939_20131111_213325_outLine +BABEL_OP2_302_19663_20131212_235807_inLine +BABEL_OP2_302_19663_20131212_235807_outLine +BABEL_OP2_302_19703_20131202_234704_inLine +BABEL_OP2_302_19703_20131202_234704_outLine +BABEL_OP2_302_20682_20140114_221052_inLine +BABEL_OP2_302_20682_20140114_221052_outLine +BABEL_OP2_302_20768_20140203_185125_inLine +BABEL_OP2_302_20768_20140203_185125_outLine +BABEL_OP2_302_20768_20140203_190423_inLine +BABEL_OP2_302_20768_20140203_190423_outLine +BABEL_OP2_302_21109_20140111_215428_inLine +BABEL_OP2_302_21109_20140111_215428_outLine +BABEL_OP2_302_21581_20131217_222306_inLine +BABEL_OP2_302_21581_20131217_222306_outLine +BABEL_OP2_302_22216_20131104_153600_inLine +BABEL_OP2_302_22216_20131104_153600_outLine +BABEL_OP2_302_23355_20140317_191841_inLine +BABEL_OP2_302_23355_20140317_191841_outLine +BABEL_OP2_302_24589_20131129_215929_inLine +BABEL_OP2_302_24589_20131129_215929_outLine +BABEL_OP2_302_26072_20140131_184053_inLine +BABEL_OP2_302_26072_20140131_184053_outLine +BABEL_OP2_302_33175_20131105_201906_inLine +BABEL_OP2_302_33175_20131105_201906_outLine +BABEL_OP2_302_33355_20131112_211255_inLine +BABEL_OP2_302_33355_20131112_211255_outLine +BABEL_OP2_302_33355_20131112_213746_inLine +BABEL_OP2_302_33355_20131112_213746_outLine +BABEL_OP2_302_34328_20131219_023407_inLine +BABEL_OP2_302_34328_20131219_023407_outLine +BABEL_OP2_302_36341_20131101_170216_inLine +BABEL_OP2_302_36341_20131101_170216_outLine +BABEL_OP2_302_36341_20131101_171111_inLine +BABEL_OP2_302_36341_20131101_171111_outLine +BABEL_OP2_302_36669_20131206_164229_inLine +BABEL_OP2_302_36669_20131206_164229_outLine +BABEL_OP2_302_41174_20131212_200450_inLine +BABEL_OP2_302_41174_20131212_200450_outLine +BABEL_OP2_302_41442_20140125_220923_inLine +BABEL_OP2_302_41442_20140125_220923_outLine +BABEL_OP2_302_42497_20131116_001033_inLine +BABEL_OP2_302_42497_20131116_001033_outLine +BABEL_OP2_302_42497_20131116_002236_inLine +BABEL_OP2_302_42497_20131116_002236_outLine +BABEL_OP2_302_43789_20140108_210806_inLine +BABEL_OP2_302_43789_20140108_210806_outLine +BABEL_OP2_302_44868_20131217_205108_inLine +BABEL_OP2_302_44868_20131217_205108_outLine +BABEL_OP2_302_44868_20131217_205716_inLine +BABEL_OP2_302_44868_20131217_205716_outLine +BABEL_OP2_302_44868_20131217_211035_inLine +BABEL_OP2_302_44868_20131217_211035_outLine +BABEL_OP2_302_45642_20131114_014119_inLine +BABEL_OP2_302_45642_20131114_014119_outLine +BABEL_OP2_302_47156_20140313_011009_inLine +BABEL_OP2_302_47156_20140313_011009_outLine
+BABEL_OP2_302_49502_20131104_181501_inLine +BABEL_OP2_302_49502_20131104_181501_outLine +BABEL_OP2_302_50565_20131103_225947_inLine +BABEL_OP2_302_50565_20131103_225947_outLine +BABEL_OP2_302_50726_20131118_025621_inLine +BABEL_OP2_302_50726_20131118_025621_outLine +BABEL_OP2_302_50745_20140214_021844_inLine +BABEL_OP2_302_50745_20140214_021844_outLine +BABEL_OP2_302_60830_20131205_223823_inLine +BABEL_OP2_302_60830_20131205_223823_outLine +BABEL_OP2_302_60830_20131205_225122_inLine +BABEL_OP2_302_60830_20131205_225122_outLine +BABEL_OP2_302_61011_20131110_191134_inLine +BABEL_OP2_302_61011_20131110_191134_outLine +BABEL_OP2_302_61040_20140123_215906_inLine +BABEL_OP2_302_61040_20140123_215906_outLine +BABEL_OP2_302_61963_20140119_184816_inLine +BABEL_OP2_302_61963_20140119_184816_outLine +BABEL_OP2_302_66916_20131121_223838_inLine +BABEL_OP2_302_66916_20131121_223838_outLine +BABEL_OP2_302_70110_20131109_190313_inLine +BABEL_OP2_302_70110_20131109_190313_outLine +BABEL_OP2_302_70182_20140214_185232_inLine +BABEL_OP2_302_70182_20140214_185232_outLine +BABEL_OP2_302_72654_20131207_162604_inLine +BABEL_OP2_302_72654_20131207_162604_outLine +BABEL_OP2_302_77730_20131114_223327_inLine +BABEL_OP2_302_77730_20131114_223327_outLine +BABEL_OP2_302_77730_20131114_230511_inLine +BABEL_OP2_302_77730_20131114_230511_outLine +BABEL_OP2_302_77730_20131114_231344_inLine +BABEL_OP2_302_77730_20131114_231344_outLine +BABEL_OP2_302_79080_20140203_192545_inLine +BABEL_OP2_302_79080_20140203_192545_outLine +BABEL_OP2_302_80577_20140126_190012_inLine +BABEL_OP2_302_80577_20140126_190012_outLine +BABEL_OP2_302_81854_20140203_161410_inLine +BABEL_OP2_302_81854_20140203_161410_outLine +BABEL_OP2_302_81971_20131101_194252_inLine +BABEL_OP2_302_81971_20131101_194252_outLine +BABEL_OP2_302_81971_20131101_195016_inLine +BABEL_OP2_302_81971_20131101_195016_outLine +BABEL_OP2_302_84823_20140213_015014_inLine +BABEL_OP2_302_84823_20140213_015014_outLine +BABEL_OP2_302_85248_20140123_204317_inLine +BABEL_OP2_302_85248_20140123_204317_outLine +BABEL_OP2_302_85322_20131108_161437_inLine +BABEL_OP2_302_85322_20131108_161437_outLine +BABEL_OP2_302_86557_20131121_000022_inLine +BABEL_OP2_302_86557_20131121_000022_outLine +BABEL_OP2_302_87889_20140119_163150_inLine +BABEL_OP2_302_87889_20140119_163150_outLine +BABEL_OP2_302_90080_20140120_230635_inLine +BABEL_OP2_302_90080_20140120_230635_outLine +BABEL_OP2_302_91593_20140215_175049_inLine +BABEL_OP2_302_91593_20140215_175049_outLine +BABEL_OP2_302_92509_20131114_030809_inLine +BABEL_OP2_302_92509_20131114_030809_outLine +BABEL_OP2_302_93320_20140218_173001_inLine +BABEL_OP2_302_93320_20140218_173001_outLine +BABEL_OP2_302_93475_20131115_203137_inLine +BABEL_OP2_302_93475_20131115_203137_outLine +BABEL_OP2_302_95583_20131112_203137_inLine +BABEL_OP2_302_95583_20131112_203137_outLine +BABEL_OP2_302_96842_20140131_154710_inLine +BABEL_OP2_302_96842_20140131_154710_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/eval.list b/egs/babel/s5d/conf/lists/302-kazakh/eval.list
new file mode 100644
index 00000000000..cf23788087e
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/eval.list
@@ -0,0 +1,191 @@
+BABEL_OP2_302_10416_20131210_035651_inLine +BABEL_OP2_302_10416_20131210_035651_outLine +BABEL_OP2_302_11096_20140219_220112_inLine +BABEL_OP2_302_11096_20140219_220112_outLine +BABEL_OP2_302_12916_20131107_171154_inLine +BABEL_OP2_302_12916_20131107_171154_outLine +BABEL_OP2_302_15216_20140219_211720_inLine +BABEL_OP2_302_15216_20140219_211720_outLine
+BABEL_OP2_302_16787_20131207_203127_inLine +BABEL_OP2_302_16787_20131207_203127_outLine +BABEL_OP2_302_17582_20140215_204647_inLine +BABEL_OP2_302_17582_20140215_204647_outLine +BABEL_OP2_302_17751_20140216_211124_inLine +BABEL_OP2_302_17751_20140216_211124_outLine +BABEL_OP2_302_18291_20140215_182410_inLine +BABEL_OP2_302_18291_20140215_182410_outLine +BABEL_OP2_302_18863_20140118_154802_inLine +BABEL_OP2_302_18863_20140118_154802_outLine +BABEL_OP2_302_19545_20131213_220625_inLine +BABEL_OP2_302_19545_20131213_220625_outLine +BABEL_OP2_302_19672_20131217_215636_inLine +BABEL_OP2_302_19672_20131217_215636_outLine +BABEL_OP2_302_19782_20140125_222442_inLine +BABEL_OP2_302_19782_20140125_222442_outLine +BABEL_OP2_302_20738_20140126_201239_inLine +BABEL_OP2_302_20738_20140126_201239_outLine +BABEL_OP2_302_22624_20140116_163601_inLine +BABEL_OP2_302_22624_20140116_163601_outLine +BABEL_OP2_302_22641_20131104_232148_inLine +BABEL_OP2_302_22641_20131104_232148_outLine +BABEL_OP2_302_23628_20131206_185035_inLine +BABEL_OP2_302_23628_20131206_185035_outLine +BABEL_OP2_302_23731_20131211_000104_inLine +BABEL_OP2_302_23731_20131211_000104_outLine +BABEL_OP2_302_23893_20140314_000251_inLine +BABEL_OP2_302_23893_20140314_000251_outLine +BABEL_OP2_302_24924_20140219_171405_inLine +BABEL_OP2_302_24924_20140219_171405_outLine +BABEL_OP2_302_28422_20131224_204108_inLine +BABEL_OP2_302_28422_20131224_204108_outLine +BABEL_OP2_302_28871_20131030_171711_inLine +BABEL_OP2_302_28871_20131030_171711_outLine +BABEL_OP2_302_29352_20140304_201752_inLine +BABEL_OP2_302_29352_20140304_201752_outLine +BABEL_OP2_302_29777_20140114_172507_inLine +BABEL_OP2_302_29777_20140114_172507_outLine +BABEL_OP2_302_31979_20131206_224314_inLine +BABEL_OP2_302_31979_20131206_224314_outLine +BABEL_OP2_302_32914_20140106_220002_inLine +BABEL_OP2_302_32914_20140106_220002_outLine +BABEL_OP2_302_33635_20131206_225838_inLine +BABEL_OP2_302_33635_20131206_225838_outLine +BABEL_OP2_302_33672_20131111_153638_inLine +BABEL_OP2_302_33672_20131111_153638_outLine +BABEL_OP2_302_37064_20131207_191407_inLine +BABEL_OP2_302_37064_20131207_191407_outLine +BABEL_OP2_302_37499_20140225_222508_inLine +BABEL_OP2_302_37499_20140225_222508_outLine +BABEL_OP2_302_38139_20140315_230332_inLine +BABEL_OP2_302_38139_20140315_230332_outLine +BABEL_OP2_302_38979_20140126_212312_inLine +BABEL_OP2_302_38979_20140126_212312_outLine +BABEL_OP2_302_41493_20131031_190908_inLine +BABEL_OP2_302_41493_20131031_190908_outLine +BABEL_OP2_302_42299_20140216_142852_inLine +BABEL_OP2_302_42299_20140216_142852_outLine +BABEL_OP2_302_42942_20131207_000752_inLine +BABEL_OP2_302_42942_20131207_000752_outLine +BABEL_OP2_302_43388_20131222_214138_inLine +BABEL_OP2_302_43388_20131222_214138_outLine +BABEL_OP2_302_45777_20131209_205207_inLine +BABEL_OP2_302_45777_20131209_205207_outLine +BABEL_OP2_302_46974_20140108_014337_inLine +BABEL_OP2_302_46974_20140108_014337_outLine +BABEL_OP2_302_47877_20140118_204004_inLine +BABEL_OP2_302_47877_20140118_204004_outLine +BABEL_OP2_302_48016_20140220_174426_inLine +BABEL_OP2_302_48016_20140220_174426_outLine +BABEL_OP2_302_49775_20131103_031204_inLine +BABEL_OP2_302_49775_20131103_031204_outLine +BABEL_OP2_302_49902_20131218_203252_inLine +BABEL_OP2_302_49902_20131218_203252_outLine +BABEL_OP2_302_52025_20131108_191032_inLine +BABEL_OP2_302_52025_20131108_191032_outLine +BABEL_OP2_302_52025_20131108_193401_inLine +BABEL_OP2_302_52025_20131108_193401_outLine +BABEL_OP2_302_54744_20131111_235401_inLine 
+BABEL_OP2_302_54744_20131111_235401_outLine +BABEL_OP2_302_55742_20131118_154051_inLine +BABEL_OP2_302_55742_20131118_154051_outLine +BABEL_OP2_302_56019_20140226_155123_inLine +BABEL_OP2_302_56019_20140226_155123_outLine +BABEL_OP2_302_56370_20131120_230147_inLine +BABEL_OP2_302_56370_20131120_230147_outLine +BABEL_OP2_302_56429_20131117_181816_inLine +BABEL_OP2_302_56429_20131117_181816_outLine +BABEL_OP2_302_56523_20131215_162313_inLine +BABEL_OP2_302_56523_20131215_162313_outLine +BABEL_OP2_302_57219_20140218_190044_inLine +BABEL_OP2_302_57219_20140218_190044_outLine +BABEL_OP2_302_57650_20140126_224015_inLine +BABEL_OP2_302_57650_20140126_224015_outLine +BABEL_OP2_302_58815_20140125_201759_inLine +BABEL_OP2_302_58815_20140125_201759_outLine +BABEL_OP2_302_60836_20131115_015627_inLine +BABEL_OP2_302_60836_20131115_015627_outLine +BABEL_OP2_302_61219_20131128_233326_inLine +BABEL_OP2_302_61219_20131128_233326_outLine +BABEL_OP2_302_62286_20131214_174209_inLine +BABEL_OP2_302_62286_20131214_174209_outLine +BABEL_OP2_302_63481_20131105_213305_inLine +BABEL_OP2_302_63481_20131105_213305_outLine +BABEL_OP2_302_64759_20131107_153706_inLine +BABEL_OP2_302_66967_20131125_200431_inLine +BABEL_OP2_302_66967_20131125_200431_outLine +BABEL_OP2_302_66967_20131125_201605_inLine +BABEL_OP2_302_66967_20131125_201605_outLine +BABEL_OP2_302_66967_20131125_202216_inLine +BABEL_OP2_302_66967_20131125_202216_outLine +BABEL_OP2_302_67066_20140215_220827_inLine +BABEL_OP2_302_67066_20140215_220827_outLine +BABEL_OP2_302_71404_20131128_225018_inLine +BABEL_OP2_302_71404_20131128_225018_outLine +BABEL_OP2_302_71780_20131121_222518_inLine +BABEL_OP2_302_71780_20131121_222518_outLine +BABEL_OP2_302_73042_20131115_165006_inLine +BABEL_OP2_302_73042_20131115_165006_outLine +BABEL_OP2_302_73119_20131128_222112_inLine +BABEL_OP2_302_73119_20131128_222112_outLine +BABEL_OP2_302_73622_20131117_223750_inLine +BABEL_OP2_302_73622_20131117_223750_outLine +BABEL_OP2_302_73622_20131117_230514_inLine +BABEL_OP2_302_73622_20131117_230514_outLine +BABEL_OP2_302_76372_20140121_204025_inLine +BABEL_OP2_302_76372_20140121_204025_outLine +BABEL_OP2_302_76773_20131117_001202_inLine +BABEL_OP2_302_76773_20131117_001202_outLine +BABEL_OP2_302_77112_20131127_221650_inLine +BABEL_OP2_302_77112_20131127_221650_outLine +BABEL_OP2_302_78604_20131117_205614_inLine +BABEL_OP2_302_78604_20131117_205614_outLine +BABEL_OP2_302_78604_20131117_210914_inLine +BABEL_OP2_302_78604_20131117_210914_outLine +BABEL_OP2_302_78749_20140305_221314_inLine +BABEL_OP2_302_78749_20140305_221314_outLine +BABEL_OP2_302_79107_20140223_160949_inLine +BABEL_OP2_302_79107_20140223_160949_outLine +BABEL_OP2_302_79505_20140221_191940_inLine +BABEL_OP2_302_79505_20140221_191940_outLine +BABEL_OP2_302_79571_20131224_210857_inLine +BABEL_OP2_302_79571_20131224_210857_outLine +BABEL_OP2_302_80881_20131130_200459_inLine +BABEL_OP2_302_80881_20131130_200459_outLine +BABEL_OP2_302_80897_20131226_221806_inLine +BABEL_OP2_302_80897_20131226_221806_outLine +BABEL_OP2_302_82966_20140203_200450_inLine +BABEL_OP2_302_82966_20140203_200450_outLine +BABEL_OP2_302_85179_20140113_180639_inLine +BABEL_OP2_302_85179_20140113_180639_outLine +BABEL_OP2_302_87280_20140123_211738_inLine +BABEL_OP2_302_87280_20140123_211738_outLine +BABEL_OP2_302_88686_20140131_165805_inLine +BABEL_OP2_302_88686_20140131_165805_outLine +BABEL_OP2_302_89372_20131106_214629_inLine +BABEL_OP2_302_89372_20131106_214629_outLine +BABEL_OP2_302_90417_20140215_195110_inLine 
+BABEL_OP2_302_90417_20140215_195110_outLine +BABEL_OP2_302_90935_20131207_172013_inLine +BABEL_OP2_302_90935_20131207_172013_outLine +BABEL_OP2_302_92281_20140312_223937_inLine +BABEL_OP2_302_92281_20140312_223937_outLine +BABEL_OP2_302_93224_20131219_004305_inLine +BABEL_OP2_302_93224_20131219_004305_outLine +BABEL_OP2_302_93861_20131208_195418_inLine +BABEL_OP2_302_93861_20131208_195418_outLine +BABEL_OP2_302_95663_20131031_164153_inLine +BABEL_OP2_302_95663_20131031_164153_outLine +BABEL_OP2_302_97097_20140121_214508_inLine +BABEL_OP2_302_97097_20140121_214508_outLine +BABEL_OP2_302_97220_20140216_214954_inLine +BABEL_OP2_302_97220_20140216_214954_outLine +BABEL_OP2_302_97264_20140203_010930_inLine +BABEL_OP2_302_97264_20140203_010930_outLine +BABEL_OP2_302_97988_20140226_180453_inLine +BABEL_OP2_302_97988_20140226_180453_outLine +BABEL_OP2_302_98888_20140224_195320_inLine +BABEL_OP2_302_98888_20140224_195320_outLine +BABEL_OP2_302_99344_20140317_184547_inLine +BABEL_OP2_302_99344_20140317_184547_outLine +BABEL_OP2_302_99516_20131109_182628_inLine +BABEL_OP2_302_99516_20131109_182628_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list
new file mode 100644
index 00000000000..402c6ca4cb0
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list
@@ -0,0 +1,61 @@
+BABEL_OP2_302_10416_20131210_035651_inLine +BABEL_OP2_302_10416_20131210_035651_outLine +BABEL_OP2_302_16787_20131207_203127_inLine +BABEL_OP2_302_16787_20131207_203127_outLine +BABEL_OP2_302_18863_20140118_154802_inLine +BABEL_OP2_302_18863_20140118_154802_outLine +BABEL_OP2_302_19672_20131217_215636_inLine +BABEL_OP2_302_19672_20131217_215636_outLine +BABEL_OP2_302_23628_20131206_185035_inLine +BABEL_OP2_302_23628_20131206_185035_outLine +BABEL_OP2_302_23731_20131211_000104_inLine +BABEL_OP2_302_23731_20131211_000104_outLine +BABEL_OP2_302_33635_20131206_225838_inLine +BABEL_OP2_302_33635_20131206_225838_outLine +BABEL_OP2_302_42942_20131207_000752_inLine +BABEL_OP2_302_42942_20131207_000752_outLine +BABEL_OP2_302_45777_20131209_205207_inLine +BABEL_OP2_302_45777_20131209_205207_outLine +BABEL_OP2_302_46974_20140108_014337_inLine +BABEL_OP2_302_46974_20140108_014337_outLine +BABEL_OP2_302_48016_20140220_174426_inLine +BABEL_OP2_302_48016_20140220_174426_outLine +BABEL_OP2_302_49775_20131103_031204_inLine +BABEL_OP2_302_49775_20131103_031204_outLine +BABEL_OP2_302_54744_20131111_235401_inLine +BABEL_OP2_302_54744_20131111_235401_outLine +BABEL_OP2_302_55742_20131118_154051_inLine +BABEL_OP2_302_55742_20131118_154051_outLine +BABEL_OP2_302_56019_20140226_155123_inLine +BABEL_OP2_302_56019_20140226_155123_outLine +BABEL_OP2_302_56429_20131117_181816_inLine +BABEL_OP2_302_56429_20131117_181816_outLine +BABEL_OP2_302_57650_20140126_224015_inLine +BABEL_OP2_302_57650_20140126_224015_outLine +BABEL_OP2_302_58815_20140125_201759_inLine +BABEL_OP2_302_58815_20140125_201759_outLine +BABEL_OP2_302_63481_20131105_213305_inLine +BABEL_OP2_302_63481_20131105_213305_outLine +BABEL_OP2_302_64759_20131107_153706_inLine +BABEL_OP2_302_71780_20131121_222518_inLine +BABEL_OP2_302_71780_20131121_222518_outLine +BABEL_OP2_302_73042_20131115_165006_inLine +BABEL_OP2_302_73042_20131115_165006_outLine +BABEL_OP2_302_73119_20131128_222112_inLine +BABEL_OP2_302_73119_20131128_222112_outLine +BABEL_OP2_302_76773_20131117_001202_inLine +BABEL_OP2_302_76773_20131117_001202_outLine +BABEL_OP2_302_78604_20131117_205614_inLine +BABEL_OP2_302_78604_20131117_205614_outLine
+BABEL_OP2_302_78604_20131117_210914_inLine +BABEL_OP2_302_78604_20131117_210914_outLine +BABEL_OP2_302_80897_20131226_221806_inLine +BABEL_OP2_302_80897_20131226_221806_outLine +BABEL_OP2_302_89372_20131106_214629_inLine +BABEL_OP2_302_89372_20131106_214629_outLine +BABEL_OP2_302_92281_20140312_223937_inLine +BABEL_OP2_302_92281_20140312_223937_outLine +BABEL_OP2_302_97097_20140121_214508_inLine +BABEL_OP2_302_97097_20140121_214508_outLine +BABEL_OP2_302_98888_20140224_195320_inLine +BABEL_OP2_302_98888_20140224_195320_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list
new file mode 100644
index 00000000000..ef82fb8fc17
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list
@@ -0,0 +1,130 @@
+BABEL_OP2_302_13483_20140111_145619_inLine +BABEL_OP2_302_13483_20140111_145619_outLine +BABEL_OP2_302_13792_20131105_160713_inLine +BABEL_OP2_302_13792_20131105_160713_outLine +BABEL_OP2_302_14137_20131205_201718_inLine +BABEL_OP2_302_14137_20131205_201718_outLine +BABEL_OP2_302_15638_20131227_190456_inLine +BABEL_OP2_302_15638_20131227_190456_outLine +BABEL_OP2_302_16467_20140125_193127_inLine +BABEL_OP2_302_16467_20140125_193127_outLine +BABEL_OP2_302_16886_20131209_211339_inLine +BABEL_OP2_302_16886_20131209_211339_outLine +BABEL_OP2_302_17113_20140216_165407_inLine +BABEL_OP2_302_17113_20140216_165407_outLine +BABEL_OP2_302_17567_20131227_223417_inLine +BABEL_OP2_302_17567_20131227_223417_outLine +BABEL_OP2_302_18118_20140312_010735_inLine +BABEL_OP2_302_18118_20140312_010735_outLine +BABEL_OP2_302_19722_20131106_001542_inLine +BABEL_OP2_302_19722_20131106_001542_outLine +BABEL_OP2_302_22280_20131214_220249_inLine +BABEL_OP2_302_22280_20131214_220249_outLine +BABEL_OP2_302_23505_20131113_214234_inLine +BABEL_OP2_302_23505_20131113_214234_outLine +BABEL_OP2_302_23505_20131113_215736_inLine +BABEL_OP2_302_23505_20131113_215736_outLine +BABEL_OP2_302_24323_20131207_212641_inLine +BABEL_OP2_302_24323_20131207_212641_outLine +BABEL_OP2_302_25085_20140216_161934_inLine +BABEL_OP2_302_25085_20140216_161934_outLine +BABEL_OP2_302_29135_20131031_201509_inLine +BABEL_OP2_302_29135_20131031_201509_outLine +BABEL_OP2_302_29416_20140125_222019_inLine +BABEL_OP2_302_29416_20140125_222019_outLine +BABEL_OP2_302_31490_20131120_230743_inLine +BABEL_OP2_302_31490_20131120_230743_outLine +BABEL_OP2_302_32287_20140316_185534_inLine +BABEL_OP2_302_32287_20140316_185534_outLine +BABEL_OP2_302_32301_20140108_212650_inLine +BABEL_OP2_302_32301_20140108_212650_outLine +BABEL_OP2_302_34197_20131203_173358_inLine +BABEL_OP2_302_34197_20131203_173358_outLine +BABEL_OP2_302_34477_20131205_030548_inLine +BABEL_OP2_302_34477_20131205_030548_outLine +BABEL_OP2_302_34477_20131205_035623_inLine +BABEL_OP2_302_34477_20131205_035623_outLine +BABEL_OP2_302_37598_20131218_200535_inLine +BABEL_OP2_302_37598_20131218_200535_outLine +BABEL_OP2_302_38588_20131216_211052_inLine +BABEL_OP2_302_38588_20131216_211052_outLine +BABEL_OP2_302_39744_20131031_182731_inLine +BABEL_OP2_302_39744_20131031_182731_outLine +BABEL_OP2_302_41233_20140111_195838_inLine +BABEL_OP2_302_41233_20140111_195838_outLine +BABEL_OP2_302_43646_20131204_185430_inLine +BABEL_OP2_302_43646_20131204_185430_outLine +BABEL_OP2_302_43920_20140312_031242_inLine +BABEL_OP2_302_43920_20140312_031242_outLine +BABEL_OP2_302_44619_20131212_234348_inLine +BABEL_OP2_302_44619_20131212_234348_outLine +BABEL_OP2_302_46763_20140225_183302_inLine +BABEL_OP2_302_46763_20140225_183302_outLine
+BABEL_OP2_302_48243_20131128_221311_inLine +BABEL_OP2_302_48243_20131128_221311_outLine +BABEL_OP2_302_49912_20140217_201647_inLine +BABEL_OP2_302_49912_20140217_201647_outLine +BABEL_OP2_302_50779_20131219_172746_inLine +BABEL_OP2_302_50779_20131219_172746_outLine +BABEL_OP2_302_53492_20140124_221354_inLine +BABEL_OP2_302_53492_20140124_221354_outLine +BABEL_OP2_302_53492_20140124_231722_inLine +BABEL_OP2_302_53492_20140124_231722_outLine +BABEL_OP2_302_56306_20140115_190808_inLine +BABEL_OP2_302_56306_20140115_190808_outLine +BABEL_OP2_302_58850_20131209_231304_inLine +BABEL_OP2_302_58850_20131209_231304_outLine +BABEL_OP2_302_61888_20140127_161005_inLine +BABEL_OP2_302_61888_20140127_161005_outLine +BABEL_OP2_302_70386_20131203_030837_inLine +BABEL_OP2_302_70386_20131203_030837_outLine +BABEL_OP2_302_70452_20131219_032729_inLine +BABEL_OP2_302_70452_20131219_032729_outLine +BABEL_OP2_302_71038_20140119_172132_inLine +BABEL_OP2_302_71038_20140119_172132_outLine +BABEL_OP2_302_71067_20140130_194954_inLine +BABEL_OP2_302_71067_20140130_194954_outLine +BABEL_OP2_302_75223_20131130_211714_inLine +BABEL_OP2_302_75223_20131130_211714_outLine +BABEL_OP2_302_75223_20131130_212825_inLine +BABEL_OP2_302_75223_20131130_212825_outLine +BABEL_OP2_302_77126_20131111_012344_inLine +BABEL_OP2_302_77126_20131111_012344_outLine +BABEL_OP2_302_77242_20140217_184823_inLine +BABEL_OP2_302_77242_20140217_184823_outLine +BABEL_OP2_302_79898_20140310_200258_inLine +BABEL_OP2_302_79898_20140310_200258_outLine +BABEL_OP2_302_80781_20131207_183741_inLine +BABEL_OP2_302_80781_20131207_183741_outLine +BABEL_OP2_302_81213_20131118_175514_inLine +BABEL_OP2_302_81213_20131118_175514_outLine +BABEL_OP2_302_82138_20131206_045140_inLine +BABEL_OP2_302_82138_20131206_045140_outLine +BABEL_OP2_302_82145_20140301_225354_inLine +BABEL_OP2_302_82145_20140301_225354_outLine +BABEL_OP2_302_82224_20140203_014024_inLine +BABEL_OP2_302_82224_20140203_014024_outLine +BABEL_OP2_302_83436_20131106_170059_inLine +BABEL_OP2_302_83436_20131106_170059_outLine +BABEL_OP2_302_84408_20131207_204020_inLine +BABEL_OP2_302_84408_20131207_204020_outLine +BABEL_OP2_302_85010_20140316_222754_inLine +BABEL_OP2_302_85010_20140316_222754_outLine +BABEL_OP2_302_87298_20140130_191447_inLine +BABEL_OP2_302_87298_20140130_191447_outLine +BABEL_OP2_302_87693_20131121_041057_inLine +BABEL_OP2_302_87693_20131121_041057_outLine +BABEL_OP2_302_94803_20140313_225823_inLine +BABEL_OP2_302_94803_20140313_225823_outLine +BABEL_OP2_302_95598_20131101_172634_inLine +BABEL_OP2_302_95598_20131101_172634_outLine +BABEL_OP2_302_95598_20131101_175037_inLine +BABEL_OP2_302_95598_20131101_175037_outLine +BABEL_OP2_302_95903_20140303_002203_inLine +BABEL_OP2_302_95903_20140303_002203_outLine +BABEL_OP2_302_97731_20140114_201001_inLine +BABEL_OP2_302_97731_20140114_201001_outLine +BABEL_OP2_302_97772_20131107_223232_inLine +BABEL_OP2_302_97772_20131107_223232_outLine +BABEL_OP2_302_98489_20131204_181216_inLine +BABEL_OP2_302_98489_20131204_181216_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list
new file mode 100644
index 00000000000..668576c2888
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list
@@ -0,0 +1,398 @@
+BABEL_OP2_302_10036_20131223_231808_inLine +BABEL_OP2_302_10036_20131223_231808_outLine +BABEL_OP2_302_10313_20140319_000910_inLine +BABEL_OP2_302_10313_20140319_000910_outLine +BABEL_OP2_302_10938_20140110_231500_inLine
+BABEL_OP2_302_10938_20140110_231500_outLine +BABEL_OP2_302_10966_20131201_171745_inLine +BABEL_OP2_302_10966_20131201_171745_outLine +BABEL_OP2_302_11486_20140327_014542_inLine +BABEL_OP2_302_11486_20140327_014542_outLine +BABEL_OP2_302_11528_20140313_172050_inLine +BABEL_OP2_302_11528_20140313_172050_outLine +BABEL_OP2_302_11581_20131224_173459_inLine +BABEL_OP2_302_11581_20131224_173459_outLine +BABEL_OP2_302_11797_20131123_210739_inLine +BABEL_OP2_302_11797_20131123_210739_outLine +BABEL_OP2_302_12220_20131208_170136_inLine +BABEL_OP2_302_12220_20131208_170136_outLine +BABEL_OP2_302_12606_20140203_201955_inLine +BABEL_OP2_302_12606_20140203_201955_outLine +BABEL_OP2_302_12609_20140213_010711_inLine +BABEL_OP2_302_12609_20140213_010711_outLine +BABEL_OP2_302_12767_20131109_202610_inLine +BABEL_OP2_302_12767_20131109_202610_outLine +BABEL_OP2_302_12846_20140216_173021_inLine +BABEL_OP2_302_12846_20140216_173021_outLine +BABEL_OP2_302_12851_20131030_220616_inLine +BABEL_OP2_302_12851_20131030_220616_outLine +BABEL_OP2_302_13664_20131030_032900_inLine +BABEL_OP2_302_13664_20131030_032900_outLine +BABEL_OP2_302_13709_20140126_163818_inLine +BABEL_OP2_302_13709_20140126_163818_outLine +BABEL_OP2_302_14725_20131106_204535_inLine +BABEL_OP2_302_14725_20131106_204535_outLine +BABEL_OP2_302_14807_20131220_203507_inLine +BABEL_OP2_302_14807_20131220_203507_outLine +BABEL_OP2_302_14814_20131206_165156_inLine +BABEL_OP2_302_14814_20131206_165156_outLine +BABEL_OP2_302_14899_20131101_223556_inLine +BABEL_OP2_302_14899_20131101_223556_outLine +BABEL_OP2_302_14972_20131220_203939_inLine +BABEL_OP2_302_14972_20131220_203939_outLine +BABEL_OP2_302_15535_20131227_221937_inLine +BABEL_OP2_302_15535_20131227_221937_outLine +BABEL_OP2_302_15617_20140312_215158_inLine +BABEL_OP2_302_15617_20140312_215158_outLine +BABEL_OP2_302_15730_20131121_044516_inLine +BABEL_OP2_302_15730_20131121_044516_outLine +BABEL_OP2_302_16839_20140203_151410_inLine +BABEL_OP2_302_16839_20140203_151410_outLine +BABEL_OP2_302_17032_20140108_211239_inLine +BABEL_OP2_302_17032_20140108_211239_outLine +BABEL_OP2_302_17097_20140310_234246_inLine +BABEL_OP2_302_17097_20140310_234246_outLine +BABEL_OP2_302_17280_20131214_140641_inLine +BABEL_OP2_302_17280_20131214_140641_outLine +BABEL_OP2_302_17320_20140203_165125_inLine +BABEL_OP2_302_17320_20140203_165125_outLine +BABEL_OP2_302_18078_20140219_195739_inLine +BABEL_OP2_302_18078_20140219_195739_outLine +BABEL_OP2_302_18242_20140219_185647_inLine +BABEL_OP2_302_18242_20140219_185647_outLine +BABEL_OP2_302_19773_20140113_201049_inLine +BABEL_OP2_302_19773_20140113_201049_outLine +BABEL_OP2_302_20133_20131225_003913_inLine +BABEL_OP2_302_20133_20131225_003913_outLine +BABEL_OP2_302_20367_20140220_000514_inLine +BABEL_OP2_302_20367_20140220_000514_outLine +BABEL_OP2_302_20437_20140317_015757_inLine +BABEL_OP2_302_20437_20140317_015757_outLine +BABEL_OP2_302_20916_20131031_232512_inLine +BABEL_OP2_302_20916_20131031_232512_outLine +BABEL_OP2_302_20922_20140115_174224_inLine +BABEL_OP2_302_20922_20140115_174224_outLine +BABEL_OP2_302_20972_20140301_200910_inLine +BABEL_OP2_302_20972_20140301_200910_outLine +BABEL_OP2_302_20985_20131227_225613_inLine +BABEL_OP2_302_20985_20131227_225613_outLine +BABEL_OP2_302_20985_20131227_230755_inLine +BABEL_OP2_302_20985_20131227_230755_outLine +BABEL_OP2_302_21206_20131209_212818_inLine +BABEL_OP2_302_21206_20131209_212818_outLine +BABEL_OP2_302_21206_20131209_214221_inLine +BABEL_OP2_302_21206_20131209_214221_outLine 
+BABEL_OP2_302_21435_20140201_181751_inLine +BABEL_OP2_302_21435_20140201_181751_outLine +BABEL_OP2_302_21624_20140302_191929_inLine +BABEL_OP2_302_21624_20140302_191929_outLine +BABEL_OP2_302_21807_20131215_163416_inLine +BABEL_OP2_302_21807_20131215_163416_outLine +BABEL_OP2_302_22321_20131204_001445_inLine +BABEL_OP2_302_22321_20131204_001445_outLine +BABEL_OP2_302_22321_20131204_002854_inLine +BABEL_OP2_302_22321_20131204_002854_outLine +BABEL_OP2_302_22446_20131107_221527_inLine +BABEL_OP2_302_22446_20131107_221527_outLine +BABEL_OP2_302_23239_20131227_213345_inLine +BABEL_OP2_302_23239_20131227_213345_outLine +BABEL_OP2_302_23239_20131227_214733_inLine +BABEL_OP2_302_23239_20131227_214733_outLine +BABEL_OP2_302_23980_20131206_213027_inLine +BABEL_OP2_302_23980_20131206_213027_outLine +BABEL_OP2_302_24239_20140314_185042_inLine +BABEL_OP2_302_24239_20140314_185042_outLine +BABEL_OP2_302_24241_20140312_211507_inLine +BABEL_OP2_302_24241_20140312_211507_outLine +BABEL_OP2_302_24270_20131218_184807_inLine +BABEL_OP2_302_24270_20131218_184807_outLine +BABEL_OP2_302_24586_20140301_162559_inLine +BABEL_OP2_302_24586_20140301_162559_outLine +BABEL_OP2_302_24605_20131109_160432_inLine +BABEL_OP2_302_24605_20131109_160432_outLine +BABEL_OP2_302_24648_20140313_194015_inLine +BABEL_OP2_302_24648_20140313_194015_outLine +BABEL_OP2_302_24679_20131101_171953_inLine +BABEL_OP2_302_24679_20131101_171953_outLine +BABEL_OP2_302_24679_20131101_173035_inLine +BABEL_OP2_302_24679_20131101_173035_outLine +BABEL_OP2_302_24982_20131128_202029_inLine +BABEL_OP2_302_24982_20131128_202029_outLine +BABEL_OP2_302_24982_20131128_202537_inLine +BABEL_OP2_302_24982_20131128_202537_outLine +BABEL_OP2_302_24982_20131128_203436_inLine +BABEL_OP2_302_24982_20131128_203436_outLine +BABEL_OP2_302_25496_20140228_212748_inLine +BABEL_OP2_302_25496_20140228_212748_outLine +BABEL_OP2_302_25767_20131108_203252_inLine +BABEL_OP2_302_25767_20131108_203252_outLine +BABEL_OP2_302_25767_20131108_205755_inLine +BABEL_OP2_302_25767_20131108_205755_outLine +BABEL_OP2_302_25961_20131122_214450_inLine +BABEL_OP2_302_25961_20131122_214450_outLine +BABEL_OP2_302_26388_20140203_173156_inLine +BABEL_OP2_302_26388_20140203_173156_outLine +BABEL_OP2_302_26836_20131207_194346_inLine +BABEL_OP2_302_26836_20131207_194346_outLine +BABEL_OP2_302_27367_20140317_000858_inLine +BABEL_OP2_302_27367_20140317_000858_outLine +BABEL_OP2_302_28012_20140115_155940_inLine +BABEL_OP2_302_28012_20140115_155940_outLine +BABEL_OP2_302_28477_20140127_173004_inLine +BABEL_OP2_302_28477_20140127_173004_outLine +BABEL_OP2_302_28595_20140312_200036_inLine +BABEL_OP2_302_28595_20140312_200036_outLine +BABEL_OP2_302_28814_20140115_202820_inLine +BABEL_OP2_302_28814_20140115_202820_outLine +BABEL_OP2_302_29072_20131224_215008_inLine +BABEL_OP2_302_29072_20131224_215008_outLine +BABEL_OP2_302_29439_20140226_160155_inLine +BABEL_OP2_302_29439_20140226_160155_outLine +BABEL_OP2_302_30013_20140111_202103_inLine +BABEL_OP2_302_30013_20140111_202103_outLine +BABEL_OP2_302_30345_20140113_154634_inLine +BABEL_OP2_302_30345_20140113_154634_outLine +BABEL_OP2_302_30461_20140305_205327_inLine +BABEL_OP2_302_30461_20140305_205327_outLine +BABEL_OP2_302_30720_20140312_002038_inLine +BABEL_OP2_302_30720_20140312_002038_outLine +BABEL_OP2_302_31267_20140221_194733_inLine +BABEL_OP2_302_31267_20140221_194733_outLine +BABEL_OP2_302_32097_20131106_232714_inLine +BABEL_OP2_302_32097_20131106_232714_outLine +BABEL_OP2_302_32097_20131106_233937_inLine 
+BABEL_OP2_302_32097_20131106_233937_outLine +BABEL_OP2_302_34106_20131118_201548_inLine +BABEL_OP2_302_34106_20131118_201548_outLine +BABEL_OP2_302_34486_20140313_003302_inLine +BABEL_OP2_302_34486_20140313_003302_outLine +BABEL_OP2_302_34811_20131210_202739_inLine +BABEL_OP2_302_34811_20131210_202739_outLine +BABEL_OP2_302_34826_20140127_205243_inLine +BABEL_OP2_302_34826_20140127_205243_outLine +BABEL_OP2_302_35000_20140126_011711_inLine +BABEL_OP2_302_35000_20140126_011711_outLine +BABEL_OP2_302_35139_20131117_174341_inLine +BABEL_OP2_302_35139_20131117_174341_outLine +BABEL_OP2_302_36894_20131113_172242_inLine +BABEL_OP2_302_36894_20131113_172242_outLine +BABEL_OP2_302_37271_20131228_201109_inLine +BABEL_OP2_302_37271_20131228_201109_outLine +BABEL_OP2_302_37682_20131218_170241_inLine +BABEL_OP2_302_37682_20131218_170241_outLine +BABEL_OP2_302_39006_20140220_200207_inLine +BABEL_OP2_302_39006_20140220_200207_outLine +BABEL_OP2_302_39555_20140110_211809_inLine +BABEL_OP2_302_39555_20140110_211809_outLine +BABEL_OP2_302_39848_20131210_214951_inLine +BABEL_OP2_302_39848_20131210_214951_outLine +BABEL_OP2_302_41680_20131031_034941_inLine +BABEL_OP2_302_41680_20131031_034941_outLine +BABEL_OP2_302_42526_20140119_151507_inLine +BABEL_OP2_302_42526_20140119_151507_outLine +BABEL_OP2_302_42771_20131210_163330_inLine +BABEL_OP2_302_42771_20131210_163330_outLine +BABEL_OP2_302_43286_20131105_180949_inLine +BABEL_OP2_302_43286_20131105_180949_outLine +BABEL_OP2_302_43784_20131128_211932_inLine +BABEL_OP2_302_43784_20131128_211932_outLine +BABEL_OP2_302_43788_20131225_201206_inLine +BABEL_OP2_302_43788_20131225_201206_outLine +BABEL_OP2_302_44961_20131111_223956_inLine +BABEL_OP2_302_44961_20131111_223956_outLine +BABEL_OP2_302_45770_20131107_181651_inLine +BABEL_OP2_302_45770_20131107_181651_outLine +BABEL_OP2_302_46268_20131113_230054_inLine +BABEL_OP2_302_46268_20131113_230054_outLine +BABEL_OP2_302_46558_20131204_011205_inLine +BABEL_OP2_302_46558_20131204_011205_outLine +BABEL_OP2_302_46688_20131114_195414_inLine +BABEL_OP2_302_46688_20131114_195414_outLine +BABEL_OP2_302_46757_20131227_210756_inLine +BABEL_OP2_302_46757_20131227_210756_outLine +BABEL_OP2_302_46976_20131212_235226_inLine +BABEL_OP2_302_46976_20131212_235226_outLine +BABEL_OP2_302_47866_20140122_200544_inLine +BABEL_OP2_302_47866_20140122_200544_outLine +BABEL_OP2_302_48610_20131112_232839_inLine +BABEL_OP2_302_48610_20131112_232839_outLine +BABEL_OP2_302_50549_20140130_172816_inLine +BABEL_OP2_302_50549_20140130_172816_outLine +BABEL_OP2_302_50810_20131031_180316_inLine +BABEL_OP2_302_50810_20131031_180316_outLine +BABEL_OP2_302_51955_20131117_200909_inLine +BABEL_OP2_302_51955_20131117_200909_outLine +BABEL_OP2_302_52272_20131106_175931_inLine +BABEL_OP2_302_52272_20131106_175931_outLine +BABEL_OP2_302_52381_20140118_163935_inLine +BABEL_OP2_302_52381_20140118_163935_outLine +BABEL_OP2_302_52404_20131226_172656_inLine +BABEL_OP2_302_52404_20131226_172656_outLine +BABEL_OP2_302_53063_20140219_175252_inLine +BABEL_OP2_302_53063_20140219_175252_outLine +BABEL_OP2_302_54074_20131213_143818_inLine +BABEL_OP2_302_54074_20131213_143818_outLine +BABEL_OP2_302_54104_20131108_172927_inLine +BABEL_OP2_302_54104_20131108_172927_outLine +BABEL_OP2_302_54697_20140125_210815_inLine +BABEL_OP2_302_54697_20140125_210815_outLine +BABEL_OP2_302_54953_20131207_170139_inLine +BABEL_OP2_302_54953_20131207_170139_outLine +BABEL_OP2_302_55106_20140226_210229_inLine +BABEL_OP2_302_55106_20140226_210229_outLine 
+BABEL_OP2_302_57065_20140105_155451_inLine +BABEL_OP2_302_57065_20140105_155451_outLine +BABEL_OP2_302_57548_20131220_025554_inLine +BABEL_OP2_302_57548_20131220_025554_outLine +BABEL_OP2_302_57566_20140129_021108_inLine +BABEL_OP2_302_57566_20140129_021108_outLine +BABEL_OP2_302_58926_20131121_050015_inLine +BABEL_OP2_302_58926_20131121_050015_outLine +BABEL_OP2_302_59509_20140203_215611_inLine +BABEL_OP2_302_59509_20140203_215611_outLine +BABEL_OP2_302_59635_20140124_183551_inLine +BABEL_OP2_302_59635_20140124_183551_outLine +BABEL_OP2_302_59720_20131206_145023_inLine +BABEL_OP2_302_59720_20131206_145023_outLine +BABEL_OP2_302_59747_20131104_164054_inLine +BABEL_OP2_302_59747_20131104_164054_outLine +BABEL_OP2_302_60299_20140220_162147_inLine +BABEL_OP2_302_60299_20140220_162147_outLine +BABEL_OP2_302_63307_20131213_180556_inLine +BABEL_OP2_302_63307_20131213_180556_outLine +BABEL_OP2_302_63999_20140214_214838_inLine +BABEL_OP2_302_63999_20140214_214838_outLine +BABEL_OP2_302_64870_20131226_033837_inLine +BABEL_OP2_302_64870_20131226_033837_outLine +BABEL_OP2_302_66001_20131114_212059_inLine +BABEL_OP2_302_66001_20131114_212059_outLine +BABEL_OP2_302_66822_20131207_210025_inLine +BABEL_OP2_302_66822_20131207_210025_outLine +BABEL_OP2_302_67401_20131220_213115_inLine +BABEL_OP2_302_67401_20131220_213115_outLine +BABEL_OP2_302_67622_20131109_180009_inLine +BABEL_OP2_302_67622_20131109_180009_outLine +BABEL_OP2_302_67659_20131130_221743_inLine +BABEL_OP2_302_67659_20131130_221743_outLine +BABEL_OP2_302_67773_20140318_210730_inLine +BABEL_OP2_302_67773_20140318_210730_outLine +BABEL_OP2_302_68244_20140131_200512_inLine +BABEL_OP2_302_68244_20140131_200512_outLine +BABEL_OP2_302_68289_20140126_231552_inLine +BABEL_OP2_302_68289_20140126_231552_outLine +BABEL_OP2_302_68924_20140130_230433_inLine +BABEL_OP2_302_68924_20140130_230433_outLine +BABEL_OP2_302_69578_20131207_134320_inLine +BABEL_OP2_302_69578_20131207_134320_outLine +BABEL_OP2_302_69746_20140118_173307_inLine +BABEL_OP2_302_69746_20140118_173307_outLine +BABEL_OP2_302_70121_20131209_204053_inLine +BABEL_OP2_302_70121_20131209_204053_outLine +BABEL_OP2_302_70251_20131109_191513_inLine +BABEL_OP2_302_70251_20131109_191513_outLine +BABEL_OP2_302_70460_20140222_223545_inLine +BABEL_OP2_302_70460_20140222_223545_outLine +BABEL_OP2_302_70713_20140126_171356_inLine +BABEL_OP2_302_70713_20140126_171356_outLine +BABEL_OP2_302_70794_20131107_185831_inLine +BABEL_OP2_302_70794_20131107_185831_outLine +BABEL_OP2_302_72952_20140215_000239_inLine +BABEL_OP2_302_72952_20140215_000239_outLine +BABEL_OP2_302_73518_20140123_190347_inLine +BABEL_OP2_302_73518_20140123_190347_outLine +BABEL_OP2_302_74921_20131225_190044_inLine +BABEL_OP2_302_74921_20131225_190044_outLine +BABEL_OP2_302_75505_20131104_234450_inLine +BABEL_OP2_302_75505_20131104_234450_outLine +BABEL_OP2_302_75505_20131104_235752_inLine +BABEL_OP2_302_75505_20131104_235752_outLine +BABEL_OP2_302_76756_20131219_183439_inLine +BABEL_OP2_302_76756_20131219_183439_outLine +BABEL_OP2_302_77033_20140127_183412_inLine +BABEL_OP2_302_77033_20140127_183412_outLine +BABEL_OP2_302_77990_20131117_222127_inLine +BABEL_OP2_302_77990_20131117_222127_outLine +BABEL_OP2_302_78398_20131116_213051_inLine +BABEL_OP2_302_78398_20131116_213051_outLine +BABEL_OP2_302_78943_20131206_035541_inLine +BABEL_OP2_302_78943_20131206_035541_outLine +BABEL_OP2_302_78943_20131206_042746_inLine +BABEL_OP2_302_78943_20131206_042746_outLine +BABEL_OP2_302_79995_20140125_225240_inLine 
+BABEL_OP2_302_79995_20140125_225240_outLine +BABEL_OP2_302_80306_20131206_235538_inLine +BABEL_OP2_302_80306_20131206_235538_outLine +BABEL_OP2_302_81287_20140115_190105_inLine +BABEL_OP2_302_81287_20140115_190105_outLine +BABEL_OP2_302_81671_20140131_220121_inLine +BABEL_OP2_302_81671_20140131_220121_outLine +BABEL_OP2_302_82089_20131206_191938_inLine +BABEL_OP2_302_82089_20131206_191938_outLine +BABEL_OP2_302_82425_20131115_005742_inLine +BABEL_OP2_302_82425_20131115_005742_outLine +BABEL_OP2_302_82935_20140114_204802_inLine +BABEL_OP2_302_82935_20140114_204802_outLine +BABEL_OP2_302_83430_20140315_203750_inLine +BABEL_OP2_302_83430_20140315_203750_outLine +BABEL_OP2_302_83455_20131208_201956_inLine +BABEL_OP2_302_83455_20131208_201956_outLine +BABEL_OP2_302_84469_20140107_205046_inLine +BABEL_OP2_302_84469_20140107_205046_outLine +BABEL_OP2_302_84715_20140127_201518_inLine +BABEL_OP2_302_84715_20140127_201518_outLine +BABEL_OP2_302_84936_20140108_204108_inLine +BABEL_OP2_302_84936_20140108_204108_outLine +BABEL_OP2_302_86628_20140215_171431_inLine +BABEL_OP2_302_86628_20140215_171431_outLine +BABEL_OP2_302_86715_20140215_174540_inLine +BABEL_OP2_302_86715_20140215_174540_outLine +BABEL_OP2_302_87305_20140214_225515_inLine +BABEL_OP2_302_87305_20140214_225515_outLine +BABEL_OP2_302_87777_20140127_145958_inLine +BABEL_OP2_302_87777_20140127_145958_outLine +BABEL_OP2_302_88661_20131225_211835_inLine +BABEL_OP2_302_88661_20131225_211835_outLine +BABEL_OP2_302_88938_20140202_215623_inLine +BABEL_OP2_302_88938_20140202_215623_outLine +BABEL_OP2_302_89059_20140115_214308_inLine +BABEL_OP2_302_89059_20140115_214308_outLine +BABEL_OP2_302_90709_20131111_143953_inLine +BABEL_OP2_302_90709_20131111_143953_outLine +BABEL_OP2_302_92557_20140125_230505_inLine +BABEL_OP2_302_92557_20140125_230505_outLine +BABEL_OP2_302_92736_20140119_170328_inLine +BABEL_OP2_302_92736_20140119_170328_outLine +BABEL_OP2_302_92809_20131109_182045_inLine +BABEL_OP2_302_92809_20131109_182045_outLine +BABEL_OP2_302_92942_20131219_014744_inLine +BABEL_OP2_302_92942_20131219_014744_outLine +BABEL_OP2_302_93632_20140203_154221_inLine +BABEL_OP2_302_93632_20140203_154221_outLine +BABEL_OP2_302_93964_20131208_002934_inLine +BABEL_OP2_302_93964_20131208_002934_outLine +BABEL_OP2_302_94253_20131114_215945_inLine +BABEL_OP2_302_94253_20131114_215945_outLine +BABEL_OP2_302_94449_20140314_185933_inLine +BABEL_OP2_302_94449_20140314_185933_outLine +BABEL_OP2_302_95670_20131130_185901_inLine +BABEL_OP2_302_95670_20131130_185901_outLine +BABEL_OP2_302_96525_20140110_201817_inLine +BABEL_OP2_302_96525_20140110_201817_outLine +BABEL_OP2_302_96690_20131204_221739_inLine +BABEL_OP2_302_96690_20131204_221739_outLine +BABEL_OP2_302_96910_20140130_210316_inLine +BABEL_OP2_302_96910_20140130_210316_outLine +BABEL_OP2_302_97461_20131211_211339_inLine +BABEL_OP2_302_97461_20131211_211339_outLine +BABEL_OP2_302_97557_20131219_192714_inLine +BABEL_OP2_302_97557_20131219_192714_outLine +BABEL_OP2_302_97588_20131101_161834_inLine +BABEL_OP2_302_97588_20131101_161834_outLine +BABEL_OP2_302_97588_20131101_163947_inLine +BABEL_OP2_302_97588_20131101_163947_outLine +BABEL_OP2_302_98909_20131117_153948_inLine +BABEL_OP2_302_98909_20131117_153948_outLine +BABEL_OP2_302_99401_20131114_221114_inLine +BABEL_OP2_302_99401_20131114_221114_outLine +BABEL_OP2_302_99887_20140129_162421_inLine +BABEL_OP2_302_99887_20140129_162421_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/training.list b/egs/babel/s5d/conf/lists/302-kazakh/training.list new file 
mode 100644 index 00000000000..c2026850026 --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/training.list @@ -0,0 +1,528 @@ +BABEL_OP2_302_10036_20131223_231808_inLine +BABEL_OP2_302_10036_20131223_231808_outLine +BABEL_OP2_302_10313_20140319_000910_inLine +BABEL_OP2_302_10313_20140319_000910_outLine +BABEL_OP2_302_10938_20140110_231500_inLine +BABEL_OP2_302_10938_20140110_231500_outLine +BABEL_OP2_302_10966_20131201_171745_inLine +BABEL_OP2_302_10966_20131201_171745_outLine +BABEL_OP2_302_11486_20140327_014542_inLine +BABEL_OP2_302_11486_20140327_014542_outLine +BABEL_OP2_302_11528_20140313_172050_inLine +BABEL_OP2_302_11528_20140313_172050_outLine +BABEL_OP2_302_11581_20131224_173459_inLine +BABEL_OP2_302_11581_20131224_173459_outLine +BABEL_OP2_302_11797_20131123_210739_inLine +BABEL_OP2_302_11797_20131123_210739_outLine +BABEL_OP2_302_12220_20131208_170136_inLine +BABEL_OP2_302_12220_20131208_170136_outLine +BABEL_OP2_302_12606_20140203_201955_inLine +BABEL_OP2_302_12606_20140203_201955_outLine +BABEL_OP2_302_12609_20140213_010711_inLine +BABEL_OP2_302_12609_20140213_010711_outLine +BABEL_OP2_302_12767_20131109_202610_inLine +BABEL_OP2_302_12767_20131109_202610_outLine +BABEL_OP2_302_12846_20140216_173021_inLine +BABEL_OP2_302_12846_20140216_173021_outLine +BABEL_OP2_302_12851_20131030_220616_inLine +BABEL_OP2_302_12851_20131030_220616_outLine +BABEL_OP2_302_13483_20140111_145619_inLine +BABEL_OP2_302_13483_20140111_145619_outLine +BABEL_OP2_302_13664_20131030_032900_inLine +BABEL_OP2_302_13664_20131030_032900_outLine +BABEL_OP2_302_13709_20140126_163818_inLine +BABEL_OP2_302_13709_20140126_163818_outLine +BABEL_OP2_302_13792_20131105_160713_inLine +BABEL_OP2_302_13792_20131105_160713_outLine +BABEL_OP2_302_14137_20131205_201718_inLine +BABEL_OP2_302_14137_20131205_201718_outLine +BABEL_OP2_302_14725_20131106_204535_inLine +BABEL_OP2_302_14725_20131106_204535_outLine +BABEL_OP2_302_14807_20131220_203507_inLine +BABEL_OP2_302_14807_20131220_203507_outLine +BABEL_OP2_302_14814_20131206_165156_inLine +BABEL_OP2_302_14814_20131206_165156_outLine +BABEL_OP2_302_14899_20131101_223556_inLine +BABEL_OP2_302_14899_20131101_223556_outLine +BABEL_OP2_302_14972_20131220_203939_inLine +BABEL_OP2_302_14972_20131220_203939_outLine +BABEL_OP2_302_15535_20131227_221937_inLine +BABEL_OP2_302_15535_20131227_221937_outLine +BABEL_OP2_302_15617_20140312_215158_inLine +BABEL_OP2_302_15617_20140312_215158_outLine +BABEL_OP2_302_15638_20131227_190456_inLine +BABEL_OP2_302_15638_20131227_190456_outLine +BABEL_OP2_302_15730_20131121_044516_inLine +BABEL_OP2_302_15730_20131121_044516_outLine +BABEL_OP2_302_16467_20140125_193127_inLine +BABEL_OP2_302_16467_20140125_193127_outLine +BABEL_OP2_302_16839_20140203_151410_inLine +BABEL_OP2_302_16839_20140203_151410_outLine +BABEL_OP2_302_16886_20131209_211339_inLine +BABEL_OP2_302_16886_20131209_211339_outLine +BABEL_OP2_302_17032_20140108_211239_inLine +BABEL_OP2_302_17032_20140108_211239_outLine +BABEL_OP2_302_17097_20140310_234246_inLine +BABEL_OP2_302_17097_20140310_234246_outLine +BABEL_OP2_302_17113_20140216_165407_inLine +BABEL_OP2_302_17113_20140216_165407_outLine +BABEL_OP2_302_17280_20131214_140641_inLine +BABEL_OP2_302_17280_20131214_140641_outLine +BABEL_OP2_302_17320_20140203_165125_inLine +BABEL_OP2_302_17320_20140203_165125_outLine +BABEL_OP2_302_17567_20131227_223417_inLine +BABEL_OP2_302_17567_20131227_223417_outLine +BABEL_OP2_302_18078_20140219_195739_inLine +BABEL_OP2_302_18078_20140219_195739_outLine 
+BABEL_OP2_302_18118_20140312_010735_inLine +BABEL_OP2_302_18118_20140312_010735_outLine +BABEL_OP2_302_18242_20140219_185647_inLine +BABEL_OP2_302_18242_20140219_185647_outLine +BABEL_OP2_302_19722_20131106_001542_inLine +BABEL_OP2_302_19722_20131106_001542_outLine +BABEL_OP2_302_19773_20140113_201049_inLine +BABEL_OP2_302_19773_20140113_201049_outLine +BABEL_OP2_302_20133_20131225_003913_inLine +BABEL_OP2_302_20133_20131225_003913_outLine +BABEL_OP2_302_20367_20140220_000514_inLine +BABEL_OP2_302_20367_20140220_000514_outLine +BABEL_OP2_302_20437_20140317_015757_inLine +BABEL_OP2_302_20437_20140317_015757_outLine +BABEL_OP2_302_20916_20131031_232512_inLine +BABEL_OP2_302_20916_20131031_232512_outLine +BABEL_OP2_302_20922_20140115_174224_inLine +BABEL_OP2_302_20922_20140115_174224_outLine +BABEL_OP2_302_20972_20140301_200910_inLine +BABEL_OP2_302_20972_20140301_200910_outLine +BABEL_OP2_302_20985_20131227_225613_inLine +BABEL_OP2_302_20985_20131227_225613_outLine +BABEL_OP2_302_20985_20131227_230755_inLine +BABEL_OP2_302_20985_20131227_230755_outLine +BABEL_OP2_302_21206_20131209_212818_inLine +BABEL_OP2_302_21206_20131209_212818_outLine +BABEL_OP2_302_21206_20131209_214221_inLine +BABEL_OP2_302_21206_20131209_214221_outLine +BABEL_OP2_302_21435_20140201_181751_inLine +BABEL_OP2_302_21435_20140201_181751_outLine +BABEL_OP2_302_21624_20140302_191929_inLine +BABEL_OP2_302_21624_20140302_191929_outLine +BABEL_OP2_302_21807_20131215_163416_inLine +BABEL_OP2_302_21807_20131215_163416_outLine +BABEL_OP2_302_22280_20131214_220249_inLine +BABEL_OP2_302_22280_20131214_220249_outLine +BABEL_OP2_302_22321_20131204_001445_inLine +BABEL_OP2_302_22321_20131204_001445_outLine +BABEL_OP2_302_22321_20131204_002854_inLine +BABEL_OP2_302_22321_20131204_002854_outLine +BABEL_OP2_302_22446_20131107_221527_inLine +BABEL_OP2_302_22446_20131107_221527_outLine +BABEL_OP2_302_23239_20131227_213345_inLine +BABEL_OP2_302_23239_20131227_213345_outLine +BABEL_OP2_302_23239_20131227_214733_inLine +BABEL_OP2_302_23239_20131227_214733_outLine +BABEL_OP2_302_23505_20131113_214234_inLine +BABEL_OP2_302_23505_20131113_214234_outLine +BABEL_OP2_302_23505_20131113_215736_inLine +BABEL_OP2_302_23505_20131113_215736_outLine +BABEL_OP2_302_23980_20131206_213027_inLine +BABEL_OP2_302_23980_20131206_213027_outLine +BABEL_OP2_302_24239_20140314_185042_inLine +BABEL_OP2_302_24239_20140314_185042_outLine +BABEL_OP2_302_24241_20140312_211507_inLine +BABEL_OP2_302_24241_20140312_211507_outLine +BABEL_OP2_302_24270_20131218_184807_inLine +BABEL_OP2_302_24270_20131218_184807_outLine +BABEL_OP2_302_24323_20131207_212641_inLine +BABEL_OP2_302_24323_20131207_212641_outLine +BABEL_OP2_302_24586_20140301_162559_inLine +BABEL_OP2_302_24586_20140301_162559_outLine +BABEL_OP2_302_24605_20131109_160432_inLine +BABEL_OP2_302_24605_20131109_160432_outLine +BABEL_OP2_302_24648_20140313_194015_inLine +BABEL_OP2_302_24648_20140313_194015_outLine +BABEL_OP2_302_24679_20131101_171953_inLine +BABEL_OP2_302_24679_20131101_171953_outLine +BABEL_OP2_302_24679_20131101_173035_inLine +BABEL_OP2_302_24679_20131101_173035_outLine +BABEL_OP2_302_24982_20131128_202029_inLine +BABEL_OP2_302_24982_20131128_202029_outLine +BABEL_OP2_302_24982_20131128_202537_inLine +BABEL_OP2_302_24982_20131128_202537_outLine +BABEL_OP2_302_24982_20131128_203436_inLine +BABEL_OP2_302_24982_20131128_203436_outLine +BABEL_OP2_302_25085_20140216_161934_inLine +BABEL_OP2_302_25085_20140216_161934_outLine +BABEL_OP2_302_25496_20140228_212748_inLine 
+BABEL_OP2_302_25496_20140228_212748_outLine +BABEL_OP2_302_25767_20131108_203252_inLine +BABEL_OP2_302_25767_20131108_203252_outLine +BABEL_OP2_302_25767_20131108_205755_inLine +BABEL_OP2_302_25767_20131108_205755_outLine +BABEL_OP2_302_25961_20131122_214450_inLine +BABEL_OP2_302_25961_20131122_214450_outLine +BABEL_OP2_302_26388_20140203_173156_inLine +BABEL_OP2_302_26388_20140203_173156_outLine +BABEL_OP2_302_26836_20131207_194346_inLine +BABEL_OP2_302_26836_20131207_194346_outLine +BABEL_OP2_302_27367_20140317_000858_inLine +BABEL_OP2_302_27367_20140317_000858_outLine +BABEL_OP2_302_28012_20140115_155940_inLine +BABEL_OP2_302_28012_20140115_155940_outLine +BABEL_OP2_302_28477_20140127_173004_inLine +BABEL_OP2_302_28477_20140127_173004_outLine +BABEL_OP2_302_28595_20140312_200036_inLine +BABEL_OP2_302_28595_20140312_200036_outLine +BABEL_OP2_302_28814_20140115_202820_inLine +BABEL_OP2_302_28814_20140115_202820_outLine +BABEL_OP2_302_29072_20131224_215008_inLine +BABEL_OP2_302_29072_20131224_215008_outLine +BABEL_OP2_302_29135_20131031_201509_inLine +BABEL_OP2_302_29135_20131031_201509_outLine +BABEL_OP2_302_29416_20140125_222019_inLine +BABEL_OP2_302_29416_20140125_222019_outLine +BABEL_OP2_302_29439_20140226_160155_inLine +BABEL_OP2_302_29439_20140226_160155_outLine +BABEL_OP2_302_30013_20140111_202103_inLine +BABEL_OP2_302_30013_20140111_202103_outLine +BABEL_OP2_302_30345_20140113_154634_inLine +BABEL_OP2_302_30345_20140113_154634_outLine +BABEL_OP2_302_30461_20140305_205327_inLine +BABEL_OP2_302_30461_20140305_205327_outLine +BABEL_OP2_302_30720_20140312_002038_inLine +BABEL_OP2_302_30720_20140312_002038_outLine +BABEL_OP2_302_31267_20140221_194733_inLine +BABEL_OP2_302_31267_20140221_194733_outLine +BABEL_OP2_302_31490_20131120_230743_inLine +BABEL_OP2_302_31490_20131120_230743_outLine +BABEL_OP2_302_32097_20131106_232714_inLine +BABEL_OP2_302_32097_20131106_232714_outLine +BABEL_OP2_302_32097_20131106_233937_inLine +BABEL_OP2_302_32097_20131106_233937_outLine +BABEL_OP2_302_32287_20140316_185534_inLine +BABEL_OP2_302_32287_20140316_185534_outLine +BABEL_OP2_302_32301_20140108_212650_inLine +BABEL_OP2_302_32301_20140108_212650_outLine +BABEL_OP2_302_34106_20131118_201548_inLine +BABEL_OP2_302_34106_20131118_201548_outLine +BABEL_OP2_302_34197_20131203_173358_inLine +BABEL_OP2_302_34197_20131203_173358_outLine +BABEL_OP2_302_34477_20131205_030548_inLine +BABEL_OP2_302_34477_20131205_030548_outLine +BABEL_OP2_302_34477_20131205_035623_inLine +BABEL_OP2_302_34477_20131205_035623_outLine +BABEL_OP2_302_34486_20140313_003302_inLine +BABEL_OP2_302_34486_20140313_003302_outLine +BABEL_OP2_302_34811_20131210_202739_inLine +BABEL_OP2_302_34811_20131210_202739_outLine +BABEL_OP2_302_34826_20140127_205243_inLine +BABEL_OP2_302_34826_20140127_205243_outLine +BABEL_OP2_302_35000_20140126_011711_inLine +BABEL_OP2_302_35000_20140126_011711_outLine +BABEL_OP2_302_35139_20131117_174341_inLine +BABEL_OP2_302_35139_20131117_174341_outLine +BABEL_OP2_302_36894_20131113_172242_inLine +BABEL_OP2_302_36894_20131113_172242_outLine +BABEL_OP2_302_37271_20131228_201109_inLine +BABEL_OP2_302_37271_20131228_201109_outLine +BABEL_OP2_302_37598_20131218_200535_inLine +BABEL_OP2_302_37598_20131218_200535_outLine +BABEL_OP2_302_37682_20131218_170241_inLine +BABEL_OP2_302_37682_20131218_170241_outLine +BABEL_OP2_302_38588_20131216_211052_inLine +BABEL_OP2_302_38588_20131216_211052_outLine +BABEL_OP2_302_39006_20140220_200207_inLine +BABEL_OP2_302_39006_20140220_200207_outLine 
+BABEL_OP2_302_39555_20140110_211809_inLine +BABEL_OP2_302_39555_20140110_211809_outLine +BABEL_OP2_302_39744_20131031_182731_inLine +BABEL_OP2_302_39744_20131031_182731_outLine +BABEL_OP2_302_39848_20131210_214951_inLine +BABEL_OP2_302_39848_20131210_214951_outLine +BABEL_OP2_302_41233_20140111_195838_inLine +BABEL_OP2_302_41233_20140111_195838_outLine +BABEL_OP2_302_41680_20131031_034941_inLine +BABEL_OP2_302_41680_20131031_034941_outLine +BABEL_OP2_302_42526_20140119_151507_inLine +BABEL_OP2_302_42526_20140119_151507_outLine +BABEL_OP2_302_42771_20131210_163330_inLine +BABEL_OP2_302_42771_20131210_163330_outLine +BABEL_OP2_302_43286_20131105_180949_inLine +BABEL_OP2_302_43286_20131105_180949_outLine +BABEL_OP2_302_43646_20131204_185430_inLine +BABEL_OP2_302_43646_20131204_185430_outLine +BABEL_OP2_302_43784_20131128_211932_inLine +BABEL_OP2_302_43784_20131128_211932_outLine +BABEL_OP2_302_43788_20131225_201206_inLine +BABEL_OP2_302_43788_20131225_201206_outLine +BABEL_OP2_302_43920_20140312_031242_inLine +BABEL_OP2_302_43920_20140312_031242_outLine +BABEL_OP2_302_44619_20131212_234348_inLine +BABEL_OP2_302_44619_20131212_234348_outLine +BABEL_OP2_302_44961_20131111_223956_inLine +BABEL_OP2_302_44961_20131111_223956_outLine +BABEL_OP2_302_45770_20131107_181651_inLine +BABEL_OP2_302_45770_20131107_181651_outLine +BABEL_OP2_302_46268_20131113_230054_inLine +BABEL_OP2_302_46268_20131113_230054_outLine +BABEL_OP2_302_46558_20131204_011205_inLine +BABEL_OP2_302_46558_20131204_011205_outLine +BABEL_OP2_302_46688_20131114_195414_inLine +BABEL_OP2_302_46688_20131114_195414_outLine +BABEL_OP2_302_46757_20131227_210756_inLine +BABEL_OP2_302_46757_20131227_210756_outLine +BABEL_OP2_302_46763_20140225_183302_inLine +BABEL_OP2_302_46763_20140225_183302_outLine +BABEL_OP2_302_46976_20131212_235226_inLine +BABEL_OP2_302_46976_20131212_235226_outLine +BABEL_OP2_302_47866_20140122_200544_inLine +BABEL_OP2_302_47866_20140122_200544_outLine +BABEL_OP2_302_48243_20131128_221311_inLine +BABEL_OP2_302_48243_20131128_221311_outLine +BABEL_OP2_302_48610_20131112_232839_inLine +BABEL_OP2_302_48610_20131112_232839_outLine +BABEL_OP2_302_49912_20140217_201647_inLine +BABEL_OP2_302_49912_20140217_201647_outLine +BABEL_OP2_302_50549_20140130_172816_inLine +BABEL_OP2_302_50549_20140130_172816_outLine +BABEL_OP2_302_50779_20131219_172746_inLine +BABEL_OP2_302_50779_20131219_172746_outLine +BABEL_OP2_302_50810_20131031_180316_inLine +BABEL_OP2_302_50810_20131031_180316_outLine +BABEL_OP2_302_51955_20131117_200909_inLine +BABEL_OP2_302_51955_20131117_200909_outLine +BABEL_OP2_302_52272_20131106_175931_inLine +BABEL_OP2_302_52272_20131106_175931_outLine +BABEL_OP2_302_52381_20140118_163935_inLine +BABEL_OP2_302_52381_20140118_163935_outLine +BABEL_OP2_302_52404_20131226_172656_inLine +BABEL_OP2_302_52404_20131226_172656_outLine +BABEL_OP2_302_53063_20140219_175252_inLine +BABEL_OP2_302_53063_20140219_175252_outLine +BABEL_OP2_302_53492_20140124_221354_inLine +BABEL_OP2_302_53492_20140124_221354_outLine +BABEL_OP2_302_53492_20140124_231722_inLine +BABEL_OP2_302_53492_20140124_231722_outLine +BABEL_OP2_302_54074_20131213_143818_inLine +BABEL_OP2_302_54074_20131213_143818_outLine +BABEL_OP2_302_54104_20131108_172927_inLine +BABEL_OP2_302_54104_20131108_172927_outLine +BABEL_OP2_302_54697_20140125_210815_inLine +BABEL_OP2_302_54697_20140125_210815_outLine +BABEL_OP2_302_54953_20131207_170139_inLine +BABEL_OP2_302_54953_20131207_170139_outLine +BABEL_OP2_302_55106_20140226_210229_inLine 
+BABEL_OP2_302_55106_20140226_210229_outLine +BABEL_OP2_302_56306_20140115_190808_inLine +BABEL_OP2_302_56306_20140115_190808_outLine +BABEL_OP2_302_57065_20140105_155451_inLine +BABEL_OP2_302_57065_20140105_155451_outLine +BABEL_OP2_302_57548_20131220_025554_inLine +BABEL_OP2_302_57548_20131220_025554_outLine +BABEL_OP2_302_57566_20140129_021108_inLine +BABEL_OP2_302_57566_20140129_021108_outLine +BABEL_OP2_302_58850_20131209_231304_inLine +BABEL_OP2_302_58850_20131209_231304_outLine +BABEL_OP2_302_58926_20131121_050015_inLine +BABEL_OP2_302_58926_20131121_050015_outLine +BABEL_OP2_302_59509_20140203_215611_inLine +BABEL_OP2_302_59509_20140203_215611_outLine +BABEL_OP2_302_59635_20140124_183551_inLine +BABEL_OP2_302_59635_20140124_183551_outLine +BABEL_OP2_302_59720_20131206_145023_inLine +BABEL_OP2_302_59720_20131206_145023_outLine +BABEL_OP2_302_59747_20131104_164054_inLine +BABEL_OP2_302_59747_20131104_164054_outLine +BABEL_OP2_302_60299_20140220_162147_inLine +BABEL_OP2_302_60299_20140220_162147_outLine +BABEL_OP2_302_61888_20140127_161005_inLine +BABEL_OP2_302_61888_20140127_161005_outLine +BABEL_OP2_302_63307_20131213_180556_inLine +BABEL_OP2_302_63307_20131213_180556_outLine +BABEL_OP2_302_63999_20140214_214838_inLine +BABEL_OP2_302_63999_20140214_214838_outLine +BABEL_OP2_302_64870_20131226_033837_inLine +BABEL_OP2_302_64870_20131226_033837_outLine +BABEL_OP2_302_66001_20131114_212059_inLine +BABEL_OP2_302_66001_20131114_212059_outLine +BABEL_OP2_302_66822_20131207_210025_inLine +BABEL_OP2_302_66822_20131207_210025_outLine +BABEL_OP2_302_67401_20131220_213115_inLine +BABEL_OP2_302_67401_20131220_213115_outLine +BABEL_OP2_302_67622_20131109_180009_inLine +BABEL_OP2_302_67622_20131109_180009_outLine +BABEL_OP2_302_67659_20131130_221743_inLine +BABEL_OP2_302_67659_20131130_221743_outLine +BABEL_OP2_302_67773_20140318_210730_inLine +BABEL_OP2_302_67773_20140318_210730_outLine +BABEL_OP2_302_68244_20140131_200512_inLine +BABEL_OP2_302_68244_20140131_200512_outLine +BABEL_OP2_302_68289_20140126_231552_inLine +BABEL_OP2_302_68289_20140126_231552_outLine +BABEL_OP2_302_68924_20140130_230433_inLine +BABEL_OP2_302_68924_20140130_230433_outLine +BABEL_OP2_302_69578_20131207_134320_inLine +BABEL_OP2_302_69578_20131207_134320_outLine +BABEL_OP2_302_69746_20140118_173307_inLine +BABEL_OP2_302_69746_20140118_173307_outLine +BABEL_OP2_302_70121_20131209_204053_inLine +BABEL_OP2_302_70121_20131209_204053_outLine +BABEL_OP2_302_70251_20131109_191513_inLine +BABEL_OP2_302_70251_20131109_191513_outLine +BABEL_OP2_302_70386_20131203_030837_inLine +BABEL_OP2_302_70386_20131203_030837_outLine +BABEL_OP2_302_70452_20131219_032729_inLine +BABEL_OP2_302_70452_20131219_032729_outLine +BABEL_OP2_302_70460_20140222_223545_inLine +BABEL_OP2_302_70460_20140222_223545_outLine +BABEL_OP2_302_70713_20140126_171356_inLine +BABEL_OP2_302_70713_20140126_171356_outLine +BABEL_OP2_302_70794_20131107_185831_inLine +BABEL_OP2_302_70794_20131107_185831_outLine +BABEL_OP2_302_71038_20140119_172132_inLine +BABEL_OP2_302_71038_20140119_172132_outLine +BABEL_OP2_302_71067_20140130_194954_inLine +BABEL_OP2_302_71067_20140130_194954_outLine +BABEL_OP2_302_72952_20140215_000239_inLine +BABEL_OP2_302_72952_20140215_000239_outLine +BABEL_OP2_302_73518_20140123_190347_inLine +BABEL_OP2_302_73518_20140123_190347_outLine +BABEL_OP2_302_74921_20131225_190044_inLine +BABEL_OP2_302_74921_20131225_190044_outLine +BABEL_OP2_302_75223_20131130_211714_inLine +BABEL_OP2_302_75223_20131130_211714_outLine 
+BABEL_OP2_302_75223_20131130_212825_inLine +BABEL_OP2_302_75223_20131130_212825_outLine +BABEL_OP2_302_75505_20131104_234450_inLine +BABEL_OP2_302_75505_20131104_234450_outLine +BABEL_OP2_302_75505_20131104_235752_inLine +BABEL_OP2_302_75505_20131104_235752_outLine +BABEL_OP2_302_76756_20131219_183439_inLine +BABEL_OP2_302_76756_20131219_183439_outLine +BABEL_OP2_302_77033_20140127_183412_inLine +BABEL_OP2_302_77033_20140127_183412_outLine +BABEL_OP2_302_77126_20131111_012344_inLine +BABEL_OP2_302_77126_20131111_012344_outLine +BABEL_OP2_302_77242_20140217_184823_inLine +BABEL_OP2_302_77242_20140217_184823_outLine +BABEL_OP2_302_77990_20131117_222127_inLine +BABEL_OP2_302_77990_20131117_222127_outLine +BABEL_OP2_302_78398_20131116_213051_inLine +BABEL_OP2_302_78398_20131116_213051_outLine +BABEL_OP2_302_78943_20131206_035541_inLine +BABEL_OP2_302_78943_20131206_035541_outLine +BABEL_OP2_302_78943_20131206_042746_inLine +BABEL_OP2_302_78943_20131206_042746_outLine +BABEL_OP2_302_79898_20140310_200258_inLine +BABEL_OP2_302_79898_20140310_200258_outLine +BABEL_OP2_302_79995_20140125_225240_inLine +BABEL_OP2_302_79995_20140125_225240_outLine +BABEL_OP2_302_80306_20131206_235538_inLine +BABEL_OP2_302_80306_20131206_235538_outLine +BABEL_OP2_302_80781_20131207_183741_inLine +BABEL_OP2_302_80781_20131207_183741_outLine +BABEL_OP2_302_81213_20131118_175514_inLine +BABEL_OP2_302_81213_20131118_175514_outLine +BABEL_OP2_302_81287_20140115_190105_inLine +BABEL_OP2_302_81287_20140115_190105_outLine +BABEL_OP2_302_81671_20140131_220121_inLine +BABEL_OP2_302_81671_20140131_220121_outLine +BABEL_OP2_302_82089_20131206_191938_inLine +BABEL_OP2_302_82089_20131206_191938_outLine +BABEL_OP2_302_82138_20131206_045140_inLine +BABEL_OP2_302_82138_20131206_045140_outLine +BABEL_OP2_302_82145_20140301_225354_inLine +BABEL_OP2_302_82145_20140301_225354_outLine +BABEL_OP2_302_82224_20140203_014024_inLine +BABEL_OP2_302_82224_20140203_014024_outLine +BABEL_OP2_302_82425_20131115_005742_inLine +BABEL_OP2_302_82425_20131115_005742_outLine +BABEL_OP2_302_82935_20140114_204802_inLine +BABEL_OP2_302_82935_20140114_204802_outLine +BABEL_OP2_302_83430_20140315_203750_inLine +BABEL_OP2_302_83430_20140315_203750_outLine +BABEL_OP2_302_83436_20131106_170059_inLine +BABEL_OP2_302_83436_20131106_170059_outLine +BABEL_OP2_302_83455_20131208_201956_inLine +BABEL_OP2_302_83455_20131208_201956_outLine +BABEL_OP2_302_84408_20131207_204020_inLine +BABEL_OP2_302_84408_20131207_204020_outLine +BABEL_OP2_302_84469_20140107_205046_inLine +BABEL_OP2_302_84469_20140107_205046_outLine +BABEL_OP2_302_84715_20140127_201518_inLine +BABEL_OP2_302_84715_20140127_201518_outLine +BABEL_OP2_302_84936_20140108_204108_inLine +BABEL_OP2_302_84936_20140108_204108_outLine +BABEL_OP2_302_85010_20140316_222754_inLine +BABEL_OP2_302_85010_20140316_222754_outLine +BABEL_OP2_302_86628_20140215_171431_inLine +BABEL_OP2_302_86628_20140215_171431_outLine +BABEL_OP2_302_86715_20140215_174540_inLine +BABEL_OP2_302_86715_20140215_174540_outLine +BABEL_OP2_302_87298_20140130_191447_inLine +BABEL_OP2_302_87298_20140130_191447_outLine +BABEL_OP2_302_87305_20140214_225515_inLine +BABEL_OP2_302_87305_20140214_225515_outLine +BABEL_OP2_302_87693_20131121_041057_inLine +BABEL_OP2_302_87693_20131121_041057_outLine +BABEL_OP2_302_87777_20140127_145958_inLine +BABEL_OP2_302_87777_20140127_145958_outLine +BABEL_OP2_302_88661_20131225_211835_inLine +BABEL_OP2_302_88661_20131225_211835_outLine +BABEL_OP2_302_88938_20140202_215623_inLine 
+BABEL_OP2_302_88938_20140202_215623_outLine +BABEL_OP2_302_89059_20140115_214308_inLine +BABEL_OP2_302_89059_20140115_214308_outLine +BABEL_OP2_302_90709_20131111_143953_inLine +BABEL_OP2_302_90709_20131111_143953_outLine +BABEL_OP2_302_92557_20140125_230505_inLine +BABEL_OP2_302_92557_20140125_230505_outLine +BABEL_OP2_302_92736_20140119_170328_inLine +BABEL_OP2_302_92736_20140119_170328_outLine +BABEL_OP2_302_92809_20131109_182045_inLine +BABEL_OP2_302_92809_20131109_182045_outLine +BABEL_OP2_302_92942_20131219_014744_inLine +BABEL_OP2_302_92942_20131219_014744_outLine +BABEL_OP2_302_93632_20140203_154221_inLine +BABEL_OP2_302_93632_20140203_154221_outLine +BABEL_OP2_302_93964_20131208_002934_inLine +BABEL_OP2_302_93964_20131208_002934_outLine +BABEL_OP2_302_94253_20131114_215945_inLine +BABEL_OP2_302_94253_20131114_215945_outLine +BABEL_OP2_302_94449_20140314_185933_inLine +BABEL_OP2_302_94449_20140314_185933_outLine +BABEL_OP2_302_94803_20140313_225823_inLine +BABEL_OP2_302_94803_20140313_225823_outLine +BABEL_OP2_302_95598_20131101_172634_inLine +BABEL_OP2_302_95598_20131101_172634_outLine +BABEL_OP2_302_95598_20131101_175037_inLine +BABEL_OP2_302_95598_20131101_175037_outLine +BABEL_OP2_302_95670_20131130_185901_inLine +BABEL_OP2_302_95670_20131130_185901_outLine +BABEL_OP2_302_95903_20140303_002203_inLine +BABEL_OP2_302_95903_20140303_002203_outLine +BABEL_OP2_302_96525_20140110_201817_inLine +BABEL_OP2_302_96525_20140110_201817_outLine +BABEL_OP2_302_96690_20131204_221739_inLine +BABEL_OP2_302_96690_20131204_221739_outLine +BABEL_OP2_302_96910_20140130_210316_inLine +BABEL_OP2_302_96910_20140130_210316_outLine +BABEL_OP2_302_97461_20131211_211339_inLine +BABEL_OP2_302_97461_20131211_211339_outLine +BABEL_OP2_302_97557_20131219_192714_inLine +BABEL_OP2_302_97557_20131219_192714_outLine +BABEL_OP2_302_97588_20131101_161834_inLine +BABEL_OP2_302_97588_20131101_161834_outLine +BABEL_OP2_302_97588_20131101_163947_inLine +BABEL_OP2_302_97588_20131101_163947_outLine +BABEL_OP2_302_97731_20140114_201001_inLine +BABEL_OP2_302_97731_20140114_201001_outLine +BABEL_OP2_302_97772_20131107_223232_inLine +BABEL_OP2_302_97772_20131107_223232_outLine +BABEL_OP2_302_98489_20131204_181216_inLine +BABEL_OP2_302_98489_20131204_181216_outLine +BABEL_OP2_302_98909_20131117_153948_inLine +BABEL_OP2_302_98909_20131117_153948_outLine +BABEL_OP2_302_99401_20131114_221114_inLine +BABEL_OP2_302_99401_20131114_221114_outLine +BABEL_OP2_302_99887_20140129_162421_inLine +BABEL_OP2_302_99887_20140129_162421_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list new file mode 100644 index 00000000000..52a1f686ddc --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list @@ -0,0 +1,569 @@ +BABEL_OP2_302_11723_20140320_021007_inLine +BABEL_OP2_302_11723_20140320_021007_outLine +BABEL_OP2_302_11723_20140320_030801_inLine +BABEL_OP2_302_11723_20140320_030801_outLine +BABEL_OP2_302_11768_20140319_010224_inLine +BABEL_OP2_302_11768_20140319_010224_outLine +BABEL_OP2_302_13776_20140225_201416_inLine +BABEL_OP2_302_13776_20140225_201416_outLine +BABEL_OP2_302_13776_20140225_203014_inLine +BABEL_OP2_302_13776_20140225_203014_outLine +BABEL_OP2_302_14179_20131218_222613_inLine +BABEL_OP2_302_14179_20131218_222613_outLine +BABEL_OP2_302_14179_20131218_223829_inLine +BABEL_OP2_302_14179_20131218_223829_outLine +BABEL_OP2_302_14179_20131218_224616_inLine +BABEL_OP2_302_14179_20131218_224616_outLine 
+BABEL_OP2_302_14537_20140204_020213_inLine +BABEL_OP2_302_14537_20140204_020213_outLine +BABEL_OP2_302_14575_20140317_000954_inLine +BABEL_OP2_302_14575_20140317_000954_outLine +BABEL_OP2_302_15322_20140223_032907_inLine +BABEL_OP2_302_15322_20140223_032907_outLine +BABEL_OP2_302_15466_20140311_190939_inLine +BABEL_OP2_302_15466_20140311_190939_outLine +BABEL_OP2_302_15749_20140106_224305_inLine +BABEL_OP2_302_15749_20140106_224305_outLine +BABEL_OP2_302_15869_20140319_024910_inLine +BABEL_OP2_302_15869_20140319_024910_outLine +BABEL_OP2_302_15926_20131223_153308_inLine +BABEL_OP2_302_15926_20131223_153308_outLine +BABEL_OP2_302_15926_20131223_154557_inLine +BABEL_OP2_302_15926_20131223_154557_outLine +BABEL_OP2_302_15926_20131223_155325_inLine +BABEL_OP2_302_15926_20131223_155325_outLine +BABEL_OP2_302_15926_20131223_160015_inLine +BABEL_OP2_302_15926_20131223_160015_outLine +BABEL_OP2_302_15926_20131223_160509_inLine +BABEL_OP2_302_15926_20131223_160509_outLine +BABEL_OP2_302_15926_20131223_161316_inLine +BABEL_OP2_302_15926_20131223_161316_outLine +BABEL_OP2_302_17115_20140326_194730_inLine +BABEL_OP2_302_17115_20140326_194730_outLine +BABEL_OP2_302_17420_20140222_233405_inLine +BABEL_OP2_302_17420_20140222_233405_outLine +BABEL_OP2_302_17615_20140107_025352_inLine +BABEL_OP2_302_17615_20140107_025352_outLine +BABEL_OP2_302_19440_20140328_012334_inLine +BABEL_OP2_302_19440_20140328_012334_outLine +BABEL_OP2_302_20800_20131116_165644_inLine +BABEL_OP2_302_20800_20131116_165644_outLine +BABEL_OP2_302_20896_20140319_002117_inLine +BABEL_OP2_302_20896_20140319_002117_outLine +BABEL_OP2_302_21244_20140221_181738_inLine +BABEL_OP2_302_21244_20140221_181738_outLine +BABEL_OP2_302_21244_20140221_185615_inLine +BABEL_OP2_302_21244_20140221_185615_outLine +BABEL_OP2_302_21315_20140304_163357_inLine +BABEL_OP2_302_21315_20140304_163357_outLine +BABEL_OP2_302_21393_20140303_153741_inLine +BABEL_OP2_302_21393_20140303_153741_outLine +BABEL_OP2_302_22170_20140203_213129_inLine +BABEL_OP2_302_22170_20140203_213129_outLine +BABEL_OP2_302_22494_20131227_200715_inLine +BABEL_OP2_302_22918_20140203_004320_inLine +BABEL_OP2_302_22918_20140203_004320_outLine +BABEL_OP2_302_23092_20140218_224204_inLine +BABEL_OP2_302_23092_20140218_224204_outLine +BABEL_OP2_302_24033_20140131_000701_inLine +BABEL_OP2_302_24033_20140131_000701_outLine +BABEL_OP2_302_24209_20140328_231409_inLine +BABEL_OP2_302_24209_20140328_231409_outLine +BABEL_OP2_302_24587_20140318_225950_inLine +BABEL_OP2_302_24587_20140318_225950_outLine +BABEL_OP2_302_25719_20140129_155329_inLine +BABEL_OP2_302_25719_20140129_155329_outLine +BABEL_OP2_302_26507_20140308_203259_inLine +BABEL_OP2_302_26507_20140308_203259_outLine +BABEL_OP2_302_26574_20140201_202449_inLine +BABEL_OP2_302_26574_20140201_202449_outLine +BABEL_OP2_302_27218_20131115_211108_inLine +BABEL_OP2_302_27218_20131115_211108_outLine +BABEL_OP2_302_27841_20140124_002040_inLine +BABEL_OP2_302_27841_20140124_002040_outLine +BABEL_OP2_302_27841_20140124_003521_inLine +BABEL_OP2_302_27841_20140124_003521_outLine +BABEL_OP2_302_28538_20131218_015743_inLine +BABEL_OP2_302_28600_20140111_172326_inLine +BABEL_OP2_302_28600_20140111_172326_outLine +BABEL_OP2_302_28600_20140111_173213_inLine +BABEL_OP2_302_28600_20140111_173213_outLine +BABEL_OP2_302_28606_20140108_222927_inLine +BABEL_OP2_302_28606_20140108_222927_outLine +BABEL_OP2_302_28606_20140108_223822_inLine +BABEL_OP2_302_28606_20140108_223822_outLine +BABEL_OP2_302_28775_20131116_210510_outLine 
+BABEL_OP2_302_29076_20131214_155845_inLine +BABEL_OP2_302_29076_20131214_155845_outLine +BABEL_OP2_302_29076_20131214_160555_inLine +BABEL_OP2_302_29076_20131214_160555_outLine +BABEL_OP2_302_29323_20140130_190425_inLine +BABEL_OP2_302_29323_20140130_190425_outLine +BABEL_OP2_302_29404_20140214_174021_inLine +BABEL_OP2_302_29404_20140214_174021_outLine +BABEL_OP2_302_29643_20140213_000617_inLine +BABEL_OP2_302_29643_20140213_000617_outLine +BABEL_OP2_302_30974_20140315_192921_inLine +BABEL_OP2_302_30974_20140315_192921_outLine +BABEL_OP2_302_31182_20140107_213519_outLine +BABEL_OP2_302_31346_20131230_220709_inLine +BABEL_OP2_302_31346_20131230_221548_inLine +BABEL_OP2_302_31919_20140120_211340_inLine +BABEL_OP2_302_31919_20140120_211340_outLine +BABEL_OP2_302_32727_20140304_200331_inLine +BABEL_OP2_302_32727_20140304_200331_outLine +BABEL_OP2_302_32832_20140114_180910_inLine +BABEL_OP2_302_32832_20140114_180910_outLine +BABEL_OP2_302_32872_20140311_203050_inLine +BABEL_OP2_302_32872_20140311_203050_outLine +BABEL_OP2_302_32872_20140311_204413_inLine +BABEL_OP2_302_32872_20140311_204413_outLine +BABEL_OP2_302_32959_20140301_192344_inLine +BABEL_OP2_302_32959_20140301_192344_outLine +BABEL_OP2_302_33273_20131214_184246_inLine +BABEL_OP2_302_33273_20131214_184246_outLine +BABEL_OP2_302_33273_20131214_191106_inLine +BABEL_OP2_302_33273_20131214_191106_outLine +BABEL_OP2_302_34410_20140321_182956_inLine +BABEL_OP2_302_34410_20140321_182956_outLine +BABEL_OP2_302_34629_20140315_192329_inLine +BABEL_OP2_302_34629_20140315_192329_outLine +BABEL_OP2_302_35202_20131221_211514_inLine +BABEL_OP2_302_35202_20131221_211514_outLine +BABEL_OP2_302_35202_20131221_220228_inLine +BABEL_OP2_302_35202_20131221_220228_outLine +BABEL_OP2_302_35609_20140326_155717_inLine +BABEL_OP2_302_35609_20140326_155717_outLine +BABEL_OP2_302_35786_20140201_193528_inLine +BABEL_OP2_302_35786_20140201_193528_outLine +BABEL_OP2_302_36147_20140314_230249_inLine +BABEL_OP2_302_36147_20140314_230249_outLine +BABEL_OP2_302_36632_20140316_220512_inLine +BABEL_OP2_302_36632_20140316_220512_outLine +BABEL_OP2_302_36642_20140203_204149_inLine +BABEL_OP2_302_36642_20140203_204149_outLine +BABEL_OP2_302_37281_20131223_193947_inLine +BABEL_OP2_302_37281_20131223_193947_outLine +BABEL_OP2_302_38554_20131107_231324_inLine +BABEL_OP2_302_38554_20131107_231324_outLine +BABEL_OP2_302_39159_20131101_181154_inLine +BABEL_OP2_302_39159_20131101_181154_outLine +BABEL_OP2_302_39159_20131101_182621_inLine +BABEL_OP2_302_39159_20131101_182621_outLine +BABEL_OP2_302_39277_20140313_204841_inLine +BABEL_OP2_302_39277_20140313_204841_outLine +BABEL_OP2_302_39920_20140308_212443_inLine +BABEL_OP2_302_39920_20140308_212443_outLine +BABEL_OP2_302_40196_20140316_214624_inLine +BABEL_OP2_302_40196_20140316_214624_outLine +BABEL_OP2_302_40740_20140203_175842_inLine +BABEL_OP2_302_40740_20140203_175842_outLine +BABEL_OP2_302_41109_20140129_151014_outLine +BABEL_OP2_302_41109_20140129_151930_outLine +BABEL_OP2_302_41682_20140316_184028_inLine +BABEL_OP2_302_41682_20140316_184028_outLine +BABEL_OP2_302_41685_20140319_224731_inLine +BABEL_OP2_302_41685_20140319_224731_outLine +BABEL_OP2_302_41745_20131216_195331_inLine +BABEL_OP2_302_41745_20131216_195331_outLine +BABEL_OP2_302_42126_20140319_211544_inLine +BABEL_OP2_302_42126_20140319_211544_outLine +BABEL_OP2_302_42243_20131104_193524_inLine +BABEL_OP2_302_42243_20131104_193524_outLine +BABEL_OP2_302_42718_20140303_235926_inLine +BABEL_OP2_302_42718_20140303_235926_outLine 
+BABEL_OP2_302_42883_20140301_215140_inLine +BABEL_OP2_302_42883_20140301_215140_outLine +BABEL_OP2_302_42883_20140301_220413_inLine +BABEL_OP2_302_42883_20140301_220413_outLine +BABEL_OP2_302_43285_20131218_173818_inLine +BABEL_OP2_302_43285_20131218_173818_outLine +BABEL_OP2_302_43285_20131218_175248_inLine +BABEL_OP2_302_43285_20131218_175248_outLine +BABEL_OP2_302_43323_20140320_012405_inLine +BABEL_OP2_302_43323_20140320_012405_outLine +BABEL_OP2_302_43990_20140319_003408_inLine +BABEL_OP2_302_44255_20140203_221612_inLine +BABEL_OP2_302_44255_20140203_221612_outLine +BABEL_OP2_302_44681_20140316_231417_inLine +BABEL_OP2_302_44681_20140316_231417_outLine +BABEL_OP2_302_45106_20140130_195527_inLine +BABEL_OP2_302_45106_20140130_195527_outLine +BABEL_OP2_302_45140_20140204_000835_inLine +BABEL_OP2_302_45140_20140204_000835_outLine +BABEL_OP2_302_45374_20140316_190302_inLine +BABEL_OP2_302_45374_20140316_190302_outLine +BABEL_OP2_302_46066_20140131_180512_inLine +BABEL_OP2_302_46066_20140131_180512_outLine +BABEL_OP2_302_46315_20140112_051606_inLine +BABEL_OP2_302_46315_20140112_051606_outLine +BABEL_OP2_302_46315_20140112_053032_inLine +BABEL_OP2_302_46315_20140112_053032_outLine +BABEL_OP2_302_46333_20131106_193911_inLine +BABEL_OP2_302_46333_20131106_193911_outLine +BABEL_OP2_302_46389_20140317_000314_inLine +BABEL_OP2_302_46389_20140317_000314_outLine +BABEL_OP2_302_46589_20131218_200246_inLine +BABEL_OP2_302_47799_20140310_191802_inLine +BABEL_OP2_302_47799_20140310_191802_outLine +BABEL_OP2_302_48200_20140111_171610_inLine +BABEL_OP2_302_48200_20140111_171610_outLine +BABEL_OP2_302_48758_20140222_204731_inLine +BABEL_OP2_302_48758_20140222_204731_outLine +BABEL_OP2_302_49027_20140307_172629_inLine +BABEL_OP2_302_49027_20140307_172629_outLine +BABEL_OP2_302_49118_20140114_164903_inLine +BABEL_OP2_302_49118_20140114_164903_outLine +BABEL_OP2_302_49437_20140123_224810_inLine +BABEL_OP2_302_49437_20140123_224810_outLine +BABEL_OP2_302_49739_20140314_204410_inLine +BABEL_OP2_302_49739_20140314_204410_outLine +BABEL_OP2_302_51015_20131225_194000_inLine +BABEL_OP2_302_51015_20131225_194000_outLine +BABEL_OP2_302_51407_20131210_182141_inLine +BABEL_OP2_302_51407_20131210_182141_outLine +BABEL_OP2_302_51414_20140301_231945_inLine +BABEL_OP2_302_51414_20140301_231945_outLine +BABEL_OP2_302_51414_20140301_232951_inLine +BABEL_OP2_302_51414_20140301_232951_outLine +BABEL_OP2_302_51530_20140303_173734_inLine +BABEL_OP2_302_51530_20140303_173734_outLine +BABEL_OP2_302_51693_20140317_180609_inLine +BABEL_OP2_302_51693_20140317_180609_outLine +BABEL_OP2_302_51819_20140108_231905_inLine +BABEL_OP2_302_51819_20140108_231905_outLine +BABEL_OP2_302_51819_20140108_232624_inLine +BABEL_OP2_302_51819_20140108_232624_outLine +BABEL_OP2_302_51858_20140314_235721_inLine +BABEL_OP2_302_51858_20140314_235721_outLine +BABEL_OP2_302_52070_20140320_231722_inLine +BABEL_OP2_302_52070_20140320_231722_outLine +BABEL_OP2_302_52222_20140314_185604_inLine +BABEL_OP2_302_52222_20140314_185604_outLine +BABEL_OP2_302_52265_20140317_214257_inLine +BABEL_OP2_302_52265_20140317_214257_outLine +BABEL_OP2_302_52483_20140318_192930_inLine +BABEL_OP2_302_52483_20140318_192930_outLine +BABEL_OP2_302_52490_20131204_173409_inLine +BABEL_OP2_302_52490_20131204_173409_outLine +BABEL_OP2_302_52725_20140224_182027_inLine +BABEL_OP2_302_52725_20140224_182027_outLine +BABEL_OP2_302_53072_20140307_191159_inLine +BABEL_OP2_302_53072_20140307_191159_outLine +BABEL_OP2_302_53415_20140301_180358_inLine 
+BABEL_OP2_302_53415_20140301_180358_outLine +BABEL_OP2_302_53917_20140214_214823_inLine +BABEL_OP2_302_53917_20140214_214823_outLine +BABEL_OP2_302_54046_20140111_191512_inLine +BABEL_OP2_302_54046_20140111_191512_outLine +BABEL_OP2_302_54160_20131105_233517_inLine +BABEL_OP2_302_54160_20131105_233517_outLine +BABEL_OP2_302_54405_20140111_185837_inLine +BABEL_OP2_302_54405_20140111_185837_outLine +BABEL_OP2_302_54477_20140108_182424_inLine +BABEL_OP2_302_54477_20140108_182424_outLine +BABEL_OP2_302_54923_20140216_224345_inLine +BABEL_OP2_302_54923_20140216_224345_outLine +BABEL_OP2_302_55259_20140110_235646_inLine +BABEL_OP2_302_55259_20140110_235646_outLine +BABEL_OP2_302_56331_20140116_230347_inLine +BABEL_OP2_302_56331_20140116_230347_outLine +BABEL_OP2_302_56345_20140316_214007_inLine +BABEL_OP2_302_56345_20140316_214007_outLine +BABEL_OP2_302_56468_20140313_205811_inLine +BABEL_OP2_302_56468_20140313_205811_outLine +BABEL_OP2_302_56743_20131216_222343_inLine +BABEL_OP2_302_56743_20131216_222343_outLine +BABEL_OP2_302_56925_20140324_224547_inLine +BABEL_OP2_302_56925_20140324_224547_outLine +BABEL_OP2_302_57116_20131030_223921_inLine +BABEL_OP2_302_57116_20131030_223921_outLine +BABEL_OP2_302_57542_20140122_203736_inLine +BABEL_OP2_302_57542_20140122_203736_outLine +BABEL_OP2_302_57654_20131117_191902_inLine +BABEL_OP2_302_57654_20131117_191902_outLine +BABEL_OP2_302_57654_20131117_192605_inLine +BABEL_OP2_302_57654_20131117_192605_outLine +BABEL_OP2_302_57678_20131219_025602_inLine +BABEL_OP2_302_57678_20131219_025602_outLine +BABEL_OP2_302_58047_20131218_204521_inLine +BABEL_OP2_302_58047_20131218_204521_outLine +BABEL_OP2_302_58734_20131113_233358_inLine +BABEL_OP2_302_58734_20131113_233358_outLine +BABEL_OP2_302_59091_20140128_234107_inLine +BABEL_OP2_302_59091_20140128_234107_outLine +BABEL_OP2_302_59301_20140114_221332_inLine +BABEL_OP2_302_59301_20140114_221332_outLine +BABEL_OP2_302_60115_20131217_170350_inLine +BABEL_OP2_302_60115_20131217_170350_outLine +BABEL_OP2_302_60661_20131116_191211_inLine +BABEL_OP2_302_60661_20131116_191211_outLine +BABEL_OP2_302_62155_20140201_185809_inLine +BABEL_OP2_302_62155_20140201_185809_outLine +BABEL_OP2_302_62158_20140319_223940_inLine +BABEL_OP2_302_62158_20140319_223940_outLine +BABEL_OP2_302_62200_20131209_215237_inLine +BABEL_OP2_302_62200_20131209_215237_outLine +BABEL_OP2_302_62362_20140225_004754_inLine +BABEL_OP2_302_62362_20140225_004754_outLine +BABEL_OP2_302_62430_20140301_152214_inLine +BABEL_OP2_302_62430_20140301_152214_outLine +BABEL_OP2_302_62724_20140304_224111_inLine +BABEL_OP2_302_62724_20140304_224111_outLine +BABEL_OP2_302_62835_20131223_201212_inLine +BABEL_OP2_302_63220_20140127_003053_inLine +BABEL_OP2_302_63220_20140127_003053_outLine +BABEL_OP2_302_63425_20140119_000855_inLine +BABEL_OP2_302_63425_20140119_000855_outLine +BABEL_OP2_302_63445_20131121_234555_inLine +BABEL_OP2_302_63511_20140311_232611_inLine +BABEL_OP2_302_63511_20140311_232611_outLine +BABEL_OP2_302_63523_20140219_180149_inLine +BABEL_OP2_302_63523_20140219_180149_outLine +BABEL_OP2_302_63906_20140120_224621_inLine +BABEL_OP2_302_63906_20140120_224621_outLine +BABEL_OP2_302_63938_20140129_205148_inLine +BABEL_OP2_302_63938_20140129_205148_outLine +BABEL_OP2_302_64350_20131117_225845_inLine +BABEL_OP2_302_64350_20131117_225845_outLine +BABEL_OP2_302_64350_20131117_232849_inLine +BABEL_OP2_302_64350_20131117_232849_outLine +BABEL_OP2_302_65639_20140320_150018_inLine +BABEL_OP2_302_65639_20140320_150018_outLine 
+BABEL_OP2_302_66026_20140326_234154_inLine +BABEL_OP2_302_66026_20140326_234154_outLine +BABEL_OP2_302_66361_20140319_194108_inLine +BABEL_OP2_302_66361_20140319_194108_outLine +BABEL_OP2_302_67213_20140327_183232_inLine +BABEL_OP2_302_67213_20140327_183232_outLine +BABEL_OP2_302_67304_20140327_170105_inLine +BABEL_OP2_302_67304_20140327_170105_outLine +BABEL_OP2_302_67389_20140316_224805_inLine +BABEL_OP2_302_67389_20140316_224805_outLine +BABEL_OP2_302_67389_20140316_230159_inLine +BABEL_OP2_302_67389_20140316_230159_outLine +BABEL_OP2_302_67552_20131225_215450_inLine +BABEL_OP2_302_67552_20131225_215450_outLine +BABEL_OP2_302_67592_20140113_211110_inLine +BABEL_OP2_302_67592_20140113_211110_outLine +BABEL_OP2_302_67726_20140319_013401_outLine +BABEL_OP2_302_67726_20140319_014304_outLine +BABEL_OP2_302_67794_20131117_183019_inLine +BABEL_OP2_302_67794_20131117_183019_outLine +BABEL_OP2_302_68402_20140327_221916_inLine +BABEL_OP2_302_68402_20140327_221916_outLine +BABEL_OP2_302_68627_20131204_225115_inLine +BABEL_OP2_302_68627_20131204_225115_outLine +BABEL_OP2_302_68748_20131217_195520_inLine +BABEL_OP2_302_68748_20131217_195520_outLine +BABEL_OP2_302_68748_20131217_201343_inLine +BABEL_OP2_302_68748_20131217_201343_outLine +BABEL_OP2_302_68908_20140320_022955_inLine +BABEL_OP2_302_68908_20140320_022955_outLine +BABEL_OP2_302_69982_20140311_005531_inLine +BABEL_OP2_302_69982_20140311_005531_outLine +BABEL_OP2_302_70282_20131220_160010_inLine +BABEL_OP2_302_70282_20131220_160010_outLine +BABEL_OP2_302_70639_20140318_221840_inLine +BABEL_OP2_302_70639_20140318_221840_outLine +BABEL_OP2_302_70726_20140319_183341_inLine +BABEL_OP2_302_70726_20140319_183341_outLine +BABEL_OP2_302_71419_20140314_222627_inLine +BABEL_OP2_302_71419_20140314_222627_outLine +BABEL_OP2_302_73408_20140326_185144_inLine +BABEL_OP2_302_73408_20140326_185144_outLine +BABEL_OP2_302_73408_20140326_190631_inLine +BABEL_OP2_302_73408_20140326_190631_outLine +BABEL_OP2_302_73591_20131029_231600_inLine +BABEL_OP2_302_73814_20131226_180746_inLine +BABEL_OP2_302_73814_20131226_180746_outLine +BABEL_OP2_302_73814_20131226_181941_inLine +BABEL_OP2_302_73814_20131226_181941_outLine +BABEL_OP2_302_73964_20140303_232725_inLine +BABEL_OP2_302_73964_20140303_232725_outLine +BABEL_OP2_302_74078_20140324_220859_outLine +BABEL_OP2_302_74121_20131206_165002_inLine +BABEL_OP2_302_74253_20140203_174833_inLine +BABEL_OP2_302_74253_20140203_174833_outLine +BABEL_OP2_302_74728_20140307_151132_inLine +BABEL_OP2_302_74728_20140307_151132_outLine +BABEL_OP2_302_75064_20131205_015445_inLine +BABEL_OP2_302_75261_20140218_213238_inLine +BABEL_OP2_302_75261_20140218_213238_outLine +BABEL_OP2_302_75359_20140220_000334_inLine +BABEL_OP2_302_75359_20140220_000334_outLine +BABEL_OP2_302_75366_20140310_224545_inLine +BABEL_OP2_302_75366_20140310_224545_outLine +BABEL_OP2_302_75465_20140125_194816_inLine +BABEL_OP2_302_75465_20140125_194816_outLine +BABEL_OP2_302_75764_20140123_173321_inLine +BABEL_OP2_302_75764_20140123_173321_outLine +BABEL_OP2_302_75869_20140118_180045_inLine +BABEL_OP2_302_75869_20140118_180045_outLine +BABEL_OP2_302_75993_20131118_164850_inLine +BABEL_OP2_302_75993_20131118_164850_outLine +BABEL_OP2_302_76444_20140304_213108_inLine +BABEL_OP2_302_76444_20140304_213108_outLine +BABEL_OP2_302_76970_20140327_002045_inLine +BABEL_OP2_302_76970_20140327_002045_outLine +BABEL_OP2_302_77904_20140316_204739_inLine +BABEL_OP2_302_77904_20140316_204739_outLine +BABEL_OP2_302_78360_20140131_201120_inLine 
+BABEL_OP2_302_78360_20140131_201120_outLine +BABEL_OP2_302_78630_20131115_232537_inLine +BABEL_OP2_302_78630_20131115_232537_outLine +BABEL_OP2_302_78976_20131128_230615_inLine +BABEL_OP2_302_78976_20131128_230615_outLine +BABEL_OP2_302_79167_20131225_175926_inLine +BABEL_OP2_302_79167_20131225_175926_outLine +BABEL_OP2_302_79367_20131112_222137_inLine +BABEL_OP2_302_79367_20131112_222137_outLine +BABEL_OP2_302_79858_20131116_000426_inLine +BABEL_OP2_302_81404_20131213_230929_inLine +BABEL_OP2_302_81404_20131213_230929_outLine +BABEL_OP2_302_81427_20131211_221442_inLine +BABEL_OP2_302_81427_20131211_221442_outLine +BABEL_OP2_302_81674_20140315_024749_inLine +BABEL_OP2_302_81674_20140315_024749_outLine +BABEL_OP2_302_82140_20131206_055551_inLine +BABEL_OP2_302_82140_20131206_055551_outLine +BABEL_OP2_302_82361_20140204_014603_inLine +BABEL_OP2_302_82361_20140204_014603_outLine +BABEL_OP2_302_82622_20131105_002634_inLine +BABEL_OP2_302_82622_20131105_002634_outLine +BABEL_OP2_302_82904_20140203_194011_inLine +BABEL_OP2_302_82904_20140203_194011_outLine +BABEL_OP2_302_83366_20131223_172753_inLine +BABEL_OP2_302_83366_20131223_172753_outLine +BABEL_OP2_302_83775_20131203_184707_inLine +BABEL_OP2_302_83775_20131203_184707_outLine +BABEL_OP2_302_83783_20131218_212844_inLine +BABEL_OP2_302_83783_20131218_212844_outLine +BABEL_OP2_302_84327_20140112_031943_inLine +BABEL_OP2_302_84327_20140112_031943_outLine +BABEL_OP2_302_84327_20140112_033431_inLine +BABEL_OP2_302_84327_20140112_033431_outLine +BABEL_OP2_302_84458_20140226_001547_inLine +BABEL_OP2_302_84458_20140226_001547_outLine +BABEL_OP2_302_84583_20131220_210443_inLine +BABEL_OP2_302_84583_20131220_210443_outLine +BABEL_OP2_302_84838_20140112_004851_inLine +BABEL_OP2_302_84838_20140112_004851_outLine +BABEL_OP2_302_84838_20140112_011030_inLine +BABEL_OP2_302_84838_20140112_011030_outLine +BABEL_OP2_302_85028_20140106_232649_inLine +BABEL_OP2_302_85028_20140106_232649_outLine +BABEL_OP2_302_85260_20140318_235730_inLine +BABEL_OP2_302_85260_20140318_235730_outLine +BABEL_OP2_302_85260_20140319_021618_inLine +BABEL_OP2_302_85260_20140319_021618_outLine +BABEL_OP2_302_85519_20140111_210933_inLine +BABEL_OP2_302_85519_20140111_210933_outLine +BABEL_OP2_302_85651_20140108_220631_inLine +BABEL_OP2_302_85651_20140108_220631_outLine +BABEL_OP2_302_85651_20140108_221652_inLine +BABEL_OP2_302_85651_20140108_221652_outLine +BABEL_OP2_302_85651_20140108_222943_inLine +BABEL_OP2_302_85651_20140108_222943_outLine +BABEL_OP2_302_86321_20140304_184505_outLine +BABEL_OP2_302_86321_20140304_190052_outLine +BABEL_OP2_302_86676_20131221_194024_inLine +BABEL_OP2_302_86676_20131221_194024_outLine +BABEL_OP2_302_86826_20140221_213850_inLine +BABEL_OP2_302_86826_20140221_213850_outLine +BABEL_OP2_302_86885_20140319_172338_inLine +BABEL_OP2_302_86885_20140319_172338_outLine +BABEL_OP2_302_86888_20131221_183239_inLine +BABEL_OP2_302_86888_20131221_183239_outLine +BABEL_OP2_302_86952_20131105_224050_inLine +BABEL_OP2_302_86952_20131105_224050_outLine +BABEL_OP2_302_87073_20131108_174654_inLine +BABEL_OP2_302_87073_20131108_174654_outLine +BABEL_OP2_302_87545_20140303_174324_inLine +BABEL_OP2_302_87545_20140303_174324_outLine +BABEL_OP2_302_87629_20131226_030820_inLine +BABEL_OP2_302_87629_20131226_030820_outLine +BABEL_OP2_302_90440_20140314_190637_inLine +BABEL_OP2_302_90440_20140314_190637_outLine +BABEL_OP2_302_90740_20140312_213002_inLine +BABEL_OP2_302_90740_20140312_213002_outLine +BABEL_OP2_302_91606_20140223_185235_inLine 
+BABEL_OP2_302_91606_20140223_185235_outLine +BABEL_OP2_302_91808_20140315_215351_inLine +BABEL_OP2_302_91808_20140315_215351_outLine +BABEL_OP2_302_91977_20131217_185642_inLine +BABEL_OP2_302_91977_20131217_185642_outLine +BABEL_OP2_302_92096_20140121_222052_inLine +BABEL_OP2_302_92096_20140121_222052_outLine +BABEL_OP2_302_92096_20140121_222833_inLine +BABEL_OP2_302_92096_20140121_222833_outLine +BABEL_OP2_302_92096_20140121_223620_inLine +BABEL_OP2_302_92096_20140121_223620_outLine +BABEL_OP2_302_92252_20140317_205453_outLine +BABEL_OP2_302_92740_20131217_174305_inLine +BABEL_OP2_302_92740_20131217_174305_outLine +BABEL_OP2_302_92886_20131204_201740_inLine +BABEL_OP2_302_93153_20131115_225858_inLine +BABEL_OP2_302_93153_20131115_225858_outLine +BABEL_OP2_302_93443_20140320_025047_inLine +BABEL_OP2_302_93443_20140320_025047_outLine +BABEL_OP2_302_93490_20140113_184331_inLine +BABEL_OP2_302_93490_20140113_184331_outLine +BABEL_OP2_302_93946_20140225_213901_inLine +BABEL_OP2_302_93946_20140225_213901_outLine +BABEL_OP2_302_94044_20140314_194724_inLine +BABEL_OP2_302_94044_20140314_194724_outLine +BABEL_OP2_302_94044_20140314_195844_inLine +BABEL_OP2_302_94044_20140314_195844_outLine +BABEL_OP2_302_94141_20140220_002237_inLine +BABEL_OP2_302_94141_20140220_002237_outLine +BABEL_OP2_302_94212_20140313_231224_inLine +BABEL_OP2_302_94212_20140313_231224_outLine +BABEL_OP2_302_94487_20140303_234032_inLine +BABEL_OP2_302_94487_20140303_234032_outLine +BABEL_OP2_302_94587_20140115_224719_inLine +BABEL_OP2_302_94587_20140115_224719_outLine +BABEL_OP2_302_94587_20140115_225600_inLine +BABEL_OP2_302_94587_20140115_225600_outLine +BABEL_OP2_302_94587_20140115_230344_inLine +BABEL_OP2_302_94587_20140115_230344_outLine +BABEL_OP2_302_95467_20140217_181554_inLine +BABEL_OP2_302_95467_20140217_181554_outLine +BABEL_OP2_302_95490_20131119_220530_inLine +BABEL_OP2_302_95490_20131119_220530_outLine +BABEL_OP2_302_95490_20131119_221754_inLine +BABEL_OP2_302_95490_20131119_221754_outLine +BABEL_OP2_302_95571_20140315_172644_inLine +BABEL_OP2_302_95571_20140315_172644_outLine +BABEL_OP2_302_95966_20131213_023122_inLine +BABEL_OP2_302_95966_20131213_023122_outLine +BABEL_OP2_302_96088_20140307_165731_inLine +BABEL_OP2_302_96088_20140307_165731_outLine +BABEL_OP2_302_96247_20140120_221340_inLine +BABEL_OP2_302_96247_20140120_221340_outLine +BABEL_OP2_302_96247_20140120_224135_inLine +BABEL_OP2_302_96247_20140120_224135_outLine +BABEL_OP2_302_96584_20140127_164106_inLine +BABEL_OP2_302_96584_20140127_164106_outLine +BABEL_OP2_302_96934_20131203_232255_inLine +BABEL_OP2_302_97570_20131223_175908_inLine +BABEL_OP2_302_97570_20131223_175908_outLine +BABEL_OP2_302_97570_20131223_180949_inLine +BABEL_OP2_302_97570_20131223_180949_outLine +BABEL_OP2_302_97849_20140203_203804_inLine +BABEL_OP2_302_97849_20140203_203804_outLine +BABEL_OP2_302_97896_20131212_155943_inLine +BABEL_OP2_302_97896_20131212_155943_outLine +BABEL_OP2_302_98165_20131218_211431_inLine +BABEL_OP2_302_98165_20131218_211431_outLine +BABEL_OP2_302_99202_20131226_015321_inLine +BABEL_OP2_302_99202_20131226_015321_outLine +BABEL_OP2_302_99264_20140111_215716_inLine +BABEL_OP2_302_99264_20140111_215716_outLine +BABEL_OP2_302_99487_20131109_171503_inLine +BABEL_OP2_302_99718_20131113_003931_inLine +BABEL_OP2_302_99718_20131113_003931_outLine +BABEL_OP2_302_99952_20140203_225818_inLine +BABEL_OP2_302_99955_20140127_230118_inLine +BABEL_OP2_302_99975_20140317_202757_inLine +BABEL_OP2_302_99975_20140317_202757_outLine diff --git 
a/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list new file mode 100644 index 00000000000..2109ba73287 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list @@ -0,0 +1,126 @@ +BABEL_OP2_303_12220_20131108_232918_inLine +BABEL_OP2_303_12220_20131108_232918_outLine +BABEL_OP2_303_13040_20131115_232722_inLine +BABEL_OP2_303_13040_20131115_232722_outLine +BABEL_OP2_303_14158_20131204_203458_inLine +BABEL_OP2_303_14158_20131204_203458_outLine +BABEL_OP2_303_15466_20140204_191250_inLine +BABEL_OP2_303_15466_20140204_191250_outLine +BABEL_OP2_303_16056_20131030_201705_inLine +BABEL_OP2_303_16056_20131030_201705_outLine +BABEL_OP2_303_16475_20131109_024735_inLine +BABEL_OP2_303_16475_20131109_024735_outLine +BABEL_OP2_303_17280_20131105_033157_inLine +BABEL_OP2_303_17280_20131105_033157_outLine +BABEL_OP2_303_19703_20131114_213952_inLine +BABEL_OP2_303_19703_20131114_213952_outLine +BABEL_OP2_303_21029_20131112_180205_inLine +BABEL_OP2_303_21029_20131112_180205_outLine +BABEL_OP2_303_22965_20131114_213605_inLine +BABEL_OP2_303_22965_20131114_213605_outLine +BABEL_OP2_303_28585_20131204_042033_inLine +BABEL_OP2_303_28585_20131204_042033_outLine +BABEL_OP2_303_28600_20131201_001853_inLine +BABEL_OP2_303_28600_20131201_001853_outLine +BABEL_OP2_303_28945_20131111_210924_inLine +BABEL_OP2_303_28945_20131111_210924_outLine +BABEL_OP2_303_34197_20131105_003635_inLine +BABEL_OP2_303_34197_20131105_003635_outLine +BABEL_OP2_303_34336_20131114_162157_inLine +BABEL_OP2_303_34336_20131114_162157_outLine +BABEL_OP2_303_36059_20131218_034050_inLine +BABEL_OP2_303_36059_20131218_034050_outLine +BABEL_OP2_303_37499_20140129_194730_inLine +BABEL_OP2_303_37499_20140129_194730_outLine +BABEL_OP2_303_37499_20140130_010436_inLine +BABEL_OP2_303_37499_20140130_010436_outLine +BABEL_OP2_303_38554_20131024_205502_inLine +BABEL_OP2_303_38554_20131024_205502_outLine +BABEL_OP2_303_39848_20131113_195552_inLine +BABEL_OP2_303_39848_20131113_195552_outLine +BABEL_OP2_303_40713_20131111_182733_inLine +BABEL_OP2_303_40713_20131111_182733_outLine +BABEL_OP2_303_40740_20131205_003945_inLine +BABEL_OP2_303_40740_20131205_003945_outLine +BABEL_OP2_303_41272_20140204_204727_inLine +BABEL_OP2_303_41272_20140204_204727_outLine +BABEL_OP2_303_41400_20140222_205655_inLine +BABEL_OP2_303_41400_20140222_205655_outLine +BABEL_OP2_303_43794_20140131_221611_inLine +BABEL_OP2_303_43794_20140131_221611_outLine +BABEL_OP2_303_45560_20131029_184514_inLine +BABEL_OP2_303_45560_20131029_184514_outLine +BABEL_OP2_303_46333_20131102_160049_inLine +BABEL_OP2_303_46333_20131102_160049_outLine +BABEL_OP2_303_46702_20131023_225137_inLine +BABEL_OP2_303_46702_20131023_225137_outLine +BABEL_OP2_303_49287_20131115_193114_inLine +BABEL_OP2_303_49287_20131115_193114_outLine +BABEL_OP2_303_49306_20140204_203901_inLine +BABEL_OP2_303_49306_20140204_203901_outLine +BABEL_OP2_303_51858_20140219_183931_inLine +BABEL_OP2_303_51858_20140219_183931_outLine +BABEL_OP2_303_52854_20131105_013802_inLine +BABEL_OP2_303_52854_20131105_013802_outLine +BABEL_OP2_303_55818_20131027_191439_inLine +BABEL_OP2_303_55818_20131027_191439_outLine +BABEL_OP2_303_56684_20131205_182944_inLine +BABEL_OP2_303_56684_20131205_182944_outLine +BABEL_OP2_303_56720_20131122_215343_inLine +BABEL_OP2_303_56720_20131122_215343_outLine +BABEL_OP2_303_58734_20131109_181122_inLine +BABEL_OP2_303_58734_20131109_181122_outLine +BABEL_OP2_303_60474_20131113_232723_inLine +BABEL_OP2_303_60474_20131113_232723_outLine 
+BABEL_OP2_303_61167_20131104_210455_inLine +BABEL_OP2_303_61167_20131104_210455_outLine +BABEL_OP2_303_62289_20140222_212804_inLine +BABEL_OP2_303_62289_20140222_212804_outLine +BABEL_OP2_303_64759_20131104_194712_inLine +BABEL_OP2_303_64759_20131104_194712_outLine +BABEL_OP2_303_64759_20131104_195356_inLine +BABEL_OP2_303_64759_20131104_195356_outLine +BABEL_OP2_303_65370_20140222_225324_inLine +BABEL_OP2_303_65370_20140222_225324_outLine +BABEL_OP2_303_69574_20131027_004044_inLine +BABEL_OP2_303_69574_20131027_004044_outLine +BABEL_OP2_303_70110_20131025_151421_inLine +BABEL_OP2_303_70110_20131025_151421_outLine +BABEL_OP2_303_73119_20131115_162847_inLine +BABEL_OP2_303_73119_20131115_162847_outLine +BABEL_OP2_303_73119_20131115_164236_inLine +BABEL_OP2_303_73119_20131115_164236_outLine +BABEL_OP2_303_73446_20140111_183215_inLine +BABEL_OP2_303_73446_20140111_183215_outLine +BABEL_OP2_303_74280_20131025_160420_inLine +BABEL_OP2_303_74280_20131025_160420_outLine +BABEL_OP2_303_75064_20131114_174949_inLine +BABEL_OP2_303_75064_20131114_174949_outLine +BABEL_OP2_303_77112_20131114_020655_inLine +BABEL_OP2_303_77112_20131114_020655_outLine +BABEL_OP2_303_82089_20131111_003358_inLine +BABEL_OP2_303_82089_20131111_003358_outLine +BABEL_OP2_303_83455_20131115_205335_inLine +BABEL_OP2_303_83455_20131115_205335_outLine +BABEL_OP2_303_84709_20140205_175937_inLine +BABEL_OP2_303_84709_20140205_175937_outLine +BABEL_OP2_303_86472_20131204_195705_inLine +BABEL_OP2_303_86472_20131204_195705_outLine +BABEL_OP2_303_86557_20131025_175510_inLine +BABEL_OP2_303_86557_20131025_175510_outLine +BABEL_OP2_303_87073_20131027_001213_inLine +BABEL_OP2_303_87073_20131027_001213_outLine +BABEL_OP2_303_87629_20131114_030529_inLine +BABEL_OP2_303_87629_20131114_030529_outLine +BABEL_OP2_303_88988_20140218_203032_inLine +BABEL_OP2_303_88988_20140218_203032_outLine +BABEL_OP2_303_91825_20131025_170933_inLine +BABEL_OP2_303_91825_20131025_170933_outLine +BABEL_OP2_303_91977_20131130_190309_inLine +BABEL_OP2_303_91977_20131130_190309_outLine +BABEL_OP2_303_92096_20131226_204359_inLine +BABEL_OP2_303_92096_20131226_204359_outLine +BABEL_OP2_303_92509_20131027_003447_inLine +BABEL_OP2_303_92509_20131027_003447_outLine +BABEL_OP2_303_99487_20131027_195100_inLine +BABEL_OP2_303_99487_20131027_195100_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/dev.list b/egs/babel/s5d/conf/lists/303-telugu/dev.list new file mode 100644 index 00000000000..2109ba73287 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/dev.list @@ -0,0 +1,126 @@ +BABEL_OP2_303_12220_20131108_232918_inLine +BABEL_OP2_303_12220_20131108_232918_outLine +BABEL_OP2_303_13040_20131115_232722_inLine +BABEL_OP2_303_13040_20131115_232722_outLine +BABEL_OP2_303_14158_20131204_203458_inLine +BABEL_OP2_303_14158_20131204_203458_outLine +BABEL_OP2_303_15466_20140204_191250_inLine +BABEL_OP2_303_15466_20140204_191250_outLine +BABEL_OP2_303_16056_20131030_201705_inLine +BABEL_OP2_303_16056_20131030_201705_outLine +BABEL_OP2_303_16475_20131109_024735_inLine +BABEL_OP2_303_16475_20131109_024735_outLine +BABEL_OP2_303_17280_20131105_033157_inLine +BABEL_OP2_303_17280_20131105_033157_outLine +BABEL_OP2_303_19703_20131114_213952_inLine +BABEL_OP2_303_19703_20131114_213952_outLine +BABEL_OP2_303_21029_20131112_180205_inLine +BABEL_OP2_303_21029_20131112_180205_outLine +BABEL_OP2_303_22965_20131114_213605_inLine +BABEL_OP2_303_22965_20131114_213605_outLine +BABEL_OP2_303_28585_20131204_042033_inLine +BABEL_OP2_303_28585_20131204_042033_outLine 
+BABEL_OP2_303_28600_20131201_001853_inLine +BABEL_OP2_303_28600_20131201_001853_outLine +BABEL_OP2_303_28945_20131111_210924_inLine +BABEL_OP2_303_28945_20131111_210924_outLine +BABEL_OP2_303_34197_20131105_003635_inLine +BABEL_OP2_303_34197_20131105_003635_outLine +BABEL_OP2_303_34336_20131114_162157_inLine +BABEL_OP2_303_34336_20131114_162157_outLine +BABEL_OP2_303_36059_20131218_034050_inLine +BABEL_OP2_303_36059_20131218_034050_outLine +BABEL_OP2_303_37499_20140129_194730_inLine +BABEL_OP2_303_37499_20140129_194730_outLine +BABEL_OP2_303_37499_20140130_010436_inLine +BABEL_OP2_303_37499_20140130_010436_outLine +BABEL_OP2_303_38554_20131024_205502_inLine +BABEL_OP2_303_38554_20131024_205502_outLine +BABEL_OP2_303_39848_20131113_195552_inLine +BABEL_OP2_303_39848_20131113_195552_outLine +BABEL_OP2_303_40713_20131111_182733_inLine +BABEL_OP2_303_40713_20131111_182733_outLine +BABEL_OP2_303_40740_20131205_003945_inLine +BABEL_OP2_303_40740_20131205_003945_outLine +BABEL_OP2_303_41272_20140204_204727_inLine +BABEL_OP2_303_41272_20140204_204727_outLine +BABEL_OP2_303_41400_20140222_205655_inLine +BABEL_OP2_303_41400_20140222_205655_outLine +BABEL_OP2_303_43794_20140131_221611_inLine +BABEL_OP2_303_43794_20140131_221611_outLine +BABEL_OP2_303_45560_20131029_184514_inLine +BABEL_OP2_303_45560_20131029_184514_outLine +BABEL_OP2_303_46333_20131102_160049_inLine +BABEL_OP2_303_46333_20131102_160049_outLine +BABEL_OP2_303_46702_20131023_225137_inLine +BABEL_OP2_303_46702_20131023_225137_outLine +BABEL_OP2_303_49287_20131115_193114_inLine +BABEL_OP2_303_49287_20131115_193114_outLine +BABEL_OP2_303_49306_20140204_203901_inLine +BABEL_OP2_303_49306_20140204_203901_outLine +BABEL_OP2_303_51858_20140219_183931_inLine +BABEL_OP2_303_51858_20140219_183931_outLine +BABEL_OP2_303_52854_20131105_013802_inLine +BABEL_OP2_303_52854_20131105_013802_outLine +BABEL_OP2_303_55818_20131027_191439_inLine +BABEL_OP2_303_55818_20131027_191439_outLine +BABEL_OP2_303_56684_20131205_182944_inLine +BABEL_OP2_303_56684_20131205_182944_outLine +BABEL_OP2_303_56720_20131122_215343_inLine +BABEL_OP2_303_56720_20131122_215343_outLine +BABEL_OP2_303_58734_20131109_181122_inLine +BABEL_OP2_303_58734_20131109_181122_outLine +BABEL_OP2_303_60474_20131113_232723_inLine +BABEL_OP2_303_60474_20131113_232723_outLine +BABEL_OP2_303_61167_20131104_210455_inLine +BABEL_OP2_303_61167_20131104_210455_outLine +BABEL_OP2_303_62289_20140222_212804_inLine +BABEL_OP2_303_62289_20140222_212804_outLine +BABEL_OP2_303_64759_20131104_194712_inLine +BABEL_OP2_303_64759_20131104_194712_outLine +BABEL_OP2_303_64759_20131104_195356_inLine +BABEL_OP2_303_64759_20131104_195356_outLine +BABEL_OP2_303_65370_20140222_225324_inLine +BABEL_OP2_303_65370_20140222_225324_outLine +BABEL_OP2_303_69574_20131027_004044_inLine +BABEL_OP2_303_69574_20131027_004044_outLine +BABEL_OP2_303_70110_20131025_151421_inLine +BABEL_OP2_303_70110_20131025_151421_outLine +BABEL_OP2_303_73119_20131115_162847_inLine +BABEL_OP2_303_73119_20131115_162847_outLine +BABEL_OP2_303_73119_20131115_164236_inLine +BABEL_OP2_303_73119_20131115_164236_outLine +BABEL_OP2_303_73446_20140111_183215_inLine +BABEL_OP2_303_73446_20140111_183215_outLine +BABEL_OP2_303_74280_20131025_160420_inLine +BABEL_OP2_303_74280_20131025_160420_outLine +BABEL_OP2_303_75064_20131114_174949_inLine +BABEL_OP2_303_75064_20131114_174949_outLine +BABEL_OP2_303_77112_20131114_020655_inLine +BABEL_OP2_303_77112_20131114_020655_outLine +BABEL_OP2_303_82089_20131111_003358_inLine 
+BABEL_OP2_303_82089_20131111_003358_outLine +BABEL_OP2_303_83455_20131115_205335_inLine +BABEL_OP2_303_83455_20131115_205335_outLine +BABEL_OP2_303_84709_20140205_175937_inLine +BABEL_OP2_303_84709_20140205_175937_outLine +BABEL_OP2_303_86472_20131204_195705_inLine +BABEL_OP2_303_86472_20131204_195705_outLine +BABEL_OP2_303_86557_20131025_175510_inLine +BABEL_OP2_303_86557_20131025_175510_outLine +BABEL_OP2_303_87073_20131027_001213_inLine +BABEL_OP2_303_87073_20131027_001213_outLine +BABEL_OP2_303_87629_20131114_030529_inLine +BABEL_OP2_303_87629_20131114_030529_outLine +BABEL_OP2_303_88988_20140218_203032_inLine +BABEL_OP2_303_88988_20140218_203032_outLine +BABEL_OP2_303_91825_20131025_170933_inLine +BABEL_OP2_303_91825_20131025_170933_outLine +BABEL_OP2_303_91977_20131130_190309_inLine +BABEL_OP2_303_91977_20131130_190309_outLine +BABEL_OP2_303_92096_20131226_204359_inLine +BABEL_OP2_303_92096_20131226_204359_outLine +BABEL_OP2_303_92509_20131027_003447_inLine +BABEL_OP2_303_92509_20131027_003447_outLine +BABEL_OP2_303_99487_20131027_195100_inLine +BABEL_OP2_303_99487_20131027_195100_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/eval.list b/egs/babel/s5d/conf/lists/303-telugu/eval.list new file mode 100644 index 00000000000..e40856e3e9d --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_303_10416_20131110_200711_inLine +BABEL_OP2_303_10416_20131110_200711_outLine +BABEL_OP2_303_10416_20131110_202619_inLine +BABEL_OP2_303_10416_20131110_202619_outLine +BABEL_OP2_303_10974_20131115_172420_inLine +BABEL_OP2_303_10974_20131115_172420_outLine +BABEL_OP2_303_10974_20131115_193956_inLine +BABEL_OP2_303_10974_20131115_193956_outLine +BABEL_OP2_303_11096_20140214_163324_inLine +BABEL_OP2_303_11096_20140214_163324_outLine +BABEL_OP2_303_12321_20131129_164832_inLine +BABEL_OP2_303_12321_20131129_164832_outLine +BABEL_OP2_303_12635_20131211_184855_inLine +BABEL_OP2_303_12635_20131211_184855_outLine +BABEL_OP2_303_12916_20131029_201419_inLine +BABEL_OP2_303_12916_20131029_201419_outLine +BABEL_OP2_303_14729_20131215_013912_inLine +BABEL_OP2_303_14729_20131215_013912_outLine +BABEL_OP2_303_17115_20140211_231649_inLine +BABEL_OP2_303_17115_20140211_231649_outLine +BABEL_OP2_303_17165_20131113_202255_inLine +BABEL_OP2_303_17165_20131113_202255_outLine +BABEL_OP2_303_19120_20131224_010850_inLine +BABEL_OP2_303_19120_20131224_010850_outLine +BABEL_OP2_303_23151_20131206_220005_inLine +BABEL_OP2_303_23151_20131206_220005_outLine +BABEL_OP2_303_23983_20140201_224449_inLine +BABEL_OP2_303_23983_20140201_224449_outLine +BABEL_OP2_303_24033_20131205_013346_inLine +BABEL_OP2_303_24033_20131205_013346_outLine +BABEL_OP2_303_26206_20131116_212034_inLine +BABEL_OP2_303_26206_20131116_212034_outLine +BABEL_OP2_303_27218_20131101_202112_inLine +BABEL_OP2_303_27218_20131101_202112_outLine +BABEL_OP2_303_28422_20131130_210214_inLine +BABEL_OP2_303_28422_20131130_210214_outLine +BABEL_OP2_303_29168_20131105_002039_inLine +BABEL_OP2_303_29168_20131105_002039_outLine +BABEL_OP2_303_31668_20140204_210838_inLine +BABEL_OP2_303_31668_20140204_210838_outLine +BABEL_OP2_303_31992_20131107_183659_inLine +BABEL_OP2_303_31992_20131107_183659_outLine +BABEL_OP2_303_32872_20140127_213914_inLine +BABEL_OP2_303_32872_20140127_213914_outLine +BABEL_OP2_303_32961_20140218_193151_inLine +BABEL_OP2_303_32961_20140218_193151_outLine +BABEL_OP2_303_33635_20131109_185729_inLine +BABEL_OP2_303_33635_20131109_185729_outLine +BABEL_OP2_303_34019_20140219_191126_inLine 
+BABEL_OP2_303_34019_20140219_191126_outLine +BABEL_OP2_303_34019_20140219_192321_inLine +BABEL_OP2_303_34019_20140219_192321_outLine +BABEL_OP2_303_34688_20131031_000954_inLine +BABEL_OP2_303_34688_20131031_000954_outLine +BABEL_OP2_303_37853_20131230_224659_inLine +BABEL_OP2_303_37853_20131230_224659_outLine +BABEL_OP2_303_39159_20131024_202413_inLine +BABEL_OP2_303_39159_20131024_202413_outLine +BABEL_OP2_303_42600_20131114_231539_inLine +BABEL_OP2_303_42600_20131114_231539_outLine +BABEL_OP2_303_43990_20140204_202831_inLine +BABEL_OP2_303_43990_20140204_202831_outLine +BABEL_OP2_303_44290_20140204_193649_inLine +BABEL_OP2_303_44290_20140204_193649_outLine +BABEL_OP2_303_45642_20131114_201049_inLine +BABEL_OP2_303_45642_20131114_201049_outLine +BABEL_OP2_303_45642_20131114_203559_inLine +BABEL_OP2_303_45642_20131114_203559_outLine +BABEL_OP2_303_45770_20131029_180305_inLine +BABEL_OP2_303_45770_20131029_180305_outLine +BABEL_OP2_303_45908_20140211_224100_inLine +BABEL_OP2_303_45908_20140211_224100_outLine +BABEL_OP2_303_46974_20131116_205026_inLine +BABEL_OP2_303_46974_20131116_205026_outLine +BABEL_OP2_303_47959_20131113_020835_inLine +BABEL_OP2_303_47959_20131113_020835_outLine +BABEL_OP2_303_48610_20131031_175448_inLine +BABEL_OP2_303_48610_20131031_175448_outLine +BABEL_OP2_303_49775_20131029_201844_inLine +BABEL_OP2_303_49775_20131029_201844_outLine +BABEL_OP2_303_49812_20131208_222038_inLine +BABEL_OP2_303_49812_20131208_222038_outLine +BABEL_OP2_303_51530_20140203_190540_inLine +BABEL_OP2_303_51530_20140203_190540_outLine +BABEL_OP2_303_52025_20131025_023135_inLine +BABEL_OP2_303_52025_20131025_023135_outLine +BABEL_OP2_303_52422_20131205_220934_inLine +BABEL_OP2_303_52422_20131205_220934_outLine +BABEL_OP2_303_52442_20131115_192454_inLine +BABEL_OP2_303_52442_20131115_192454_outLine +BABEL_OP2_303_52614_20140204_214212_inLine +BABEL_OP2_303_52614_20140204_214212_outLine +BABEL_OP2_303_53072_20140116_175409_inLine +BABEL_OP2_303_53072_20140116_175409_outLine +BABEL_OP2_303_56090_20131108_182022_inLine +BABEL_OP2_303_56090_20131108_182022_outLine +BABEL_OP2_303_57678_20131112_230248_inLine +BABEL_OP2_303_57678_20131112_230248_outLine +BABEL_OP2_303_58061_20140219_230114_inLine +BABEL_OP2_303_58061_20140219_230114_outLine +BABEL_OP2_303_59898_20131101_004202_inLine +BABEL_OP2_303_59898_20131101_004202_outLine +BABEL_OP2_303_59928_20131113_223724_inLine +BABEL_OP2_303_59928_20131113_223724_outLine +BABEL_OP2_303_59928_20131113_225824_inLine +BABEL_OP2_303_59928_20131113_225824_outLine +BABEL_OP2_303_60026_20131107_170611_inLine +BABEL_OP2_303_60026_20131107_170611_outLine +BABEL_OP2_303_60626_20131111_190013_inLine +BABEL_OP2_303_60626_20131111_190013_outLine +BABEL_OP2_303_62852_20131105_205005_inLine +BABEL_OP2_303_62852_20131105_205005_outLine +BABEL_OP2_303_63481_20131028_222923_inLine +BABEL_OP2_303_63481_20131028_222923_outLine +BABEL_OP2_303_63523_20140211_213504_inLine +BABEL_OP2_303_63523_20140211_213504_outLine +BABEL_OP2_303_64638_20131202_192509_inLine +BABEL_OP2_303_64638_20131202_192509_outLine +BABEL_OP2_303_64796_20131114_235122_inLine +BABEL_OP2_303_64796_20131114_235122_outLine +BABEL_OP2_303_65640_20140203_210724_inLine +BABEL_OP2_303_65640_20140203_210724_outLine +BABEL_OP2_303_66026_20131201_225144_inLine +BABEL_OP2_303_66026_20131201_225144_outLine +BABEL_OP2_303_66837_20131116_170219_inLine +BABEL_OP2_303_66837_20131116_170219_outLine +BABEL_OP2_303_66959_20131201_000211_inLine +BABEL_OP2_303_66959_20131201_000211_outLine 
+BABEL_OP2_303_66967_20131026_202801_inLine +BABEL_OP2_303_66967_20131026_202801_outLine +BABEL_OP2_303_67373_20131115_001228_inLine +BABEL_OP2_303_67373_20131115_001228_outLine +BABEL_OP2_303_67389_20140205_200604_inLine +BABEL_OP2_303_67389_20140205_200604_outLine +BABEL_OP2_303_67389_20140205_201314_inLine +BABEL_OP2_303_67389_20140205_201314_outLine +BABEL_OP2_303_70282_20131115_224940_inLine +BABEL_OP2_303_70282_20131115_224940_outLine +BABEL_OP2_303_73301_20131113_213007_inLine +BABEL_OP2_303_73301_20131113_213007_outLine +BABEL_OP2_303_74253_20131118_232619_inLine +BABEL_OP2_303_74253_20131118_232619_outLine +BABEL_OP2_303_75359_20140222_204832_inLine +BABEL_OP2_303_75359_20140222_204832_outLine +BABEL_OP2_303_77567_20131107_170005_inLine +BABEL_OP2_303_77567_20131107_170005_outLine +BABEL_OP2_303_79139_20131113_181752_inLine +BABEL_OP2_303_79139_20131113_181752_outLine +BABEL_OP2_303_79858_20131109_210103_inLine +BABEL_OP2_303_79858_20131109_210103_outLine +BABEL_OP2_303_81229_20131115_205519_inLine +BABEL_OP2_303_81229_20131115_205519_outLine +BABEL_OP2_303_81392_20131118_201348_inLine +BABEL_OP2_303_81392_20131118_201348_outLine +BABEL_OP2_303_81404_20131105_042501_inLine +BABEL_OP2_303_81404_20131105_042501_outLine +BABEL_OP2_303_83436_20131027_190144_inLine +BABEL_OP2_303_83436_20131027_190144_outLine +BABEL_OP2_303_84055_20131215_032429_inLine +BABEL_OP2_303_84055_20131215_032429_outLine +BABEL_OP2_303_84583_20131114_154624_inLine +BABEL_OP2_303_84583_20131114_154624_outLine +BABEL_OP2_303_87545_20140203_185743_inLine +BABEL_OP2_303_87545_20140203_185743_outLine +BABEL_OP2_303_87921_20131204_182122_inLine +BABEL_OP2_303_87921_20131204_182122_outLine +BABEL_OP2_303_89330_20140219_012432_inLine +BABEL_OP2_303_89330_20140219_012432_outLine +BABEL_OP2_303_93224_20131114_192358_inLine +BABEL_OP2_303_93224_20131114_192358_outLine +BABEL_OP2_303_94587_20131120_180235_inLine +BABEL_OP2_303_94587_20131120_180235_outLine +BABEL_OP2_303_95294_20131204_200315_inLine +BABEL_OP2_303_95294_20131204_200315_outLine +BABEL_OP2_303_95571_20140219_211426_inLine +BABEL_OP2_303_95571_20140219_211426_outLine +BABEL_OP2_303_96405_20131113_205241_inLine +BABEL_OP2_303_96405_20131113_205241_outLine +BABEL_OP2_303_96504_20131113_192045_inLine +BABEL_OP2_303_96504_20131113_192045_outLine +BABEL_OP2_303_96934_20131115_014431_inLine +BABEL_OP2_303_96934_20131115_014431_outLine +BABEL_OP2_303_96985_20131030_204329_inLine +BABEL_OP2_303_96985_20131030_204329_outLine +BABEL_OP2_303_97570_20131115_235518_inLine +BABEL_OP2_303_97570_20131115_235518_outLine +BABEL_OP2_303_97849_20140203_203326_inLine +BABEL_OP2_303_97849_20140203_203326_outLine +BABEL_OP2_303_99516_20131026_193835_inLine +BABEL_OP2_303_99516_20131026_193835_outLine +BABEL_OP2_303_99718_20131031_171828_inLine +BABEL_OP2_303_99718_20131031_171828_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list new file mode 100644 index 00000000000..528cd0840d9 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list @@ -0,0 +1,62 @@ +BABEL_OP2_303_11096_20140214_163324_inLine +BABEL_OP2_303_11096_20140214_163324_outLine +BABEL_OP2_303_14729_20131215_013912_inLine +BABEL_OP2_303_14729_20131215_013912_outLine +BABEL_OP2_303_17165_20131113_202255_inLine +BABEL_OP2_303_17165_20131113_202255_outLine +BABEL_OP2_303_23983_20140201_224449_inLine +BABEL_OP2_303_23983_20140201_224449_outLine +BABEL_OP2_303_24033_20131205_013346_inLine +BABEL_OP2_303_24033_20131205_013346_outLine 
+BABEL_OP2_303_29168_20131105_002039_inLine +BABEL_OP2_303_29168_20131105_002039_outLine +BABEL_OP2_303_32872_20140127_213914_inLine +BABEL_OP2_303_32872_20140127_213914_outLine +BABEL_OP2_303_33635_20131109_185729_inLine +BABEL_OP2_303_33635_20131109_185729_outLine +BABEL_OP2_303_34019_20140219_191126_inLine +BABEL_OP2_303_34019_20140219_191126_outLine +BABEL_OP2_303_34019_20140219_192321_inLine +BABEL_OP2_303_34019_20140219_192321_outLine +BABEL_OP2_303_44290_20140204_193649_inLine +BABEL_OP2_303_44290_20140204_193649_outLine +BABEL_OP2_303_47959_20131113_020835_inLine +BABEL_OP2_303_47959_20131113_020835_outLine +BABEL_OP2_303_49775_20131029_201844_inLine +BABEL_OP2_303_49775_20131029_201844_outLine +BABEL_OP2_303_52442_20131115_192454_inLine +BABEL_OP2_303_52442_20131115_192454_outLine +BABEL_OP2_303_56090_20131108_182022_inLine +BABEL_OP2_303_56090_20131108_182022_outLine +BABEL_OP2_303_60626_20131111_190013_inLine +BABEL_OP2_303_60626_20131111_190013_outLine +BABEL_OP2_303_63481_20131028_222923_inLine +BABEL_OP2_303_63481_20131028_222923_outLine +BABEL_OP2_303_63523_20140211_213504_inLine +BABEL_OP2_303_63523_20140211_213504_outLine +BABEL_OP2_303_66959_20131201_000211_inLine +BABEL_OP2_303_66959_20131201_000211_outLine +BABEL_OP2_303_66967_20131026_202801_inLine +BABEL_OP2_303_66967_20131026_202801_outLine +BABEL_OP2_303_74253_20131118_232619_inLine +BABEL_OP2_303_74253_20131118_232619_outLine +BABEL_OP2_303_75359_20140222_204832_inLine +BABEL_OP2_303_75359_20140222_204832_outLine +BABEL_OP2_303_77567_20131107_170005_inLine +BABEL_OP2_303_77567_20131107_170005_outLine +BABEL_OP2_303_79858_20131109_210103_inLine +BABEL_OP2_303_79858_20131109_210103_outLine +BABEL_OP2_303_81229_20131115_205519_inLine +BABEL_OP2_303_81229_20131115_205519_outLine +BABEL_OP2_303_84583_20131114_154624_inLine +BABEL_OP2_303_84583_20131114_154624_outLine +BABEL_OP2_303_89330_20140219_012432_inLine +BABEL_OP2_303_89330_20140219_012432_outLine +BABEL_OP2_303_95294_20131204_200315_inLine +BABEL_OP2_303_95294_20131204_200315_outLine +BABEL_OP2_303_95571_20140219_211426_inLine +BABEL_OP2_303_95571_20140219_211426_outLine +BABEL_OP2_303_96934_20131115_014431_inLine +BABEL_OP2_303_96934_20131115_014431_outLine +BABEL_OP2_303_97570_20131115_235518_inLine +BABEL_OP2_303_97570_20131115_235518_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list new file mode 100644 index 00000000000..3694701cd97 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list @@ -0,0 +1,134 @@ +BABEL_OP2_303_10188_20131108_175933_inLine +BABEL_OP2_303_10188_20131108_175933_outLine +BABEL_OP2_303_11673_20131026_034803_inLine +BABEL_OP2_303_11673_20131026_034803_outLine +BABEL_OP2_303_13030_20131109_023950_inLine +BABEL_OP2_303_13030_20131109_023950_outLine +BABEL_OP2_303_14875_20131112_211504_inLine +BABEL_OP2_303_14875_20131112_211504_outLine +BABEL_OP2_303_14929_20131112_164303_inLine +BABEL_OP2_303_14929_20131112_164303_outLine +BABEL_OP2_303_14929_20131112_165202_inLine +BABEL_OP2_303_14929_20131112_165202_outLine +BABEL_OP2_303_14929_20131112_171242_inLine +BABEL_OP2_303_14929_20131112_171242_outLine +BABEL_OP2_303_17127_20131224_002728_inLine +BABEL_OP2_303_17127_20131224_002728_outLine +BABEL_OP2_303_18380_20131111_015535_inLine +BABEL_OP2_303_18380_20131111_015535_outLine +BABEL_OP2_303_18380_20131119_224151_inLine +BABEL_OP2_303_18380_20131119_224151_outLine +BABEL_OP2_303_21435_20131226_175809_inLine +BABEL_OP2_303_21435_20131226_175809_outLine 
+BABEL_OP2_303_21435_20131226_181138_inLine +BABEL_OP2_303_21435_20131226_181138_outLine +BABEL_OP2_303_23681_20140119_223006_inLine +BABEL_OP2_303_23681_20140119_223006_outLine +BABEL_OP2_303_24231_20140201_230638_inLine +BABEL_OP2_303_24231_20140201_230638_outLine +BABEL_OP2_303_24589_20131114_182843_inLine +BABEL_OP2_303_24589_20131114_182843_outLine +BABEL_OP2_303_25767_20131028_161454_inLine +BABEL_OP2_303_25767_20131028_161454_outLine +BABEL_OP2_303_25961_20131030_225755_inLine +BABEL_OP2_303_25961_20131030_225755_outLine +BABEL_OP2_303_31490_20131105_010342_inLine +BABEL_OP2_303_31490_20131105_010342_outLine +BABEL_OP2_303_31490_20131105_011345_inLine +BABEL_OP2_303_31490_20131105_011345_outLine +BABEL_OP2_303_32861_20131216_223500_inLine +BABEL_OP2_303_32861_20131216_223500_outLine +BABEL_OP2_303_33704_20131210_195453_inLine +BABEL_OP2_303_33704_20131210_195453_outLine +BABEL_OP2_303_35069_20131205_165127_inLine +BABEL_OP2_303_35069_20131205_165127_outLine +BABEL_OP2_303_36341_20131024_221132_inLine +BABEL_OP2_303_36341_20131024_221132_outLine +BABEL_OP2_303_36669_20131110_155909_inLine +BABEL_OP2_303_36669_20131110_155909_outLine +BABEL_OP2_303_37682_20131105_023703_inLine +BABEL_OP2_303_37682_20131105_023703_outLine +BABEL_OP2_303_39307_20131027_043600_inLine +BABEL_OP2_303_39307_20131027_043600_outLine +BABEL_OP2_303_40565_20131116_182747_inLine +BABEL_OP2_303_40565_20131116_182747_outLine +BABEL_OP2_303_41493_20131027_155001_inLine +BABEL_OP2_303_41493_20131027_155001_outLine +BABEL_OP2_303_42718_20140118_201247_inLine +BABEL_OP2_303_42718_20140118_201247_outLine +BABEL_OP2_303_43115_20140201_195115_inLine +BABEL_OP2_303_43115_20140201_195115_outLine +BABEL_OP2_303_43789_20131111_163502_inLine +BABEL_OP2_303_43789_20131111_163502_outLine +BABEL_OP2_303_46550_20131111_233520_inLine +BABEL_OP2_303_46550_20131111_233520_outLine +BABEL_OP2_303_46558_20131028_190003_inLine +BABEL_OP2_303_46558_20131028_190003_outLine +BABEL_OP2_303_47823_20131201_004209_inLine +BABEL_OP2_303_47823_20131201_004209_outLine +BABEL_OP2_303_50726_20131028_210641_inLine +BABEL_OP2_303_50726_20131028_210641_outLine +BABEL_OP2_303_51540_20131204_041920_inLine +BABEL_OP2_303_51540_20131204_041920_outLine +BABEL_OP2_303_60538_20131111_200459_inLine +BABEL_OP2_303_60538_20131111_200459_outLine +BABEL_OP2_303_63084_20131115_202655_inLine +BABEL_OP2_303_63084_20131115_202655_outLine +BABEL_OP2_303_64768_20131113_203120_inLine +BABEL_OP2_303_64768_20131113_203120_outLine +BABEL_OP2_303_65077_20131024_174953_inLine +BABEL_OP2_303_65077_20131024_174953_outLine +BABEL_OP2_303_67964_20140222_211658_inLine +BABEL_OP2_303_67964_20140222_211658_outLine +BABEL_OP2_303_69107_20131113_222827_inLine +BABEL_OP2_303_69107_20131113_222827_outLine +BABEL_OP2_303_69633_20131130_193122_inLine +BABEL_OP2_303_69633_20131130_193122_outLine +BABEL_OP2_303_72587_20131115_221128_inLine +BABEL_OP2_303_72587_20131115_221128_outLine +BABEL_OP2_303_73990_20140219_201105_inLine +BABEL_OP2_303_73990_20140219_201105_outLine +BABEL_OP2_303_73990_20140219_202300_inLine +BABEL_OP2_303_73990_20140219_202300_outLine +BABEL_OP2_303_74886_20131101_194728_inLine +BABEL_OP2_303_74886_20131101_194728_outLine +BABEL_OP2_303_75365_20140218_173521_inLine +BABEL_OP2_303_75365_20140218_173521_outLine +BABEL_OP2_303_76756_20131115_182926_inLine +BABEL_OP2_303_76756_20131115_182926_outLine +BABEL_OP2_303_78454_20131114_230026_inLine +BABEL_OP2_303_78454_20131114_230026_outLine +BABEL_OP2_303_79820_20131114_181827_inLine 
+BABEL_OP2_303_79820_20131114_181827_outLine +BABEL_OP2_303_80881_20131027_165716_inLine +BABEL_OP2_303_80881_20131027_165716_outLine +BABEL_OP2_303_81424_20131120_192659_inLine +BABEL_OP2_303_81424_20131120_192659_outLine +BABEL_OP2_303_83935_20131122_222948_inLine +BABEL_OP2_303_83935_20131122_222948_outLine +BABEL_OP2_303_84061_20131104_224830_inLine +BABEL_OP2_303_84061_20131104_224830_outLine +BABEL_OP2_303_84327_20131122_203936_inLine +BABEL_OP2_303_84327_20131122_203936_outLine +BABEL_OP2_303_85248_20131206_184028_inLine +BABEL_OP2_303_85248_20131206_184028_outLine +BABEL_OP2_303_86952_20131105_173230_inLine +BABEL_OP2_303_86952_20131105_173230_outLine +BABEL_OP2_303_87884_20131206_022424_inLine +BABEL_OP2_303_87884_20131206_022424_outLine +BABEL_OP2_303_87889_20131213_215703_inLine +BABEL_OP2_303_87889_20131213_215703_outLine +BABEL_OP2_303_88982_20131115_181618_inLine +BABEL_OP2_303_88982_20131115_181618_outLine +BABEL_OP2_303_90080_20131228_233334_inLine +BABEL_OP2_303_90080_20131228_233334_outLine +BABEL_OP2_303_90740_20140221_220031_inLine +BABEL_OP2_303_90740_20140221_220031_outLine +BABEL_OP2_303_92176_20131115_153306_inLine +BABEL_OP2_303_92176_20131115_153306_outLine +BABEL_OP2_303_96324_20131107_162546_inLine +BABEL_OP2_303_96324_20131107_162546_outLine +BABEL_OP2_303_97988_20131204_195626_inLine +BABEL_OP2_303_97988_20131204_195626_outLine +BABEL_OP2_303_97988_20131204_211137_inLine +BABEL_OP2_303_97988_20131204_211137_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list new file mode 100644 index 00000000000..7d4ce3b8a3d --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP2_303_10058_20140205_001109_inLine +BABEL_OP2_303_10058_20140205_001109_outLine +BABEL_OP2_303_10638_20140218_213711_inLine +BABEL_OP2_303_10638_20140218_213711_outLine +BABEL_OP2_303_10938_20131104_204555_inLine +BABEL_OP2_303_10938_20131104_204555_outLine +BABEL_OP2_303_11352_20131224_005439_inLine +BABEL_OP2_303_11352_20131224_005439_outLine +BABEL_OP2_303_12036_20131101_174653_inLine +BABEL_OP2_303_12036_20131101_174653_outLine +BABEL_OP2_303_12242_20131113_222307_inLine +BABEL_OP2_303_12242_20131113_222307_outLine +BABEL_OP2_303_13324_20131107_211806_inLine +BABEL_OP2_303_13324_20131107_211806_outLine +BABEL_OP2_303_13586_20131115_180921_inLine +BABEL_OP2_303_13586_20131115_180921_outLine +BABEL_OP2_303_13664_20131108_184651_inLine +BABEL_OP2_303_13664_20131108_184651_outLine +BABEL_OP2_303_13744_20131026_234054_inLine +BABEL_OP2_303_13744_20131026_234054_outLine +BABEL_OP2_303_14229_20131114_032214_inLine +BABEL_OP2_303_14229_20131114_032214_outLine +BABEL_OP2_303_14350_20131105_195640_inLine +BABEL_OP2_303_14350_20131105_195640_outLine +BABEL_OP2_303_14899_20131102_204324_inLine +BABEL_OP2_303_14899_20131102_204324_outLine +BABEL_OP2_303_14972_20131114_023627_inLine +BABEL_OP2_303_14972_20131114_023627_outLine +BABEL_OP2_303_15702_20131206_225729_inLine +BABEL_OP2_303_15702_20131206_225729_outLine +BABEL_OP2_303_15730_20131101_163118_inLine +BABEL_OP2_303_15730_20131101_163118_outLine +BABEL_OP2_303_16184_20131204_033225_inLine +BABEL_OP2_303_16184_20131204_033225_outLine +BABEL_OP2_303_16839_20131223_215734_inLine +BABEL_OP2_303_16839_20131223_215734_outLine +BABEL_OP2_303_16886_20131108_204525_inLine +BABEL_OP2_303_16886_20131108_204525_outLine +BABEL_OP2_303_16938_20131112_015544_inLine +BABEL_OP2_303_16938_20131112_015544_outLine 
+BABEL_OP2_303_17520_20131114_164811_inLine +BABEL_OP2_303_17520_20131114_164811_outLine +BABEL_OP2_303_18242_20140218_014910_inLine +BABEL_OP2_303_18242_20140218_014910_outLine +BABEL_OP2_303_18924_20131112_001935_inLine +BABEL_OP2_303_18924_20131112_001935_outLine +BABEL_OP2_303_20437_20140202_232910_inLine +BABEL_OP2_303_20437_20140202_232910_outLine +BABEL_OP2_303_20437_20140202_234756_inLine +BABEL_OP2_303_20437_20140202_234756_outLine +BABEL_OP2_303_20985_20131122_183435_inLine +BABEL_OP2_303_20985_20131122_183435_outLine +BABEL_OP2_303_23006_20131113_150924_inLine +BABEL_OP2_303_23006_20131113_150924_outLine +BABEL_OP2_303_23046_20131114_171927_inLine +BABEL_OP2_303_23046_20131114_171927_outLine +BABEL_OP2_303_23239_20131206_181414_inLine +BABEL_OP2_303_23239_20131206_181414_outLine +BABEL_OP2_303_23260_20140203_194817_inLine +BABEL_OP2_303_23260_20140203_194817_outLine +BABEL_OP2_303_23505_20131109_184015_inLine +BABEL_OP2_303_23505_20131109_184015_outLine +BABEL_OP2_303_23980_20131114_202648_inLine +BABEL_OP2_303_23980_20131114_202648_outLine +BABEL_OP2_303_24010_20140218_224141_inLine +BABEL_OP2_303_24010_20140218_224141_outLine +BABEL_OP2_303_24323_20131201_180512_inLine +BABEL_OP2_303_24323_20131201_180512_outLine +BABEL_OP2_303_24470_20131204_174323_inLine +BABEL_OP2_303_24470_20131204_174323_outLine +BABEL_OP2_303_24982_20131114_012226_inLine +BABEL_OP2_303_24982_20131114_012226_outLine +BABEL_OP2_303_25198_20140121_180931_inLine +BABEL_OP2_303_25198_20140121_180931_outLine +BABEL_OP2_303_25719_20131205_191053_inLine +BABEL_OP2_303_25719_20131205_191053_outLine +BABEL_OP2_303_26072_20131216_221839_inLine +BABEL_OP2_303_26072_20131216_221839_outLine +BABEL_OP2_303_26388_20131113_034454_inLine +BABEL_OP2_303_26388_20131113_034454_outLine +BABEL_OP2_303_27125_20131024_195716_inLine +BABEL_OP2_303_27125_20131024_195716_outLine +BABEL_OP2_303_27590_20131118_222641_inLine +BABEL_OP2_303_27590_20131118_222641_outLine +BABEL_OP2_303_28419_20131113_195258_inLine +BABEL_OP2_303_28419_20131113_195258_outLine +BABEL_OP2_303_29404_20131224_014921_inLine +BABEL_OP2_303_29404_20131224_014921_outLine +BABEL_OP2_303_29482_20140219_221449_inLine +BABEL_OP2_303_29482_20140219_221449_outLine +BABEL_OP2_303_29685_20131105_180851_inLine +BABEL_OP2_303_29685_20131105_180851_outLine +BABEL_OP2_303_30013_20131116_185844_inLine +BABEL_OP2_303_30013_20131116_185844_outLine +BABEL_OP2_303_30345_20131224_005453_inLine +BABEL_OP2_303_30345_20131224_005453_outLine +BABEL_OP2_303_30395_20131112_004350_inLine +BABEL_OP2_303_30395_20131112_004350_outLine +BABEL_OP2_303_30645_20131029_193530_inLine +BABEL_OP2_303_30645_20131029_193530_outLine +BABEL_OP2_303_32048_20131204_223219_inLine +BABEL_OP2_303_32048_20131204_223219_outLine +BABEL_OP2_303_32171_20140203_203242_inLine +BABEL_OP2_303_32171_20140203_203242_outLine +BABEL_OP2_303_32301_20131120_212820_inLine +BABEL_OP2_303_32301_20131120_212820_outLine +BABEL_OP2_303_33229_20131206_220332_inLine +BABEL_OP2_303_33229_20131206_220332_outLine +BABEL_OP2_303_33424_20140129_211552_inLine +BABEL_OP2_303_33424_20140129_211552_outLine +BABEL_OP2_303_33672_20131029_201146_inLine +BABEL_OP2_303_33672_20131029_201146_outLine +BABEL_OP2_303_33913_20131116_003805_inLine +BABEL_OP2_303_33913_20131116_003805_outLine +BABEL_OP2_303_34106_20131027_203150_inLine +BABEL_OP2_303_34106_20131027_203150_outLine +BABEL_OP2_303_34811_20131115_235931_inLine +BABEL_OP2_303_34811_20131115_235931_outLine +BABEL_OP2_303_35000_20131210_184313_inLine 
+BABEL_OP2_303_35000_20131210_184313_outLine +BABEL_OP2_303_35008_20131120_185919_inLine +BABEL_OP2_303_35008_20131120_185919_outLine +BABEL_OP2_303_35143_20131206_023320_inLine +BABEL_OP2_303_35143_20131206_023320_outLine +BABEL_OP2_303_36594_20131215_014334_inLine +BABEL_OP2_303_36594_20131215_014334_outLine +BABEL_OP2_303_36594_20131215_022952_inLine +BABEL_OP2_303_36594_20131215_022952_outLine +BABEL_OP2_303_37228_20131216_171725_inLine +BABEL_OP2_303_37228_20131216_171725_outLine +BABEL_OP2_303_41469_20131025_210607_inLine +BABEL_OP2_303_41469_20131025_210607_outLine +BABEL_OP2_303_41609_20131031_164009_inLine +BABEL_OP2_303_41609_20131031_164009_outLine +BABEL_OP2_303_41680_20131108_184050_inLine +BABEL_OP2_303_41680_20131108_184050_outLine +BABEL_OP2_303_41692_20140119_000215_inLine +BABEL_OP2_303_41692_20140119_000215_outLine +BABEL_OP2_303_41692_20140120_002447_inLine +BABEL_OP2_303_41692_20140120_002447_outLine +BABEL_OP2_303_42526_20131216_190003_inLine +BABEL_OP2_303_42526_20131216_190003_outLine +BABEL_OP2_303_43784_20131115_013454_inLine +BABEL_OP2_303_43784_20131115_013454_outLine +BABEL_OP2_303_43784_20131115_014528_inLine +BABEL_OP2_303_43784_20131115_014528_outLine +BABEL_OP2_303_43788_20131202_222520_inLine +BABEL_OP2_303_43788_20131202_222520_outLine +BABEL_OP2_303_43920_20131130_143746_inLine +BABEL_OP2_303_43920_20131130_143746_outLine +BABEL_OP2_303_45459_20140201_203718_inLine +BABEL_OP2_303_45459_20140201_203718_outLine +BABEL_OP2_303_46330_20131210_212701_inLine +BABEL_OP2_303_46330_20131210_212701_outLine +BABEL_OP2_303_46688_20131108_184839_inLine +BABEL_OP2_303_46688_20131108_184839_outLine +BABEL_OP2_303_46757_20131116_193234_inLine +BABEL_OP2_303_46757_20131116_193234_outLine +BABEL_OP2_303_47215_20131108_200333_inLine +BABEL_OP2_303_47215_20131108_200333_outLine +BABEL_OP2_303_47487_20131104_200239_inLine +BABEL_OP2_303_47487_20131104_200239_outLine +BABEL_OP2_303_47637_20140222_233717_inLine +BABEL_OP2_303_47637_20140222_233717_outLine +BABEL_OP2_303_47866_20131230_165319_inLine +BABEL_OP2_303_47866_20131230_165319_outLine +BABEL_OP2_303_47878_20131116_184454_inLine +BABEL_OP2_303_47878_20131116_184454_outLine +BABEL_OP2_303_48844_20131030_014630_inLine +BABEL_OP2_303_48844_20131030_014630_outLine +BABEL_OP2_303_49027_20140127_225946_inLine +BABEL_OP2_303_49027_20140127_225946_outLine +BABEL_OP2_303_49197_20131115_221049_inLine +BABEL_OP2_303_49197_20131115_221049_outLine +BABEL_OP2_303_49216_20131031_011232_inLine +BABEL_OP2_303_49216_20131031_011232_outLine +BABEL_OP2_303_49437_20131211_205647_inLine +BABEL_OP2_303_49437_20131211_205647_outLine +BABEL_OP2_303_50565_20131102_213418_inLine +BABEL_OP2_303_50565_20131102_213418_outLine +BABEL_OP2_303_50779_20131215_002945_inLine +BABEL_OP2_303_50779_20131215_002945_outLine +BABEL_OP2_303_51015_20131121_004617_inLine +BABEL_OP2_303_51015_20131121_004617_outLine +BABEL_OP2_303_51968_20131113_214616_inLine +BABEL_OP2_303_51968_20131113_214616_outLine +BABEL_OP2_303_51968_20131113_220135_inLine +BABEL_OP2_303_51968_20131113_220135_outLine +BABEL_OP2_303_52272_20131027_195752_inLine +BABEL_OP2_303_52272_20131027_195752_outLine +BABEL_OP2_303_52381_20131216_174822_inLine +BABEL_OP2_303_52381_20131216_174822_outLine +BABEL_OP2_303_52490_20131027_172351_inLine +BABEL_OP2_303_52490_20131027_172351_outLine +BABEL_OP2_303_52804_20131105_185205_inLine +BABEL_OP2_303_52804_20131105_185205_outLine +BABEL_OP2_303_53144_20131227_024859_inLine +BABEL_OP2_303_53144_20131227_024859_outLine 
+BABEL_OP2_303_53665_20140204_194114_inLine +BABEL_OP2_303_53665_20140204_194114_outLine +BABEL_OP2_303_54104_20131030_190134_inLine +BABEL_OP2_303_54104_20131030_190134_outLine +BABEL_OP2_303_54162_20131114_015157_inLine +BABEL_OP2_303_54162_20131114_015157_outLine +BABEL_OP2_303_54744_20131101_012632_inLine +BABEL_OP2_303_54744_20131101_012632_outLine +BABEL_OP2_303_55968_20131027_154130_inLine +BABEL_OP2_303_55968_20131027_154130_outLine +BABEL_OP2_303_57141_20131129_191059_inLine +BABEL_OP2_303_57141_20131129_191059_outLine +BABEL_OP2_303_57464_20140204_205308_inLine +BABEL_OP2_303_57464_20140204_205308_outLine +BABEL_OP2_303_57464_20140204_220733_inLine +BABEL_OP2_303_57464_20140204_220733_outLine +BABEL_OP2_303_57566_20131205_002558_inLine +BABEL_OP2_303_57566_20131205_002558_outLine +BABEL_OP2_303_57782_20140222_210824_inLine +BABEL_OP2_303_57782_20140222_210824_outLine +BABEL_OP2_303_58313_20131114_234055_inLine +BABEL_OP2_303_58313_20131114_234055_outLine +BABEL_OP2_303_58821_20131121_205344_inLine +BABEL_OP2_303_58821_20131121_205344_outLine +BABEL_OP2_303_59509_20131130_021844_inLine +BABEL_OP2_303_59509_20131130_021844_outLine +BABEL_OP2_303_59635_20131205_021406_inLine +BABEL_OP2_303_59635_20131205_021406_outLine +BABEL_OP2_303_62014_20131114_203925_inLine +BABEL_OP2_303_62014_20131114_203925_outLine +BABEL_OP2_303_62714_20131228_155020_inLine +BABEL_OP2_303_62714_20131228_155020_outLine +BABEL_OP2_303_62810_20131028_225346_inLine +BABEL_OP2_303_62810_20131028_225346_outLine +BABEL_OP2_303_63604_20131101_000901_inLine +BABEL_OP2_303_63604_20131101_000901_outLine +BABEL_OP2_303_63730_20140218_210748_inLine +BABEL_OP2_303_63730_20140218_210748_outLine +BABEL_OP2_303_64014_20131229_214739_inLine +BABEL_OP2_303_64014_20131229_214739_outLine +BABEL_OP2_303_64065_20131111_230551_inLine +BABEL_OP2_303_64065_20131111_230551_outLine +BABEL_OP2_303_65561_20131122_180110_inLine +BABEL_OP2_303_65561_20131122_180110_outLine +BABEL_OP2_303_66001_20131031_192905_inLine +BABEL_OP2_303_66001_20131031_192905_outLine +BABEL_OP2_303_66361_20140203_182323_inLine +BABEL_OP2_303_66361_20140203_182323_outLine +BABEL_OP2_303_67283_20131109_213605_inLine +BABEL_OP2_303_67283_20131109_213605_outLine +BABEL_OP2_303_67401_20131114_215749_inLine +BABEL_OP2_303_67401_20131114_215749_outLine +BABEL_OP2_303_67401_20131114_221127_inLine +BABEL_OP2_303_67401_20131114_221127_outLine +BABEL_OP2_303_68068_20131204_212345_inLine +BABEL_OP2_303_68068_20131204_212345_outLine +BABEL_OP2_303_69153_20131204_184008_inLine +BABEL_OP2_303_69153_20131204_184008_outLine +BABEL_OP2_303_69992_20131030_011814_inLine +BABEL_OP2_303_69992_20131030_011814_outLine +BABEL_OP2_303_70221_20131124_180244_inLine +BABEL_OP2_303_70221_20131124_180244_outLine +BABEL_OP2_303_70251_20131027_201724_inLine +BABEL_OP2_303_70251_20131027_201724_outLine +BABEL_OP2_303_70452_20131115_202651_inLine +BABEL_OP2_303_70452_20131115_202651_outLine +BABEL_OP2_303_71067_20131115_221146_inLine +BABEL_OP2_303_71067_20131115_221146_outLine +BABEL_OP2_303_71189_20131225_050235_inLine +BABEL_OP2_303_71189_20131225_050235_outLine +BABEL_OP2_303_72040_20131112_173033_inLine +BABEL_OP2_303_72040_20131112_173033_outLine +BABEL_OP2_303_72844_20131111_192144_inLine +BABEL_OP2_303_72844_20131111_192144_outLine +BABEL_OP2_303_73022_20131216_173848_inLine +BABEL_OP2_303_73022_20131216_173848_outLine +BABEL_OP2_303_73299_20140217_173212_inLine +BABEL_OP2_303_73299_20140217_173212_outLine +BABEL_OP2_303_73591_20131020_193026_inLine 
+BABEL_OP2_303_73591_20131020_193026_outLine +BABEL_OP2_303_75342_20131122_191140_inLine +BABEL_OP2_303_75342_20131122_191140_outLine +BABEL_OP2_303_75505_20131102_220904_inLine +BABEL_OP2_303_75505_20131102_220904_outLine +BABEL_OP2_303_76902_20140205_233041_inLine +BABEL_OP2_303_76902_20140205_233041_outLine +BABEL_OP2_303_77730_20131107_221840_inLine +BABEL_OP2_303_77730_20131107_221840_outLine +BABEL_OP2_303_77744_20131113_232408_inLine +BABEL_OP2_303_77744_20131113_232408_outLine +BABEL_OP2_303_78544_20131204_194704_inLine +BABEL_OP2_303_78544_20131204_194704_outLine +BABEL_OP2_303_78604_20131101_194153_inLine +BABEL_OP2_303_78604_20131101_194153_outLine +BABEL_OP2_303_78943_20131115_213626_inLine +BABEL_OP2_303_78943_20131115_213626_outLine +BABEL_OP2_303_79451_20131114_213026_inLine +BABEL_OP2_303_79451_20131114_213026_outLine +BABEL_OP2_303_79590_20131113_222157_inLine +BABEL_OP2_303_79590_20131113_222157_outLine +BABEL_OP2_303_79751_20131105_025908_inLine +BABEL_OP2_303_79751_20131105_025908_outLine +BABEL_OP2_303_80559_20131101_190006_inLine +BABEL_OP2_303_80559_20131101_190006_outLine +BABEL_OP2_303_80622_20131130_040503_inLine +BABEL_OP2_303_80622_20131130_040503_outLine +BABEL_OP2_303_81149_20140203_201343_inLine +BABEL_OP2_303_81149_20140203_201343_outLine +BABEL_OP2_303_81287_20131121_184328_inLine +BABEL_OP2_303_81287_20131121_184328_outLine +BABEL_OP2_303_81671_20131205_004357_inLine +BABEL_OP2_303_81671_20131205_004357_outLine +BABEL_OP2_303_82622_20131029_212941_inLine +BABEL_OP2_303_82622_20131029_212941_outLine +BABEL_OP2_303_82935_20131205_024033_inLine +BABEL_OP2_303_82935_20131205_024033_outLine +BABEL_OP2_303_82935_20131205_025919_inLine +BABEL_OP2_303_82935_20131205_025919_outLine +BABEL_OP2_303_83771_20140119_181859_inLine +BABEL_OP2_303_83771_20140119_181859_outLine +BABEL_OP2_303_84458_20131204_213157_inLine +BABEL_OP2_303_84458_20131204_213157_outLine +BABEL_OP2_303_84547_20131026_230544_inLine +BABEL_OP2_303_84547_20131026_230544_outLine +BABEL_OP2_303_84605_20131112_192034_inLine +BABEL_OP2_303_84605_20131112_192034_outLine +BABEL_OP2_303_84805_20131204_153317_inLine +BABEL_OP2_303_84805_20131204_153317_outLine +BABEL_OP2_303_84936_20131115_204004_inLine +BABEL_OP2_303_84936_20131115_204004_outLine +BABEL_OP2_303_85340_20131111_215301_inLine +BABEL_OP2_303_85340_20131111_215301_outLine +BABEL_OP2_303_86191_20131114_221742_inLine +BABEL_OP2_303_86191_20131114_221742_outLine +BABEL_OP2_303_86321_20131204_175915_inLine +BABEL_OP2_303_86321_20131204_175915_outLine +BABEL_OP2_303_86467_20131025_013235_inLine +BABEL_OP2_303_86467_20131025_013235_outLine +BABEL_OP2_303_86676_20131204_185429_inLine +BABEL_OP2_303_86676_20131204_185429_outLine +BABEL_OP2_303_86713_20131206_165123_inLine +BABEL_OP2_303_86713_20131206_165123_outLine +BABEL_OP2_303_86891_20140222_195106_inLine +BABEL_OP2_303_86891_20140222_195106_outLine +BABEL_OP2_303_87313_20131116_193233_inLine +BABEL_OP2_303_87313_20131116_193233_outLine +BABEL_OP2_303_88776_20131031_184652_inLine +BABEL_OP2_303_88776_20131031_184652_outLine +BABEL_OP2_303_91125_20131102_191721_inLine +BABEL_OP2_303_91125_20131102_191721_outLine +BABEL_OP2_303_91944_20131107_214314_inLine +BABEL_OP2_303_91944_20131107_214314_outLine +BABEL_OP2_303_92605_20140205_192703_inLine +BABEL_OP2_303_92605_20140205_192703_outLine +BABEL_OP2_303_92757_20140211_221207_inLine +BABEL_OP2_303_92757_20140211_221207_outLine +BABEL_OP2_303_92792_20131223_042728_inLine +BABEL_OP2_303_92792_20131223_042728_outLine 
+BABEL_OP2_303_94025_20131211_211933_inLine +BABEL_OP2_303_94025_20131211_211933_outLine +BABEL_OP2_303_94333_20131029_193545_inLine +BABEL_OP2_303_94333_20131029_193545_outLine +BABEL_OP2_303_94745_20131204_205747_inLine +BABEL_OP2_303_94745_20131204_205747_outLine +BABEL_OP2_303_94869_20131101_184934_inLine +BABEL_OP2_303_94869_20131101_184934_outLine +BABEL_OP2_303_96690_20131114_194453_inLine +BABEL_OP2_303_96690_20131114_194453_outLine +BABEL_OP2_303_97286_20140205_223354_inLine +BABEL_OP2_303_97286_20140205_223354_outLine +BABEL_OP2_303_97772_20131024_230426_inLine +BABEL_OP2_303_97772_20131024_230426_outLine +BABEL_OP2_303_98311_20131107_224445_inLine +BABEL_OP2_303_98311_20131107_224445_outLine +BABEL_OP2_303_98356_20131121_191712_inLine +BABEL_OP2_303_98356_20131121_191712_outLine +BABEL_OP2_303_98390_20131029_164425_inLine +BABEL_OP2_303_98390_20131029_164425_outLine +BABEL_OP2_303_99955_20131215_222330_inLine +BABEL_OP2_303_99955_20131215_222330_outLine +BABEL_OP2_303_99955_20131216_231047_inLine +BABEL_OP2_303_99955_20131216_231047_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/training.list b/egs/babel/s5d/conf/lists/303-telugu/training.list new file mode 100644 index 00000000000..fec579c4325 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/training.list @@ -0,0 +1,514 @@ +BABEL_OP2_303_10058_20140205_001109_inLine +BABEL_OP2_303_10058_20140205_001109_outLine +BABEL_OP2_303_10188_20131108_175933_inLine +BABEL_OP2_303_10188_20131108_175933_outLine +BABEL_OP2_303_10638_20140218_213711_inLine +BABEL_OP2_303_10638_20140218_213711_outLine +BABEL_OP2_303_10938_20131104_204555_inLine +BABEL_OP2_303_10938_20131104_204555_outLine +BABEL_OP2_303_11352_20131224_005439_inLine +BABEL_OP2_303_11352_20131224_005439_outLine +BABEL_OP2_303_11673_20131026_034803_inLine +BABEL_OP2_303_11673_20131026_034803_outLine +BABEL_OP2_303_12036_20131101_174653_inLine +BABEL_OP2_303_12036_20131101_174653_outLine +BABEL_OP2_303_12242_20131113_222307_inLine +BABEL_OP2_303_12242_20131113_222307_outLine +BABEL_OP2_303_13030_20131109_023950_inLine +BABEL_OP2_303_13030_20131109_023950_outLine +BABEL_OP2_303_13324_20131107_211806_inLine +BABEL_OP2_303_13324_20131107_211806_outLine +BABEL_OP2_303_13586_20131115_180921_inLine +BABEL_OP2_303_13586_20131115_180921_outLine +BABEL_OP2_303_13664_20131108_184651_inLine +BABEL_OP2_303_13664_20131108_184651_outLine +BABEL_OP2_303_13744_20131026_234054_inLine +BABEL_OP2_303_13744_20131026_234054_outLine +BABEL_OP2_303_14229_20131114_032214_inLine +BABEL_OP2_303_14229_20131114_032214_outLine +BABEL_OP2_303_14350_20131105_195640_inLine +BABEL_OP2_303_14350_20131105_195640_outLine +BABEL_OP2_303_14875_20131112_211504_inLine +BABEL_OP2_303_14875_20131112_211504_outLine +BABEL_OP2_303_14899_20131102_204324_inLine +BABEL_OP2_303_14899_20131102_204324_outLine +BABEL_OP2_303_14929_20131112_164303_inLine +BABEL_OP2_303_14929_20131112_164303_outLine +BABEL_OP2_303_14929_20131112_165202_inLine +BABEL_OP2_303_14929_20131112_165202_outLine +BABEL_OP2_303_14929_20131112_171242_inLine +BABEL_OP2_303_14929_20131112_171242_outLine +BABEL_OP2_303_14972_20131114_023627_inLine +BABEL_OP2_303_14972_20131114_023627_outLine +BABEL_OP2_303_15702_20131206_225729_inLine +BABEL_OP2_303_15702_20131206_225729_outLine +BABEL_OP2_303_15730_20131101_163118_inLine +BABEL_OP2_303_15730_20131101_163118_outLine +BABEL_OP2_303_16184_20131204_033225_inLine +BABEL_OP2_303_16184_20131204_033225_outLine +BABEL_OP2_303_16839_20131223_215734_inLine +BABEL_OP2_303_16839_20131223_215734_outLine 
+BABEL_OP2_303_16886_20131108_204525_inLine +BABEL_OP2_303_16886_20131108_204525_outLine +BABEL_OP2_303_16938_20131112_015544_inLine +BABEL_OP2_303_16938_20131112_015544_outLine +BABEL_OP2_303_17127_20131224_002728_inLine +BABEL_OP2_303_17127_20131224_002728_outLine +BABEL_OP2_303_17520_20131114_164811_inLine +BABEL_OP2_303_17520_20131114_164811_outLine +BABEL_OP2_303_18242_20140218_014910_inLine +BABEL_OP2_303_18242_20140218_014910_outLine +BABEL_OP2_303_18380_20131111_015535_inLine +BABEL_OP2_303_18380_20131111_015535_outLine +BABEL_OP2_303_18380_20131119_224151_inLine +BABEL_OP2_303_18380_20131119_224151_outLine +BABEL_OP2_303_18924_20131112_001935_inLine +BABEL_OP2_303_18924_20131112_001935_outLine +BABEL_OP2_303_20437_20140202_232910_inLine +BABEL_OP2_303_20437_20140202_232910_outLine +BABEL_OP2_303_20437_20140202_234756_inLine +BABEL_OP2_303_20437_20140202_234756_outLine +BABEL_OP2_303_20985_20131122_183435_inLine +BABEL_OP2_303_20985_20131122_183435_outLine +BABEL_OP2_303_21435_20131226_175809_inLine +BABEL_OP2_303_21435_20131226_175809_outLine +BABEL_OP2_303_21435_20131226_181138_inLine +BABEL_OP2_303_21435_20131226_181138_outLine +BABEL_OP2_303_23006_20131113_150924_inLine +BABEL_OP2_303_23006_20131113_150924_outLine +BABEL_OP2_303_23046_20131114_171927_inLine +BABEL_OP2_303_23046_20131114_171927_outLine +BABEL_OP2_303_23239_20131206_181414_inLine +BABEL_OP2_303_23239_20131206_181414_outLine +BABEL_OP2_303_23260_20140203_194817_inLine +BABEL_OP2_303_23260_20140203_194817_outLine +BABEL_OP2_303_23505_20131109_184015_inLine +BABEL_OP2_303_23505_20131109_184015_outLine +BABEL_OP2_303_23681_20140119_223006_inLine +BABEL_OP2_303_23681_20140119_223006_outLine +BABEL_OP2_303_23980_20131114_202648_inLine +BABEL_OP2_303_23980_20131114_202648_outLine +BABEL_OP2_303_24010_20140218_224141_inLine +BABEL_OP2_303_24010_20140218_224141_outLine +BABEL_OP2_303_24231_20140201_230638_inLine +BABEL_OP2_303_24231_20140201_230638_outLine +BABEL_OP2_303_24323_20131201_180512_inLine +BABEL_OP2_303_24323_20131201_180512_outLine +BABEL_OP2_303_24470_20131204_174323_inLine +BABEL_OP2_303_24470_20131204_174323_outLine +BABEL_OP2_303_24589_20131114_182843_inLine +BABEL_OP2_303_24589_20131114_182843_outLine +BABEL_OP2_303_24982_20131114_012226_inLine +BABEL_OP2_303_24982_20131114_012226_outLine +BABEL_OP2_303_25198_20140121_180931_inLine +BABEL_OP2_303_25198_20140121_180931_outLine +BABEL_OP2_303_25719_20131205_191053_inLine +BABEL_OP2_303_25719_20131205_191053_outLine +BABEL_OP2_303_25767_20131028_161454_inLine +BABEL_OP2_303_25767_20131028_161454_outLine +BABEL_OP2_303_25961_20131030_225755_inLine +BABEL_OP2_303_25961_20131030_225755_outLine +BABEL_OP2_303_26072_20131216_221839_inLine +BABEL_OP2_303_26072_20131216_221839_outLine +BABEL_OP2_303_26388_20131113_034454_inLine +BABEL_OP2_303_26388_20131113_034454_outLine +BABEL_OP2_303_27125_20131024_195716_inLine +BABEL_OP2_303_27125_20131024_195716_outLine +BABEL_OP2_303_27590_20131118_222641_inLine +BABEL_OP2_303_27590_20131118_222641_outLine +BABEL_OP2_303_28419_20131113_195258_inLine +BABEL_OP2_303_28419_20131113_195258_outLine +BABEL_OP2_303_29404_20131224_014921_inLine +BABEL_OP2_303_29404_20131224_014921_outLine +BABEL_OP2_303_29482_20140219_221449_inLine +BABEL_OP2_303_29482_20140219_221449_outLine +BABEL_OP2_303_29685_20131105_180851_inLine +BABEL_OP2_303_29685_20131105_180851_outLine +BABEL_OP2_303_30013_20131116_185844_inLine +BABEL_OP2_303_30013_20131116_185844_outLine +BABEL_OP2_303_30345_20131224_005453_inLine 
+BABEL_OP2_303_30345_20131224_005453_outLine +BABEL_OP2_303_30395_20131112_004350_inLine +BABEL_OP2_303_30395_20131112_004350_outLine +BABEL_OP2_303_30645_20131029_193530_inLine +BABEL_OP2_303_30645_20131029_193530_outLine +BABEL_OP2_303_31490_20131105_010342_inLine +BABEL_OP2_303_31490_20131105_010342_outLine +BABEL_OP2_303_31490_20131105_011345_inLine +BABEL_OP2_303_31490_20131105_011345_outLine +BABEL_OP2_303_32048_20131204_223219_inLine +BABEL_OP2_303_32048_20131204_223219_outLine +BABEL_OP2_303_32171_20140203_203242_inLine +BABEL_OP2_303_32171_20140203_203242_outLine +BABEL_OP2_303_32301_20131120_212820_inLine +BABEL_OP2_303_32301_20131120_212820_outLine +BABEL_OP2_303_32861_20131216_223500_inLine +BABEL_OP2_303_32861_20131216_223500_outLine +BABEL_OP2_303_33229_20131206_220332_inLine +BABEL_OP2_303_33229_20131206_220332_outLine +BABEL_OP2_303_33424_20140129_211552_inLine +BABEL_OP2_303_33424_20140129_211552_outLine +BABEL_OP2_303_33672_20131029_201146_inLine +BABEL_OP2_303_33672_20131029_201146_outLine +BABEL_OP2_303_33704_20131210_195453_inLine +BABEL_OP2_303_33704_20131210_195453_outLine +BABEL_OP2_303_33913_20131116_003805_inLine +BABEL_OP2_303_33913_20131116_003805_outLine +BABEL_OP2_303_34106_20131027_203150_inLine +BABEL_OP2_303_34106_20131027_203150_outLine +BABEL_OP2_303_34811_20131115_235931_inLine +BABEL_OP2_303_34811_20131115_235931_outLine +BABEL_OP2_303_35000_20131210_184313_inLine +BABEL_OP2_303_35000_20131210_184313_outLine +BABEL_OP2_303_35008_20131120_185919_inLine +BABEL_OP2_303_35008_20131120_185919_outLine +BABEL_OP2_303_35069_20131205_165127_inLine +BABEL_OP2_303_35069_20131205_165127_outLine +BABEL_OP2_303_35143_20131206_023320_inLine +BABEL_OP2_303_35143_20131206_023320_outLine +BABEL_OP2_303_36341_20131024_221132_inLine +BABEL_OP2_303_36341_20131024_221132_outLine +BABEL_OP2_303_36594_20131215_014334_inLine +BABEL_OP2_303_36594_20131215_014334_outLine +BABEL_OP2_303_36594_20131215_022952_inLine +BABEL_OP2_303_36594_20131215_022952_outLine +BABEL_OP2_303_36669_20131110_155909_inLine +BABEL_OP2_303_36669_20131110_155909_outLine +BABEL_OP2_303_37228_20131216_171725_inLine +BABEL_OP2_303_37228_20131216_171725_outLine +BABEL_OP2_303_37682_20131105_023703_inLine +BABEL_OP2_303_37682_20131105_023703_outLine +BABEL_OP2_303_39307_20131027_043600_inLine +BABEL_OP2_303_39307_20131027_043600_outLine +BABEL_OP2_303_40565_20131116_182747_inLine +BABEL_OP2_303_40565_20131116_182747_outLine +BABEL_OP2_303_41469_20131025_210607_inLine +BABEL_OP2_303_41469_20131025_210607_outLine +BABEL_OP2_303_41493_20131027_155001_inLine +BABEL_OP2_303_41493_20131027_155001_outLine +BABEL_OP2_303_41609_20131031_164009_inLine +BABEL_OP2_303_41609_20131031_164009_outLine +BABEL_OP2_303_41680_20131108_184050_inLine +BABEL_OP2_303_41680_20131108_184050_outLine +BABEL_OP2_303_41692_20140119_000215_inLine +BABEL_OP2_303_41692_20140119_000215_outLine +BABEL_OP2_303_41692_20140120_002447_inLine +BABEL_OP2_303_41692_20140120_002447_outLine +BABEL_OP2_303_42526_20131216_190003_inLine +BABEL_OP2_303_42526_20131216_190003_outLine +BABEL_OP2_303_42718_20140118_201247_inLine +BABEL_OP2_303_42718_20140118_201247_outLine +BABEL_OP2_303_43115_20140201_195115_inLine +BABEL_OP2_303_43115_20140201_195115_outLine +BABEL_OP2_303_43784_20131115_013454_inLine +BABEL_OP2_303_43784_20131115_013454_outLine +BABEL_OP2_303_43784_20131115_014528_inLine +BABEL_OP2_303_43784_20131115_014528_outLine +BABEL_OP2_303_43788_20131202_222520_inLine +BABEL_OP2_303_43788_20131202_222520_outLine 
+BABEL_OP2_303_43789_20131111_163502_inLine +BABEL_OP2_303_43789_20131111_163502_outLine +BABEL_OP2_303_43920_20131130_143746_inLine +BABEL_OP2_303_43920_20131130_143746_outLine +BABEL_OP2_303_45459_20140201_203718_inLine +BABEL_OP2_303_45459_20140201_203718_outLine +BABEL_OP2_303_46330_20131210_212701_inLine +BABEL_OP2_303_46330_20131210_212701_outLine +BABEL_OP2_303_46550_20131111_233520_inLine +BABEL_OP2_303_46550_20131111_233520_outLine +BABEL_OP2_303_46558_20131028_190003_inLine +BABEL_OP2_303_46558_20131028_190003_outLine +BABEL_OP2_303_46688_20131108_184839_inLine +BABEL_OP2_303_46688_20131108_184839_outLine +BABEL_OP2_303_46757_20131116_193234_inLine +BABEL_OP2_303_46757_20131116_193234_outLine +BABEL_OP2_303_47215_20131108_200333_inLine +BABEL_OP2_303_47215_20131108_200333_outLine +BABEL_OP2_303_47487_20131104_200239_inLine +BABEL_OP2_303_47487_20131104_200239_outLine +BABEL_OP2_303_47637_20140222_233717_inLine +BABEL_OP2_303_47637_20140222_233717_outLine +BABEL_OP2_303_47823_20131201_004209_inLine +BABEL_OP2_303_47823_20131201_004209_outLine +BABEL_OP2_303_47866_20131230_165319_inLine +BABEL_OP2_303_47866_20131230_165319_outLine +BABEL_OP2_303_47878_20131116_184454_inLine +BABEL_OP2_303_47878_20131116_184454_outLine +BABEL_OP2_303_48844_20131030_014630_inLine +BABEL_OP2_303_48844_20131030_014630_outLine +BABEL_OP2_303_49027_20140127_225946_inLine +BABEL_OP2_303_49027_20140127_225946_outLine +BABEL_OP2_303_49197_20131115_221049_inLine +BABEL_OP2_303_49197_20131115_221049_outLine +BABEL_OP2_303_49216_20131031_011232_inLine +BABEL_OP2_303_49216_20131031_011232_outLine +BABEL_OP2_303_49437_20131211_205647_inLine +BABEL_OP2_303_49437_20131211_205647_outLine +BABEL_OP2_303_50565_20131102_213418_inLine +BABEL_OP2_303_50565_20131102_213418_outLine +BABEL_OP2_303_50726_20131028_210641_inLine +BABEL_OP2_303_50726_20131028_210641_outLine +BABEL_OP2_303_50779_20131215_002945_inLine +BABEL_OP2_303_50779_20131215_002945_outLine +BABEL_OP2_303_51015_20131121_004617_inLine +BABEL_OP2_303_51015_20131121_004617_outLine +BABEL_OP2_303_51540_20131204_041920_inLine +BABEL_OP2_303_51540_20131204_041920_outLine +BABEL_OP2_303_51968_20131113_214616_inLine +BABEL_OP2_303_51968_20131113_214616_outLine +BABEL_OP2_303_51968_20131113_220135_inLine +BABEL_OP2_303_51968_20131113_220135_outLine +BABEL_OP2_303_52272_20131027_195752_inLine +BABEL_OP2_303_52272_20131027_195752_outLine +BABEL_OP2_303_52381_20131216_174822_inLine +BABEL_OP2_303_52381_20131216_174822_outLine +BABEL_OP2_303_52490_20131027_172351_inLine +BABEL_OP2_303_52490_20131027_172351_outLine +BABEL_OP2_303_52804_20131105_185205_inLine +BABEL_OP2_303_52804_20131105_185205_outLine +BABEL_OP2_303_53144_20131227_024859_inLine +BABEL_OP2_303_53144_20131227_024859_outLine +BABEL_OP2_303_53665_20140204_194114_inLine +BABEL_OP2_303_53665_20140204_194114_outLine +BABEL_OP2_303_54104_20131030_190134_inLine +BABEL_OP2_303_54104_20131030_190134_outLine +BABEL_OP2_303_54162_20131114_015157_inLine +BABEL_OP2_303_54162_20131114_015157_outLine +BABEL_OP2_303_54744_20131101_012632_inLine +BABEL_OP2_303_54744_20131101_012632_outLine +BABEL_OP2_303_55968_20131027_154130_inLine +BABEL_OP2_303_55968_20131027_154130_outLine +BABEL_OP2_303_57141_20131129_191059_inLine +BABEL_OP2_303_57141_20131129_191059_outLine +BABEL_OP2_303_57464_20140204_205308_inLine +BABEL_OP2_303_57464_20140204_205308_outLine +BABEL_OP2_303_57464_20140204_220733_inLine +BABEL_OP2_303_57464_20140204_220733_outLine +BABEL_OP2_303_57566_20131205_002558_inLine 
+BABEL_OP2_303_57566_20131205_002558_outLine +BABEL_OP2_303_57782_20140222_210824_inLine +BABEL_OP2_303_57782_20140222_210824_outLine +BABEL_OP2_303_58313_20131114_234055_inLine +BABEL_OP2_303_58313_20131114_234055_outLine +BABEL_OP2_303_58821_20131121_205344_inLine +BABEL_OP2_303_58821_20131121_205344_outLine +BABEL_OP2_303_59509_20131130_021844_inLine +BABEL_OP2_303_59509_20131130_021844_outLine +BABEL_OP2_303_59635_20131205_021406_inLine +BABEL_OP2_303_59635_20131205_021406_outLine +BABEL_OP2_303_60538_20131111_200459_inLine +BABEL_OP2_303_60538_20131111_200459_outLine +BABEL_OP2_303_62014_20131114_203925_inLine +BABEL_OP2_303_62014_20131114_203925_outLine +BABEL_OP2_303_62714_20131228_155020_inLine +BABEL_OP2_303_62714_20131228_155020_outLine +BABEL_OP2_303_62810_20131028_225346_inLine +BABEL_OP2_303_62810_20131028_225346_outLine +BABEL_OP2_303_63084_20131115_202655_inLine +BABEL_OP2_303_63084_20131115_202655_outLine +BABEL_OP2_303_63604_20131101_000901_inLine +BABEL_OP2_303_63604_20131101_000901_outLine +BABEL_OP2_303_63730_20140218_210748_inLine +BABEL_OP2_303_63730_20140218_210748_outLine +BABEL_OP2_303_64014_20131229_214739_inLine +BABEL_OP2_303_64014_20131229_214739_outLine +BABEL_OP2_303_64065_20131111_230551_inLine +BABEL_OP2_303_64065_20131111_230551_outLine +BABEL_OP2_303_64768_20131113_203120_inLine +BABEL_OP2_303_64768_20131113_203120_outLine +BABEL_OP2_303_65077_20131024_174953_inLine +BABEL_OP2_303_65077_20131024_174953_outLine +BABEL_OP2_303_65561_20131122_180110_inLine +BABEL_OP2_303_65561_20131122_180110_outLine +BABEL_OP2_303_66001_20131031_192905_inLine +BABEL_OP2_303_66001_20131031_192905_outLine +BABEL_OP2_303_66361_20140203_182323_inLine +BABEL_OP2_303_66361_20140203_182323_outLine +BABEL_OP2_303_67283_20131109_213605_inLine +BABEL_OP2_303_67283_20131109_213605_outLine +BABEL_OP2_303_67401_20131114_215749_inLine +BABEL_OP2_303_67401_20131114_215749_outLine +BABEL_OP2_303_67401_20131114_221127_inLine +BABEL_OP2_303_67401_20131114_221127_outLine +BABEL_OP2_303_67964_20140222_211658_inLine +BABEL_OP2_303_67964_20140222_211658_outLine +BABEL_OP2_303_68068_20131204_212345_inLine +BABEL_OP2_303_68068_20131204_212345_outLine +BABEL_OP2_303_69107_20131113_222827_inLine +BABEL_OP2_303_69107_20131113_222827_outLine +BABEL_OP2_303_69153_20131204_184008_inLine +BABEL_OP2_303_69153_20131204_184008_outLine +BABEL_OP2_303_69633_20131130_193122_inLine +BABEL_OP2_303_69633_20131130_193122_outLine +BABEL_OP2_303_69992_20131030_011814_inLine +BABEL_OP2_303_69992_20131030_011814_outLine +BABEL_OP2_303_70221_20131124_180244_inLine +BABEL_OP2_303_70221_20131124_180244_outLine +BABEL_OP2_303_70251_20131027_201724_inLine +BABEL_OP2_303_70251_20131027_201724_outLine +BABEL_OP2_303_70452_20131115_202651_inLine +BABEL_OP2_303_70452_20131115_202651_outLine +BABEL_OP2_303_71067_20131115_221146_inLine +BABEL_OP2_303_71067_20131115_221146_outLine +BABEL_OP2_303_71189_20131225_050235_inLine +BABEL_OP2_303_71189_20131225_050235_outLine +BABEL_OP2_303_72040_20131112_173033_inLine +BABEL_OP2_303_72040_20131112_173033_outLine +BABEL_OP2_303_72587_20131115_221128_inLine +BABEL_OP2_303_72587_20131115_221128_outLine +BABEL_OP2_303_72844_20131111_192144_inLine +BABEL_OP2_303_72844_20131111_192144_outLine +BABEL_OP2_303_73022_20131216_173848_inLine +BABEL_OP2_303_73022_20131216_173848_outLine +BABEL_OP2_303_73299_20140217_173212_inLine +BABEL_OP2_303_73299_20140217_173212_outLine +BABEL_OP2_303_73591_20131020_193026_inLine +BABEL_OP2_303_73591_20131020_193026_outLine 
+BABEL_OP2_303_73990_20140219_201105_inLine +BABEL_OP2_303_73990_20140219_201105_outLine +BABEL_OP2_303_73990_20140219_202300_inLine +BABEL_OP2_303_73990_20140219_202300_outLine +BABEL_OP2_303_74886_20131101_194728_inLine +BABEL_OP2_303_74886_20131101_194728_outLine +BABEL_OP2_303_75342_20131122_191140_inLine +BABEL_OP2_303_75342_20131122_191140_outLine +BABEL_OP2_303_75365_20140218_173521_inLine +BABEL_OP2_303_75365_20140218_173521_outLine +BABEL_OP2_303_75505_20131102_220904_inLine +BABEL_OP2_303_75505_20131102_220904_outLine +BABEL_OP2_303_76756_20131115_182926_inLine +BABEL_OP2_303_76756_20131115_182926_outLine +BABEL_OP2_303_76902_20140205_233041_inLine +BABEL_OP2_303_76902_20140205_233041_outLine +BABEL_OP2_303_77730_20131107_221840_inLine +BABEL_OP2_303_77730_20131107_221840_outLine +BABEL_OP2_303_77744_20131113_232408_inLine +BABEL_OP2_303_77744_20131113_232408_outLine +BABEL_OP2_303_78454_20131114_230026_inLine +BABEL_OP2_303_78454_20131114_230026_outLine +BABEL_OP2_303_78544_20131204_194704_inLine +BABEL_OP2_303_78544_20131204_194704_outLine +BABEL_OP2_303_78604_20131101_194153_inLine +BABEL_OP2_303_78604_20131101_194153_outLine +BABEL_OP2_303_78943_20131115_213626_inLine +BABEL_OP2_303_78943_20131115_213626_outLine +BABEL_OP2_303_79451_20131114_213026_inLine +BABEL_OP2_303_79451_20131114_213026_outLine +BABEL_OP2_303_79590_20131113_222157_inLine +BABEL_OP2_303_79590_20131113_222157_outLine +BABEL_OP2_303_79751_20131105_025908_inLine +BABEL_OP2_303_79751_20131105_025908_outLine +BABEL_OP2_303_79820_20131114_181827_inLine +BABEL_OP2_303_79820_20131114_181827_outLine +BABEL_OP2_303_80559_20131101_190006_inLine +BABEL_OP2_303_80559_20131101_190006_outLine +BABEL_OP2_303_80622_20131130_040503_inLine +BABEL_OP2_303_80622_20131130_040503_outLine +BABEL_OP2_303_80881_20131027_165716_inLine +BABEL_OP2_303_80881_20131027_165716_outLine +BABEL_OP2_303_81149_20140203_201343_inLine +BABEL_OP2_303_81149_20140203_201343_outLine +BABEL_OP2_303_81287_20131121_184328_inLine +BABEL_OP2_303_81287_20131121_184328_outLine +BABEL_OP2_303_81424_20131120_192659_inLine +BABEL_OP2_303_81424_20131120_192659_outLine +BABEL_OP2_303_81671_20131205_004357_inLine +BABEL_OP2_303_81671_20131205_004357_outLine +BABEL_OP2_303_82622_20131029_212941_inLine +BABEL_OP2_303_82622_20131029_212941_outLine +BABEL_OP2_303_82935_20131205_024033_inLine +BABEL_OP2_303_82935_20131205_024033_outLine +BABEL_OP2_303_82935_20131205_025919_inLine +BABEL_OP2_303_82935_20131205_025919_outLine +BABEL_OP2_303_83771_20140119_181859_inLine +BABEL_OP2_303_83771_20140119_181859_outLine +BABEL_OP2_303_83935_20131122_222948_inLine +BABEL_OP2_303_83935_20131122_222948_outLine +BABEL_OP2_303_84061_20131104_224830_inLine +BABEL_OP2_303_84061_20131104_224830_outLine +BABEL_OP2_303_84327_20131122_203936_inLine +BABEL_OP2_303_84327_20131122_203936_outLine +BABEL_OP2_303_84458_20131204_213157_inLine +BABEL_OP2_303_84458_20131204_213157_outLine +BABEL_OP2_303_84547_20131026_230544_inLine +BABEL_OP2_303_84547_20131026_230544_outLine +BABEL_OP2_303_84605_20131112_192034_inLine +BABEL_OP2_303_84605_20131112_192034_outLine +BABEL_OP2_303_84805_20131204_153317_inLine +BABEL_OP2_303_84805_20131204_153317_outLine +BABEL_OP2_303_84936_20131115_204004_inLine +BABEL_OP2_303_84936_20131115_204004_outLine +BABEL_OP2_303_85248_20131206_184028_inLine +BABEL_OP2_303_85248_20131206_184028_outLine +BABEL_OP2_303_85340_20131111_215301_inLine +BABEL_OP2_303_85340_20131111_215301_outLine +BABEL_OP2_303_86191_20131114_221742_inLine 
+BABEL_OP2_303_86191_20131114_221742_outLine +BABEL_OP2_303_86321_20131204_175915_inLine +BABEL_OP2_303_86321_20131204_175915_outLine +BABEL_OP2_303_86467_20131025_013235_inLine +BABEL_OP2_303_86467_20131025_013235_outLine +BABEL_OP2_303_86676_20131204_185429_inLine +BABEL_OP2_303_86676_20131204_185429_outLine +BABEL_OP2_303_86713_20131206_165123_inLine +BABEL_OP2_303_86713_20131206_165123_outLine +BABEL_OP2_303_86891_20140222_195106_inLine +BABEL_OP2_303_86891_20140222_195106_outLine +BABEL_OP2_303_86952_20131105_173230_inLine +BABEL_OP2_303_86952_20131105_173230_outLine +BABEL_OP2_303_87313_20131116_193233_inLine +BABEL_OP2_303_87313_20131116_193233_outLine +BABEL_OP2_303_87884_20131206_022424_inLine +BABEL_OP2_303_87884_20131206_022424_outLine +BABEL_OP2_303_87889_20131213_215703_inLine +BABEL_OP2_303_87889_20131213_215703_outLine +BABEL_OP2_303_88776_20131031_184652_inLine +BABEL_OP2_303_88776_20131031_184652_outLine +BABEL_OP2_303_88982_20131115_181618_inLine +BABEL_OP2_303_88982_20131115_181618_outLine +BABEL_OP2_303_90080_20131228_233334_inLine +BABEL_OP2_303_90080_20131228_233334_outLine +BABEL_OP2_303_90740_20140221_220031_inLine +BABEL_OP2_303_90740_20140221_220031_outLine +BABEL_OP2_303_91125_20131102_191721_inLine +BABEL_OP2_303_91125_20131102_191721_outLine +BABEL_OP2_303_91944_20131107_214314_inLine +BABEL_OP2_303_91944_20131107_214314_outLine +BABEL_OP2_303_92176_20131115_153306_inLine +BABEL_OP2_303_92176_20131115_153306_outLine +BABEL_OP2_303_92605_20140205_192703_inLine +BABEL_OP2_303_92605_20140205_192703_outLine +BABEL_OP2_303_92757_20140211_221207_inLine +BABEL_OP2_303_92757_20140211_221207_outLine +BABEL_OP2_303_92792_20131223_042728_inLine +BABEL_OP2_303_92792_20131223_042728_outLine +BABEL_OP2_303_94025_20131211_211933_inLine +BABEL_OP2_303_94025_20131211_211933_outLine +BABEL_OP2_303_94333_20131029_193545_inLine +BABEL_OP2_303_94333_20131029_193545_outLine +BABEL_OP2_303_94745_20131204_205747_inLine +BABEL_OP2_303_94745_20131204_205747_outLine +BABEL_OP2_303_94869_20131101_184934_inLine +BABEL_OP2_303_94869_20131101_184934_outLine +BABEL_OP2_303_96324_20131107_162546_inLine +BABEL_OP2_303_96324_20131107_162546_outLine +BABEL_OP2_303_96690_20131114_194453_inLine +BABEL_OP2_303_96690_20131114_194453_outLine +BABEL_OP2_303_97286_20140205_223354_inLine +BABEL_OP2_303_97286_20140205_223354_outLine +BABEL_OP2_303_97772_20131024_230426_inLine +BABEL_OP2_303_97772_20131024_230426_outLine +BABEL_OP2_303_97988_20131204_195626_inLine +BABEL_OP2_303_97988_20131204_195626_outLine +BABEL_OP2_303_97988_20131204_211137_inLine +BABEL_OP2_303_97988_20131204_211137_outLine +BABEL_OP2_303_98311_20131107_224445_inLine +BABEL_OP2_303_98311_20131107_224445_outLine +BABEL_OP2_303_98356_20131121_191712_inLine +BABEL_OP2_303_98356_20131121_191712_outLine +BABEL_OP2_303_98390_20131029_164425_inLine +BABEL_OP2_303_98390_20131029_164425_outLine +BABEL_OP2_303_99955_20131215_222330_inLine +BABEL_OP2_303_99955_20131215_222330_outLine +BABEL_OP2_303_99955_20131216_231047_inLine +BABEL_OP2_303_99955_20131216_231047_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list new file mode 100644 index 00000000000..29a7e3f80b4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list @@ -0,0 +1,501 @@ +BABEL_OP2_303_11310_20140309_202017_inLine +BABEL_OP2_303_11310_20140309_202017_outLine +BABEL_OP2_303_11723_20140320_222729_inLine +BABEL_OP2_303_11723_20140320_222729_outLine 
+BABEL_OP2_303_11723_20140320_223508_inLine +BABEL_OP2_303_11723_20140320_223508_outLine +BABEL_OP2_303_13184_20131204_011559_inLine +BABEL_OP2_303_13184_20131204_011559_outLine +BABEL_OP2_303_13189_20131211_195308_inLine +BABEL_OP2_303_13189_20131211_195308_outLine +BABEL_OP2_303_13792_20131029_222536_inLine +BABEL_OP2_303_13792_20131029_222536_outLine +BABEL_OP2_303_13929_20140327_182253_inLine +BABEL_OP2_303_13929_20140327_182253_outLine +BABEL_OP2_303_14575_20140328_215314_inLine +BABEL_OP2_303_14575_20140328_215314_outLine +BABEL_OP2_303_14723_20140327_220200_inLine +BABEL_OP2_303_14723_20140327_220200_outLine +BABEL_OP2_303_14884_20140320_193514_inLine +BABEL_OP2_303_14884_20140320_193514_outLine +BABEL_OP2_303_14884_20140320_195858_inLine +BABEL_OP2_303_14884_20140320_195858_outLine +BABEL_OP2_303_15926_20131130_215154_inLine +BABEL_OP2_303_15926_20131130_215154_outLine +BABEL_OP2_303_16351_20140309_193931_inLine +BABEL_OP2_303_16351_20140309_193931_outLine +BABEL_OP2_303_16726_20140328_174353_inLine +BABEL_OP2_303_16726_20140328_174353_outLine +BABEL_OP2_303_17511_20140327_212725_inLine +BABEL_OP2_303_17511_20140327_212725_outLine +BABEL_OP2_303_17751_20140130_221610_inLine +BABEL_OP2_303_17751_20140130_221610_outLine +BABEL_OP2_303_17890_20131116_201518_inLine +BABEL_OP2_303_17890_20131116_201518_outLine +BABEL_OP2_303_17914_20131229_223237_inLine +BABEL_OP2_303_17914_20131229_223237_outLine +BABEL_OP2_303_17937_20140319_174736_inLine +BABEL_OP2_303_17937_20140319_174736_outLine +BABEL_OP2_303_18280_20140328_223246_inLine +BABEL_OP2_303_18280_20140328_223246_outLine +BABEL_OP2_303_18297_20140125_191248_inLine +BABEL_OP2_303_18297_20140125_191248_outLine +BABEL_OP2_303_18566_20131228_173117_inLine +BABEL_OP2_303_18566_20131228_173117_outLine +BABEL_OP2_303_19101_20131114_161754_inLine +BABEL_OP2_303_19101_20131114_161754_outLine +BABEL_OP2_303_19440_20140325_010253_inLine +BABEL_OP2_303_19440_20140325_010253_outLine +BABEL_OP2_303_19444_20140324_030047_inLine +BABEL_OP2_303_19444_20140324_030047_outLine +BABEL_OP2_303_19621_20131117_014609_inLine +BABEL_OP2_303_19621_20131117_014609_outLine +BABEL_OP2_303_20682_20131128_201847_inLine +BABEL_OP2_303_20682_20131128_201847_outLine +BABEL_OP2_303_20738_20131230_225647_inLine +BABEL_OP2_303_20738_20131230_225647_outLine +BABEL_OP2_303_20896_20140328_234931_inLine +BABEL_OP2_303_20896_20140328_234931_outLine +BABEL_OP2_303_21159_20140318_195039_inLine +BABEL_OP2_303_21159_20140318_195039_outLine +BABEL_OP2_303_21244_20140129_215632_inLine +BABEL_OP2_303_21244_20140129_215632_outLine +BABEL_OP2_303_21315_20140405_194002_inLine +BABEL_OP2_303_21315_20140405_194002_outLine +BABEL_OP2_303_22021_20140413_225936_inLine +BABEL_OP2_303_22021_20140413_225936_outLine +BABEL_OP2_303_22591_20140404_023216_inLine +BABEL_OP2_303_22591_20140404_023216_outLine +BABEL_OP2_303_22641_20131025_191802_inLine +BABEL_OP2_303_22641_20131025_191802_outLine +BABEL_OP2_303_22643_20140319_183843_inLine +BABEL_OP2_303_22643_20140319_183843_outLine +BABEL_OP2_303_23355_20140324_163413_inLine +BABEL_OP2_303_23355_20140324_163413_outLine +BABEL_OP2_303_23628_20131114_233248_inLine +BABEL_OP2_303_23628_20131114_233248_outLine +BABEL_OP2_303_23700_20140330_203130_inLine +BABEL_OP2_303_23700_20140330_203130_outLine +BABEL_OP2_303_24587_20140324_011441_inLine +BABEL_OP2_303_24587_20140324_011441_outLine +BABEL_OP2_303_24587_20140324_035935_inLine +BABEL_OP2_303_24587_20140324_035935_outLine +BABEL_OP2_303_24648_20140324_212818_inLine 
+BABEL_OP2_303_24648_20140324_212818_outLine +BABEL_OP2_303_25012_20140309_203215_inLine +BABEL_OP2_303_25012_20140309_203215_outLine +BABEL_OP2_303_25085_20140213_175133_inLine +BABEL_OP2_303_25085_20140213_175133_outLine +BABEL_OP2_303_25242_20140308_200459_inLine +BABEL_OP2_303_25242_20140308_200459_outLine +BABEL_OP2_303_25496_20140325_025625_inLine +BABEL_OP2_303_25496_20140325_025625_outLine +BABEL_OP2_303_26074_20131114_211040_inLine +BABEL_OP2_303_26074_20131114_211040_outLine +BABEL_OP2_303_27014_20140309_212535_inLine +BABEL_OP2_303_27014_20140309_212535_outLine +BABEL_OP2_303_27478_20131228_145746_inLine +BABEL_OP2_303_27478_20131228_145746_outLine +BABEL_OP2_303_28303_20131030_203335_inLine +BABEL_OP2_303_28303_20131030_203335_outLine +BABEL_OP2_303_28814_20131216_215127_inLine +BABEL_OP2_303_28814_20131216_215127_outLine +BABEL_OP2_303_29072_20131118_191936_outLine +BABEL_OP2_303_29563_20140327_193023_inLine +BABEL_OP2_303_29563_20140327_193023_outLine +BABEL_OP2_303_29643_20140131_234915_inLine +BABEL_OP2_303_29643_20140131_234915_outLine +BABEL_OP2_303_29765_20140317_141957_inLine +BABEL_OP2_303_29765_20140317_141957_outLine +BABEL_OP2_303_30084_20140212_191819_inLine +BABEL_OP2_303_30084_20140212_191819_outLine +BABEL_OP2_303_30250_20131105_004442_inLine +BABEL_OP2_303_30250_20131105_004442_outLine +BABEL_OP2_303_32832_20131204_034501_inLine +BABEL_OP2_303_32832_20131204_034501_outLine +BABEL_OP2_303_33273_20131106_231154_inLine +BABEL_OP2_303_33273_20131106_231154_outLine +BABEL_OP2_303_33774_20140325_031929_inLine +BABEL_OP2_303_33774_20140325_031929_outLine +BABEL_OP2_303_34064_20140324_183744_inLine +BABEL_OP2_303_34064_20140324_183744_outLine +BABEL_OP2_303_34208_20140404_030609_inLine +BABEL_OP2_303_34208_20140404_030609_outLine +BABEL_OP2_303_34477_20131113_195424_inLine +BABEL_OP2_303_34477_20131113_195424_outLine +BABEL_OP2_303_35420_20140318_214611_inLine +BABEL_OP2_303_35420_20140318_214611_outLine +BABEL_OP2_303_35467_20131114_210333_inLine +BABEL_OP2_303_35467_20131114_210333_outLine +BABEL_OP2_303_35885_20131225_181427_inLine +BABEL_OP2_303_35885_20131225_181427_outLine +BABEL_OP2_303_36017_20140204_222306_inLine +BABEL_OP2_303_36017_20140204_222306_outLine +BABEL_OP2_303_36147_20140402_224231_inLine +BABEL_OP2_303_36147_20140402_224231_outLine +BABEL_OP2_303_36900_20131223_225105_inLine +BABEL_OP2_303_36900_20131223_225105_outLine +BABEL_OP2_303_36990_20131111_022257_inLine +BABEL_OP2_303_36990_20131111_022257_outLine +BABEL_OP2_303_37290_20131114_034451_inLine +BABEL_OP2_303_37290_20131114_034451_outLine +BABEL_OP2_303_38340_20131114_184816_inLine +BABEL_OP2_303_38340_20131114_184816_outLine +BABEL_OP2_303_39099_20140127_233334_inLine +BABEL_OP2_303_39099_20140127_233334_outLine +BABEL_OP2_303_39277_20140324_193505_inLine +BABEL_OP2_303_39277_20140324_193505_outLine +BABEL_OP2_303_39579_20140327_191248_inLine +BABEL_OP2_303_39579_20140327_191248_outLine +BABEL_OP2_303_39680_20131211_183650_inLine +BABEL_OP2_303_39680_20131211_183650_outLine +BABEL_OP2_303_40092_20140329_200501_inLine +BABEL_OP2_303_40092_20140329_200501_outLine +BABEL_OP2_303_40092_20140329_201239_inLine +BABEL_OP2_303_40092_20140329_201239_outLine +BABEL_OP2_303_40092_20140329_202122_inLine +BABEL_OP2_303_40092_20140329_202122_outLine +BABEL_OP2_303_40648_20140319_195523_inLine +BABEL_OP2_303_40648_20140319_195523_outLine +BABEL_OP2_303_40939_20140415_195416_inLine +BABEL_OP2_303_40939_20140415_195416_outLine +BABEL_OP2_303_41745_20131109_041340_inLine 
+BABEL_OP2_303_41745_20131109_041340_outLine +BABEL_OP2_303_42155_20131114_053239_inLine +BABEL_OP2_303_42155_20131114_053239_outLine +BABEL_OP2_303_42243_20131025_222121_inLine +BABEL_OP2_303_42243_20131025_222121_outLine +BABEL_OP2_303_42619_20131124_172939_inLine +BABEL_OP2_303_42619_20131124_172939_outLine +BABEL_OP2_303_42834_20131115_023812_inLine +BABEL_OP2_303_42834_20131115_023812_outLine +BABEL_OP2_303_43395_20140405_161423_inLine +BABEL_OP2_303_43395_20140405_161423_outLine +BABEL_OP2_303_44114_20140405_145238_inLine +BABEL_OP2_303_44114_20140405_145238_outLine +BABEL_OP2_303_44619_20131109_201926_inLine +BABEL_OP2_303_44619_20131109_201926_outLine +BABEL_OP2_303_44678_20140320_185927_inLine +BABEL_OP2_303_44678_20140320_185927_outLine +BABEL_OP2_303_44898_20140309_220734_inLine +BABEL_OP2_303_44898_20140309_220734_outLine +BABEL_OP2_303_45121_20140207_012357_inLine +BABEL_OP2_303_45121_20140207_012357_outLine +BABEL_OP2_303_45140_20140205_001649_inLine +BABEL_OP2_303_45140_20140205_001649_outLine +BABEL_OP2_303_45777_20131116_041840_inLine +BABEL_OP2_303_45777_20131116_041840_outLine +BABEL_OP2_303_46535_20140404_014728_inLine +BABEL_OP2_303_46535_20140404_014728_outLine +BABEL_OP2_303_46712_20131114_191120_inLine +BABEL_OP2_303_46712_20131114_191120_outLine +BABEL_OP2_303_47877_20131218_041443_inLine +BABEL_OP2_303_47877_20131218_041443_outLine +BABEL_OP2_303_47882_20140309_225723_inLine +BABEL_OP2_303_47882_20140309_225723_outLine +BABEL_OP2_303_48024_20140324_154856_inLine +BABEL_OP2_303_48024_20140324_154856_outLine +BABEL_OP2_303_49001_20131114_194536_inLine +BABEL_OP2_303_49001_20131114_194536_outLine +BABEL_OP2_303_49870_20140330_002407_inLine +BABEL_OP2_303_49870_20140330_002407_outLine +BABEL_OP2_303_49870_20140330_003441_inLine +BABEL_OP2_303_49870_20140330_003441_outLine +BABEL_OP2_303_49902_20131104_154633_inLine +BABEL_OP2_303_49902_20131104_154633_outLine +BABEL_OP2_303_49907_20131114_011516_inLine +BABEL_OP2_303_49907_20131114_011516_outLine +BABEL_OP2_303_50427_20131113_234859_inLine +BABEL_OP2_303_50427_20131113_234859_outLine +BABEL_OP2_303_50630_20131130_231747_inLine +BABEL_OP2_303_50630_20131130_231747_outLine +BABEL_OP2_303_50940_20140203_224023_inLine +BABEL_OP2_303_50940_20140203_224023_outLine +BABEL_OP2_303_50958_20131110_200903_inLine +BABEL_OP2_303_50958_20131110_200903_outLine +BABEL_OP2_303_51414_20140118_210505_inLine +BABEL_OP2_303_51414_20140118_210505_outLine +BABEL_OP2_303_51417_20131205_015949_inLine +BABEL_OP2_303_51417_20131205_015949_outLine +BABEL_OP2_303_52058_20140318_223046_inLine +BABEL_OP2_303_52058_20140318_223046_outLine +BABEL_OP2_303_52058_20140318_223719_inLine +BABEL_OP2_303_52058_20140318_223719_outLine +BABEL_OP2_303_52322_20140319_164229_inLine +BABEL_OP2_303_52322_20140319_164229_outLine +BABEL_OP2_303_52818_20131115_053831_inLine +BABEL_OP2_303_52818_20131115_053831_outLine +BABEL_OP2_303_53010_20140403_235230_inLine +BABEL_OP2_303_53010_20140403_235230_outLine +BABEL_OP2_303_53068_20140321_041556_inLine +BABEL_OP2_303_53068_20140321_041556_outLine +BABEL_OP2_303_53206_20140308_201930_inLine +BABEL_OP2_303_53206_20140308_201930_outLine +BABEL_OP2_303_54405_20131113_021212_inLine +BABEL_OP2_303_54405_20131113_021212_outLine +BABEL_OP2_303_54953_20131109_030545_inLine +BABEL_OP2_303_54953_20131109_030545_outLine +BABEL_OP2_303_55013_20140204_205447_inLine +BABEL_OP2_303_55013_20140204_205447_outLine +BABEL_OP2_303_55742_20131114_230121_inLine +BABEL_OP2_303_55742_20131114_230121_outLine 
+BABEL_OP2_303_56306_20131206_164521_inLine +BABEL_OP2_303_56306_20131206_164521_outLine +BABEL_OP2_303_56326_20140309_213505_inLine +BABEL_OP2_303_56326_20140309_213505_outLine +BABEL_OP2_303_56370_20131030_191610_inLine +BABEL_OP2_303_56370_20131030_191610_outLine +BABEL_OP2_303_56523_20131109_044230_inLine +BABEL_OP2_303_56523_20131109_044230_outLine +BABEL_OP2_303_56743_20131109_043328_inLine +BABEL_OP2_303_56743_20131109_043328_outLine +BABEL_OP2_303_57065_20131204_193037_inLine +BABEL_OP2_303_57065_20131204_193037_outLine +BABEL_OP2_303_57650_20131230_182126_inLine +BABEL_OP2_303_57650_20131230_182126_outLine +BABEL_OP2_303_58717_20131115_231922_inLine +BABEL_OP2_303_58717_20131115_231922_outLine +BABEL_OP2_303_59039_20140219_180738_inLine +BABEL_OP2_303_59039_20140219_180738_outLine +BABEL_OP2_303_59091_20131206_183149_inLine +BABEL_OP2_303_59091_20131206_183149_outLine +BABEL_OP2_303_59163_20140416_164729_inLine +BABEL_OP2_303_59163_20140416_164729_outLine +BABEL_OP2_303_59301_20131205_012957_inLine +BABEL_OP2_303_59301_20131205_012957_outLine +BABEL_OP2_303_59747_20131114_224542_inLine +BABEL_OP2_303_59747_20131114_224542_outLine +BABEL_OP2_303_60352_20131115_205920_inLine +BABEL_OP2_303_60352_20131115_205920_outLine +BABEL_OP2_303_60352_20131115_210809_inLine +BABEL_OP2_303_60352_20131115_210809_outLine +BABEL_OP2_303_60418_20131115_210956_inLine +BABEL_OP2_303_60418_20131115_210956_outLine +BABEL_OP2_303_60508_20131101_185756_inLine +BABEL_OP2_303_60508_20131101_185756_outLine +BABEL_OP2_303_60650_20140319_182240_inLine +BABEL_OP2_303_60650_20140319_182240_outLine +BABEL_OP2_303_60836_20131112_201953_inLine +BABEL_OP2_303_60836_20131112_201953_outLine +BABEL_OP2_303_61219_20131114_181005_inLine +BABEL_OP2_303_61219_20131114_181005_outLine +BABEL_OP2_303_61435_20131123_235604_inLine +BABEL_OP2_303_61435_20131123_235604_outLine +BABEL_OP2_303_61684_20140220_032432_inLine +BABEL_OP2_303_61684_20140220_032432_outLine +BABEL_OP2_303_61873_20131114_011706_inLine +BABEL_OP2_303_61873_20131114_011706_outLine +BABEL_OP2_303_61971_20131228_000329_inLine +BABEL_OP2_303_61971_20131228_000329_outLine +BABEL_OP2_303_62286_20131129_203236_inLine +BABEL_OP2_303_62286_20131129_203236_outLine +BABEL_OP2_303_62362_20140129_183345_inLine +BABEL_OP2_303_62362_20140129_183345_outLine +BABEL_OP2_303_62471_20140328_192801_inLine +BABEL_OP2_303_62471_20140328_192801_outLine +BABEL_OP2_303_62734_20131108_203310_inLine +BABEL_OP2_303_62734_20131108_203310_outLine +BABEL_OP2_303_63445_20131101_180928_inLine +BABEL_OP2_303_63445_20131101_180928_outLine +BABEL_OP2_303_63787_20131029_232219_inLine +BABEL_OP2_303_63787_20131029_232219_outLine +BABEL_OP2_303_63938_20131225_194045_inLine +BABEL_OP2_303_63938_20131225_194045_outLine +BABEL_OP2_303_65298_20140222_213911_inLine +BABEL_OP2_303_65298_20140222_213911_outLine +BABEL_OP2_303_65639_20140320_184458_inLine +BABEL_OP2_303_65639_20140320_184458_outLine +BABEL_OP2_303_65723_20131106_221517_inLine +BABEL_OP2_303_65723_20131106_221517_outLine +BABEL_OP2_303_66305_20131224_012218_inLine +BABEL_OP2_303_66305_20131224_022308_inLine +BABEL_OP2_303_67085_20140223_030002_inLine +BABEL_OP2_303_67085_20140223_030002_outLine +BABEL_OP2_303_67304_20140319_193543_inLine +BABEL_OP2_303_67304_20140319_193543_outLine +BABEL_OP2_303_67794_20131111_173553_inLine +BABEL_OP2_303_67794_20131111_173553_outLine +BABEL_OP2_303_68040_20131116_041049_inLine +BABEL_OP2_303_68040_20131116_041049_outLine +BABEL_OP2_303_68182_20131206_203404_inLine 
+BABEL_OP2_303_68182_20131206_203404_outLine +BABEL_OP2_303_68402_20140319_235557_inLine +BABEL_OP2_303_68402_20140319_235557_outLine +BABEL_OP2_303_68854_20140125_191013_inLine +BABEL_OP2_303_68854_20140125_191013_outLine +BABEL_OP2_303_69090_20140322_190538_inLine +BABEL_OP2_303_69090_20140322_190538_outLine +BABEL_OP2_303_69964_20140201_215153_inLine +BABEL_OP2_303_69964_20140201_215153_outLine +BABEL_OP2_303_69972_20140412_213250_inLine +BABEL_OP2_303_69972_20140412_213250_outLine +BABEL_OP2_303_70182_20140131_021121_inLine +BABEL_OP2_303_70182_20140131_021121_outLine +BABEL_OP2_303_70216_20140309_212242_inLine +BABEL_OP2_303_70216_20140309_212242_outLine +BABEL_OP2_303_70526_20140121_191817_inLine +BABEL_OP2_303_70526_20140121_191817_outLine +BABEL_OP2_303_71282_20131206_205821_inLine +BABEL_OP2_303_71282_20131206_205821_outLine +BABEL_OP2_303_71333_20131114_201026_inLine +BABEL_OP2_303_71333_20131114_201026_outLine +BABEL_OP2_303_71704_20131107_231553_inLine +BABEL_OP2_303_71704_20131107_231553_outLine +BABEL_OP2_303_71754_20140327_221321_inLine +BABEL_OP2_303_71754_20140327_221321_outLine +BABEL_OP2_303_73258_20131110_190632_inLine +BABEL_OP2_303_73258_20131110_190632_outLine +BABEL_OP2_303_73305_20140219_214719_inLine +BABEL_OP2_303_73305_20140219_214719_outLine +BABEL_OP2_303_73408_20140222_222505_inLine +BABEL_OP2_303_73408_20140222_222505_outLine +BABEL_OP2_303_73837_20131114_035127_inLine +BABEL_OP2_303_73837_20131114_035127_outLine +BABEL_OP2_303_74121_20131109_193228_inLine +BABEL_OP2_303_74121_20131109_193228_outLine +BABEL_OP2_303_75366_20140222_194703_inLine +BABEL_OP2_303_75366_20140222_194703_outLine +BABEL_OP2_303_75460_20140211_182910_inLine +BABEL_OP2_303_75460_20140211_182910_outLine +BABEL_OP2_303_77139_20131105_210350_inLine +BABEL_OP2_303_77139_20131105_210350_outLine +BABEL_OP2_303_79028_20140416_181014_inLine +BABEL_OP2_303_79028_20140416_181014_outLine +BABEL_OP2_303_79080_20131208_203223_inLine +BABEL_OP2_303_79080_20131208_203223_outLine +BABEL_OP2_303_79129_20131114_034645_outLine +BABEL_OP2_303_79723_20140413_221551_inLine +BABEL_OP2_303_79723_20140413_221551_outLine +BABEL_OP2_303_79898_20140309_211140_inLine +BABEL_OP2_303_79898_20140309_211140_outLine +BABEL_OP2_303_80721_20131201_171555_inLine +BABEL_OP2_303_80721_20131201_171555_outLine +BABEL_OP2_303_81427_20131105_001654_inLine +BABEL_OP2_303_81427_20131105_001654_outLine +BABEL_OP2_303_81674_20140202_220306_inLine +BABEL_OP2_303_81674_20140202_220306_outLine +BABEL_OP2_303_82361_20140204_232359_inLine +BABEL_OP2_303_82361_20140204_232359_outLine +BABEL_OP2_303_82626_20140315_024235_inLine +BABEL_OP2_303_82626_20140315_024235_outLine +BABEL_OP2_303_82863_20131111_030006_inLine +BABEL_OP2_303_82863_20131111_030006_outLine +BABEL_OP2_303_82904_20140204_205103_inLine +BABEL_OP2_303_83062_20140204_210837_inLine +BABEL_OP2_303_83062_20140204_210837_outLine +BABEL_OP2_303_83813_20140320_010221_inLine +BABEL_OP2_303_83813_20140320_010221_outLine +BABEL_OP2_303_84125_20131025_195026_inLine +BABEL_OP2_303_84125_20131025_195026_outLine +BABEL_OP2_303_84339_20140111_180841_inLine +BABEL_OP2_303_84339_20140111_180841_outLine +BABEL_OP2_303_84815_20131204_190755_inLine +BABEL_OP2_303_84815_20131204_190755_outLine +BABEL_OP2_303_85048_20131114_222244_inLine +BABEL_OP2_303_85048_20131114_222244_outLine +BABEL_OP2_303_85260_20140327_224114_inLine +BABEL_OP2_303_85260_20140327_224114_outLine +BABEL_OP2_303_86715_20140312_223757_inLine +BABEL_OP2_303_86715_20140312_223757_outLine 
+BABEL_OP2_303_86748_20131206_231713_inLine +BABEL_OP2_303_86748_20131206_231713_outLine +BABEL_OP2_303_86830_20131211_192459_inLine +BABEL_OP2_303_86830_20131211_192459_outLine +BABEL_OP2_303_86860_20140204_194637_inLine +BABEL_OP2_303_86860_20140204_194637_outLine +BABEL_OP2_303_88686_20131028_192526_inLine +BABEL_OP2_303_88686_20131028_192526_outLine +BABEL_OP2_303_88812_20140125_200044_inLine +BABEL_OP2_303_88812_20140125_200044_outLine +BABEL_OP2_303_88873_20131112_214623_inLine +BABEL_OP2_303_88873_20131112_214623_outLine +BABEL_OP2_303_89045_20131024_213611_inLine +BABEL_OP2_303_89045_20131024_213611_outLine +BABEL_OP2_303_89372_20131025_175446_inLine +BABEL_OP2_303_89372_20131025_175446_outLine +BABEL_OP2_303_89457_20131113_185151_inLine +BABEL_OP2_303_89457_20131113_185151_outLine +BABEL_OP2_303_89575_20131129_162850_inLine +BABEL_OP2_303_89575_20131129_162850_outLine +BABEL_OP2_303_89650_20140414_003815_inLine +BABEL_OP2_303_89650_20140414_003815_outLine +BABEL_OP2_303_90417_20140206_194432_inLine +BABEL_OP2_303_90417_20140206_194432_outLine +BABEL_OP2_303_91189_20140214_204530_inLine +BABEL_OP2_303_91189_20140214_204530_outLine +BABEL_OP2_303_91336_20131114_155601_inLine +BABEL_OP2_303_91336_20131114_155601_outLine +BABEL_OP2_303_91411_20140318_210954_inLine +BABEL_OP2_303_91411_20140318_210954_outLine +BABEL_OP2_303_91463_20131115_000512_inLine +BABEL_OP2_303_91463_20131115_000512_outLine +BABEL_OP2_303_91581_20131202_041422_inLine +BABEL_OP2_303_91581_20131202_041422_outLine +BABEL_OP2_303_91593_20140209_040916_inLine +BABEL_OP2_303_91593_20140209_040916_outLine +BABEL_OP2_303_91606_20140325_030918_inLine +BABEL_OP2_303_91606_20140325_030918_outLine +BABEL_OP2_303_91760_20140129_023507_inLine +BABEL_OP2_303_91760_20140129_023507_outLine +BABEL_OP2_303_91808_20140324_180442_inLine +BABEL_OP2_303_91808_20140324_180442_outLine +BABEL_OP2_303_91884_20131224_170738_inLine +BABEL_OP2_303_91884_20131224_170738_outLine +BABEL_OP2_303_91971_20140401_140304_inLine +BABEL_OP2_303_91971_20140401_140304_outLine +BABEL_OP2_303_92281_20140225_212826_inLine +BABEL_OP2_303_92281_20140225_212826_outLine +BABEL_OP2_303_93007_20140325_033131_inLine +BABEL_OP2_303_93007_20140325_033131_outLine +BABEL_OP2_303_93443_20140320_235342_inLine +BABEL_OP2_303_93443_20140320_235342_outLine +BABEL_OP2_303_93681_20140322_200153_inLine +BABEL_OP2_303_93681_20140322_200153_outLine +BABEL_OP2_303_93861_20131114_005221_inLine +BABEL_OP2_303_93861_20131114_005221_outLine +BABEL_OP2_303_93861_20131114_011200_inLine +BABEL_OP2_303_93861_20131114_011200_outLine +BABEL_OP2_303_93937_20140312_225604_inLine +BABEL_OP2_303_93937_20140312_225604_outLine +BABEL_OP2_303_93946_20131204_180611_inLine +BABEL_OP2_303_93946_20131204_180611_outLine +BABEL_OP2_303_94002_20131113_163221_inLine +BABEL_OP2_303_94002_20131113_163221_outLine +BABEL_OP2_303_94035_20140320_015111_inLine +BABEL_OP2_303_94035_20140320_015111_outLine +BABEL_OP2_303_94044_20140221_020012_inLine +BABEL_OP2_303_94044_20140221_020012_outLine +BABEL_OP2_303_94212_20140328_202919_inLine +BABEL_OP2_303_94212_20140328_202919_outLine +BABEL_OP2_303_94713_20140319_231311_inLine +BABEL_OP2_303_94713_20140319_231311_outLine +BABEL_OP2_303_95028_20140206_001106_inLine +BABEL_OP2_303_95028_20140206_001106_outLine +BABEL_OP2_303_95028_20140320_001627_inLine +BABEL_OP2_303_95028_20140320_001627_outLine +BABEL_OP2_303_95467_20140218_202005_inLine +BABEL_OP2_303_95467_20140218_202005_outLine +BABEL_OP2_303_95490_20131101_164715_inLine 
+BABEL_OP2_303_95490_20131101_164715_outLine +BABEL_OP2_303_95663_20131116_161029_inLine +BABEL_OP2_303_95663_20131116_161029_outLine +BABEL_OP2_303_95935_20131204_145738_inLine +BABEL_OP2_303_95935_20131204_145738_outLine +BABEL_OP2_303_96088_20140127_224534_inLine +BABEL_OP2_303_96088_20140127_224534_outLine +BABEL_OP2_303_97097_20140122_030319_inLine +BABEL_OP2_303_97097_20140122_030319_outLine +BABEL_OP2_303_97264_20131205_020902_inLine +BABEL_OP2_303_97264_20131205_020902_outLine +BABEL_OP2_303_97588_20131027_011205_inLine +BABEL_OP2_303_97588_20131027_011205_outLine +BABEL_OP2_303_98255_20140322_200157_inLine +BABEL_OP2_303_98255_20140322_200157_outLine +BABEL_OP2_303_98580_20131112_204407_inLine +BABEL_OP2_303_98580_20131112_204407_outLine +BABEL_OP2_303_99813_20131115_032632_inLine +BABEL_OP2_303_99813_20131115_032632_outLine +BABEL_OP2_303_99883_20140326_192513_inLine +BABEL_OP2_303_99883_20140326_192513_outLine +BABEL_OP2_303_99952_20140212_000327_inLine +BABEL_OP2_303_99952_20140212_000327_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list new file mode 100644 index 00000000000..37f27ef3750 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list @@ -0,0 +1,122 @@ +BABEL_OP2_304_13040_20131130_214521_inLine +BABEL_OP2_304_13040_20131130_214521_outLine +BABEL_OP2_304_14158_20140115_023605_inLine +BABEL_OP2_304_14158_20140115_023605_outLine +BABEL_OP2_304_14575_20131024_232334_inLine +BABEL_OP2_304_14575_20131024_232334_outLine +BABEL_OP2_304_14575_20131024_235230_inLine +BABEL_OP2_304_14575_20131024_235230_outLine +BABEL_OP2_304_15163_20140108_001236_inLine +BABEL_OP2_304_15163_20140108_001236_outLine +BABEL_OP2_304_15262_20131210_004932_inLine +BABEL_OP2_304_15262_20131210_004932_outLine +BABEL_OP2_304_16056_20140123_070422_inLine +BABEL_OP2_304_16056_20140123_070422_outLine +BABEL_OP2_304_16787_20131206_025653_inLine +BABEL_OP2_304_16787_20131206_025653_outLine +BABEL_OP2_304_17511_20131126_055458_inLine +BABEL_OP2_304_17511_20131126_055458_outLine +BABEL_OP2_304_17573_20140203_230300_inLine +BABEL_OP2_304_17573_20140203_230300_outLine +BABEL_OP2_304_17914_20140228_184910_inLine +BABEL_OP2_304_17914_20140228_184910_outLine +BABEL_OP2_304_21581_20131216_220706_inLine +BABEL_OP2_304_21581_20131216_220706_outLine +BABEL_OP2_304_22021_20131023_221926_inLine +BABEL_OP2_304_22021_20131023_221926_outLine +BABEL_OP2_304_22288_20131112_035653_inLine +BABEL_OP2_304_22288_20131112_035653_outLine +BABEL_OP2_304_26206_20140120_022753_inLine +BABEL_OP2_304_26206_20140120_022753_outLine +BABEL_OP2_304_29777_20140217_064220_inLine +BABEL_OP2_304_29777_20140217_064220_outLine +BABEL_OP2_304_31500_20131109_033149_inLine +BABEL_OP2_304_31500_20131109_033149_outLine +BABEL_OP2_304_31979_20140109_015624_inLine +BABEL_OP2_304_31979_20140109_015624_outLine +BABEL_OP2_304_32959_20140210_005641_inLine +BABEL_OP2_304_32959_20140210_005641_outLine +BABEL_OP2_304_33800_20131023_012145_inLine +BABEL_OP2_304_33800_20131023_012145_outLine +BABEL_OP2_304_34208_20131031_044912_inLine +BABEL_OP2_304_34208_20131031_044912_outLine +BABEL_OP2_304_35069_20140304_002856_inLine +BABEL_OP2_304_35069_20140304_002856_outLine +BABEL_OP2_304_35202_20140111_000728_inLine +BABEL_OP2_304_35202_20140111_000728_outLine +BABEL_OP2_304_37064_20131129_035959_inLine +BABEL_OP2_304_37064_20131129_035959_outLine +BABEL_OP2_304_37068_20131023_011604_inLine +BABEL_OP2_304_37068_20131023_011604_outLine +BABEL_OP2_304_39927_20131021_221542_inLine 
+BABEL_OP2_304_39927_20131021_221542_outLine +BABEL_OP2_304_40330_20131109_021648_inLine +BABEL_OP2_304_40330_20131109_021648_outLine +BABEL_OP2_304_42877_20131022_230033_inLine +BABEL_OP2_304_42877_20131022_230033_outLine +BABEL_OP2_304_44420_20131214_233135_inLine +BABEL_OP2_304_44420_20131214_233135_outLine +BABEL_OP2_304_46702_20131115_213311_inLine +BABEL_OP2_304_46702_20131115_213311_outLine +BABEL_OP2_304_46712_20131209_044650_inLine +BABEL_OP2_304_46712_20131209_044650_outLine +BABEL_OP2_304_46974_20140220_023915_inLine +BABEL_OP2_304_46974_20140220_023915_outLine +BABEL_OP2_304_54735_20131112_025013_inLine +BABEL_OP2_304_54735_20131112_025013_outLine +BABEL_OP2_304_63265_20131108_044545_inLine +BABEL_OP2_304_63265_20131108_044545_outLine +BABEL_OP2_304_63307_20140121_215145_inLine +BABEL_OP2_304_63307_20140121_215145_outLine +BABEL_OP2_304_63938_20140303_232624_inLine +BABEL_OP2_304_63938_20140303_232624_outLine +BABEL_OP2_304_64494_20131212_025147_inLine +BABEL_OP2_304_64494_20131212_025147_outLine +BABEL_OP2_304_67671_20131106_030834_inLine +BABEL_OP2_304_67671_20131106_030834_outLine +BABEL_OP2_304_70110_20131118_222225_inLine +BABEL_OP2_304_70110_20131118_222225_outLine +BABEL_OP2_304_70282_20140114_194359_inLine +BABEL_OP2_304_70282_20140114_194359_outLine +BABEL_OP2_304_71704_20131215_005510_inLine +BABEL_OP2_304_71704_20131215_005510_outLine +BABEL_OP2_304_73622_20131216_061333_inLine +BABEL_OP2_304_73622_20131216_061333_outLine +BABEL_OP2_304_76837_20131020_200525_inLine +BABEL_OP2_304_76837_20131020_200525_outLine +BABEL_OP2_304_78877_20131023_202733_inLine +BABEL_OP2_304_78877_20131023_202733_outLine +BABEL_OP2_304_84079_20131112_195009_inLine +BABEL_OP2_304_84079_20131112_195009_outLine +BABEL_OP2_304_86878_20131129_043842_inLine +BABEL_OP2_304_86878_20131129_043842_outLine +BABEL_OP2_304_87629_20140121_223247_inLine +BABEL_OP2_304_87629_20140121_223247_outLine +BABEL_OP2_304_87693_20131214_012505_inLine +BABEL_OP2_304_87693_20131214_012505_outLine +BABEL_OP2_304_88394_20131030_012001_inLine +BABEL_OP2_304_88394_20131030_012001_outLine +BABEL_OP2_304_88873_20131215_052029_inLine +BABEL_OP2_304_88873_20131215_052029_outLine +BABEL_OP2_304_89457_20140107_011232_inLine +BABEL_OP2_304_89457_20140107_011232_outLine +BABEL_OP2_304_91411_20140214_045051_inLine +BABEL_OP2_304_91411_20140214_045051_outLine +BABEL_OP2_304_94002_20140106_061517_inLine +BABEL_OP2_304_94002_20140106_061517_outLine +BABEL_OP2_304_94035_20131028_044307_inLine +BABEL_OP2_304_94035_20131028_044307_outLine +BABEL_OP2_304_94166_20140222_223654_inLine +BABEL_OP2_304_94166_20140222_223654_outLine +BABEL_OP2_304_94587_20140203_223943_inLine +BABEL_OP2_304_94587_20140203_223943_outLine +BABEL_OP2_304_95966_20140116_013030_inLine +BABEL_OP2_304_95966_20140116_013030_outLine +BABEL_OP2_304_96041_20131110_011619_inLine +BABEL_OP2_304_96041_20131110_011619_outLine +BABEL_OP2_304_96934_20131207_231603_inLine +BABEL_OP2_304_96934_20131207_231603_outLine +BABEL_OP2_304_97604_20140221_172005_inLine +BABEL_OP2_304_97604_20140221_172005_outLine +BABEL_OP2_304_99732_20140213_211724_inLine +BABEL_OP2_304_99732_20140213_211724_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list new file mode 100644 index 00000000000..37f27ef3750 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list @@ -0,0 +1,122 @@ +BABEL_OP2_304_13040_20131130_214521_inLine +BABEL_OP2_304_13040_20131130_214521_outLine +BABEL_OP2_304_14158_20140115_023605_inLine 
+BABEL_OP2_304_14158_20140115_023605_outLine +BABEL_OP2_304_14575_20131024_232334_inLine +BABEL_OP2_304_14575_20131024_232334_outLine +BABEL_OP2_304_14575_20131024_235230_inLine +BABEL_OP2_304_14575_20131024_235230_outLine +BABEL_OP2_304_15163_20140108_001236_inLine +BABEL_OP2_304_15163_20140108_001236_outLine +BABEL_OP2_304_15262_20131210_004932_inLine +BABEL_OP2_304_15262_20131210_004932_outLine +BABEL_OP2_304_16056_20140123_070422_inLine +BABEL_OP2_304_16056_20140123_070422_outLine +BABEL_OP2_304_16787_20131206_025653_inLine +BABEL_OP2_304_16787_20131206_025653_outLine +BABEL_OP2_304_17511_20131126_055458_inLine +BABEL_OP2_304_17511_20131126_055458_outLine +BABEL_OP2_304_17573_20140203_230300_inLine +BABEL_OP2_304_17573_20140203_230300_outLine +BABEL_OP2_304_17914_20140228_184910_inLine +BABEL_OP2_304_17914_20140228_184910_outLine +BABEL_OP2_304_21581_20131216_220706_inLine +BABEL_OP2_304_21581_20131216_220706_outLine +BABEL_OP2_304_22021_20131023_221926_inLine +BABEL_OP2_304_22021_20131023_221926_outLine +BABEL_OP2_304_22288_20131112_035653_inLine +BABEL_OP2_304_22288_20131112_035653_outLine +BABEL_OP2_304_26206_20140120_022753_inLine +BABEL_OP2_304_26206_20140120_022753_outLine +BABEL_OP2_304_29777_20140217_064220_inLine +BABEL_OP2_304_29777_20140217_064220_outLine +BABEL_OP2_304_31500_20131109_033149_inLine +BABEL_OP2_304_31500_20131109_033149_outLine +BABEL_OP2_304_31979_20140109_015624_inLine +BABEL_OP2_304_31979_20140109_015624_outLine +BABEL_OP2_304_32959_20140210_005641_inLine +BABEL_OP2_304_32959_20140210_005641_outLine +BABEL_OP2_304_33800_20131023_012145_inLine +BABEL_OP2_304_33800_20131023_012145_outLine +BABEL_OP2_304_34208_20131031_044912_inLine +BABEL_OP2_304_34208_20131031_044912_outLine +BABEL_OP2_304_35069_20140304_002856_inLine +BABEL_OP2_304_35069_20140304_002856_outLine +BABEL_OP2_304_35202_20140111_000728_inLine +BABEL_OP2_304_35202_20140111_000728_outLine +BABEL_OP2_304_37064_20131129_035959_inLine +BABEL_OP2_304_37064_20131129_035959_outLine +BABEL_OP2_304_37068_20131023_011604_inLine +BABEL_OP2_304_37068_20131023_011604_outLine +BABEL_OP2_304_39927_20131021_221542_inLine +BABEL_OP2_304_39927_20131021_221542_outLine +BABEL_OP2_304_40330_20131109_021648_inLine +BABEL_OP2_304_40330_20131109_021648_outLine +BABEL_OP2_304_42877_20131022_230033_inLine +BABEL_OP2_304_42877_20131022_230033_outLine +BABEL_OP2_304_44420_20131214_233135_inLine +BABEL_OP2_304_44420_20131214_233135_outLine +BABEL_OP2_304_46702_20131115_213311_inLine +BABEL_OP2_304_46702_20131115_213311_outLine +BABEL_OP2_304_46712_20131209_044650_inLine +BABEL_OP2_304_46712_20131209_044650_outLine +BABEL_OP2_304_46974_20140220_023915_inLine +BABEL_OP2_304_46974_20140220_023915_outLine +BABEL_OP2_304_54735_20131112_025013_inLine +BABEL_OP2_304_54735_20131112_025013_outLine +BABEL_OP2_304_63265_20131108_044545_inLine +BABEL_OP2_304_63265_20131108_044545_outLine +BABEL_OP2_304_63307_20140121_215145_inLine +BABEL_OP2_304_63307_20140121_215145_outLine +BABEL_OP2_304_63938_20140303_232624_inLine +BABEL_OP2_304_63938_20140303_232624_outLine +BABEL_OP2_304_64494_20131212_025147_inLine +BABEL_OP2_304_64494_20131212_025147_outLine +BABEL_OP2_304_67671_20131106_030834_inLine +BABEL_OP2_304_67671_20131106_030834_outLine +BABEL_OP2_304_70110_20131118_222225_inLine +BABEL_OP2_304_70110_20131118_222225_outLine +BABEL_OP2_304_70282_20140114_194359_inLine +BABEL_OP2_304_70282_20140114_194359_outLine +BABEL_OP2_304_71704_20131215_005510_inLine +BABEL_OP2_304_71704_20131215_005510_outLine 
+BABEL_OP2_304_73622_20131216_061333_inLine +BABEL_OP2_304_73622_20131216_061333_outLine +BABEL_OP2_304_76837_20131020_200525_inLine +BABEL_OP2_304_76837_20131020_200525_outLine +BABEL_OP2_304_78877_20131023_202733_inLine +BABEL_OP2_304_78877_20131023_202733_outLine +BABEL_OP2_304_84079_20131112_195009_inLine +BABEL_OP2_304_84079_20131112_195009_outLine +BABEL_OP2_304_86878_20131129_043842_inLine +BABEL_OP2_304_86878_20131129_043842_outLine +BABEL_OP2_304_87629_20140121_223247_inLine +BABEL_OP2_304_87629_20140121_223247_outLine +BABEL_OP2_304_87693_20131214_012505_inLine +BABEL_OP2_304_87693_20131214_012505_outLine +BABEL_OP2_304_88394_20131030_012001_inLine +BABEL_OP2_304_88394_20131030_012001_outLine +BABEL_OP2_304_88873_20131215_052029_inLine +BABEL_OP2_304_88873_20131215_052029_outLine +BABEL_OP2_304_89457_20140107_011232_inLine +BABEL_OP2_304_89457_20140107_011232_outLine +BABEL_OP2_304_91411_20140214_045051_inLine +BABEL_OP2_304_91411_20140214_045051_outLine +BABEL_OP2_304_94002_20140106_061517_inLine +BABEL_OP2_304_94002_20140106_061517_outLine +BABEL_OP2_304_94035_20131028_044307_inLine +BABEL_OP2_304_94035_20131028_044307_outLine +BABEL_OP2_304_94166_20140222_223654_inLine +BABEL_OP2_304_94166_20140222_223654_outLine +BABEL_OP2_304_94587_20140203_223943_inLine +BABEL_OP2_304_94587_20140203_223943_outLine +BABEL_OP2_304_95966_20140116_013030_inLine +BABEL_OP2_304_95966_20140116_013030_outLine +BABEL_OP2_304_96041_20131110_011619_inLine +BABEL_OP2_304_96041_20131110_011619_outLine +BABEL_OP2_304_96934_20131207_231603_inLine +BABEL_OP2_304_96934_20131207_231603_outLine +BABEL_OP2_304_97604_20140221_172005_inLine +BABEL_OP2_304_97604_20140221_172005_outLine +BABEL_OP2_304_99732_20140213_211724_inLine +BABEL_OP2_304_99732_20140213_211724_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/eval.list b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list new file mode 100644 index 00000000000..506241eadc5 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_304_10416_20140107_061620_inLine +BABEL_OP2_304_10416_20140107_061620_outLine +BABEL_OP2_304_14723_20131125_042706_inLine +BABEL_OP2_304_14723_20131125_042706_outLine +BABEL_OP2_304_16351_20131027_201533_inLine +BABEL_OP2_304_16351_20131027_201533_outLine +BABEL_OP2_304_16802_20131108_055143_inLine +BABEL_OP2_304_16802_20131108_055143_outLine +BABEL_OP2_304_18863_20140222_035802_inLine +BABEL_OP2_304_18863_20140222_035802_outLine +BABEL_OP2_304_20724_20131109_014600_inLine +BABEL_OP2_304_20724_20131109_014600_outLine +BABEL_OP2_304_22641_20131201_215149_inLine +BABEL_OP2_304_22641_20131201_215149_outLine +BABEL_OP2_304_23355_20131126_211038_inLine +BABEL_OP2_304_23355_20131126_211038_outLine +BABEL_OP2_304_23395_20140214_042808_inLine +BABEL_OP2_304_23395_20140214_042808_outLine +BABEL_OP2_304_23628_20131208_203311_inLine +BABEL_OP2_304_23628_20131208_203311_outLine +BABEL_OP2_304_23700_20131025_204511_inLine +BABEL_OP2_304_23700_20131025_204511_outLine +BABEL_OP2_304_23731_20140111_003449_inLine +BABEL_OP2_304_23731_20140111_003449_outLine +BABEL_OP2_304_24033_20140304_045137_inLine +BABEL_OP2_304_24033_20140304_045137_outLine +BABEL_OP2_304_24209_20131022_193019_inLine +BABEL_OP2_304_24209_20131022_193019_outLine +BABEL_OP2_304_24209_20131022_193936_inLine +BABEL_OP2_304_24209_20131022_193936_outLine +BABEL_OP2_304_25068_20131019_030524_inLine +BABEL_OP2_304_25068_20131019_030524_outLine +BABEL_OP2_304_26869_20131031_215636_inLine +BABEL_OP2_304_26869_20131031_215636_outLine 
+BABEL_OP2_304_28422_20140112_043550_inLine +BABEL_OP2_304_28422_20140112_043550_outLine +BABEL_OP2_304_28538_20140106_011449_inLine +BABEL_OP2_304_28538_20140106_011449_outLine +BABEL_OP2_304_28585_20140225_043733_inLine +BABEL_OP2_304_28585_20140225_043733_outLine +BABEL_OP2_304_30250_20140120_020901_inLine +BABEL_OP2_304_30250_20140120_020901_outLine +BABEL_OP2_304_36219_20131216_035438_inLine +BABEL_OP2_304_36219_20131216_035438_outLine +BABEL_OP2_304_36632_20131024_201211_inLine +BABEL_OP2_304_36632_20131024_201211_outLine +BABEL_OP2_304_39159_20131208_045854_inLine +BABEL_OP2_304_39159_20131208_045854_outLine +BABEL_OP2_304_39277_20131020_204845_inLine +BABEL_OP2_304_39277_20131020_204845_outLine +BABEL_OP2_304_41109_20140220_021208_inLine +BABEL_OP2_304_41109_20140220_021208_outLine +BABEL_OP2_304_43285_20140124_012117_inLine +BABEL_OP2_304_43285_20140124_012117_outLine +BABEL_OP2_304_44255_20140222_010712_inLine +BABEL_OP2_304_44255_20140222_010712_outLine +BABEL_OP2_304_44681_20131023_205447_inLine +BABEL_OP2_304_44681_20131023_205447_outLine +BABEL_OP2_304_45106_20140117_233013_inLine +BABEL_OP2_304_45106_20140117_233013_outLine +BABEL_OP2_304_45699_20131022_213702_inLine +BABEL_OP2_304_45699_20131022_213702_outLine +BABEL_OP2_304_46905_20131025_213636_inLine +BABEL_OP2_304_46905_20131025_213636_outLine +BABEL_OP2_304_47882_20131027_194825_inLine +BABEL_OP2_304_47882_20131027_194825_outLine +BABEL_OP2_304_48200_20140221_015225_inLine +BABEL_OP2_304_48200_20140221_015225_outLine +BABEL_OP2_304_49641_20131112_211903_inLine +BABEL_OP2_304_49641_20131112_211903_outLine +BABEL_OP2_304_49775_20131114_210107_inLine +BABEL_OP2_304_49775_20131114_210107_outLine +BABEL_OP2_304_50962_20131206_052346_inLine +BABEL_OP2_304_50962_20131206_052346_outLine +BABEL_OP2_304_53206_20131021_231814_inLine +BABEL_OP2_304_53206_20131021_231814_outLine +BABEL_OP2_304_53441_20131026_001731_inLine +BABEL_OP2_304_53441_20131026_001731_outLine +BABEL_OP2_304_53758_20131110_023501_inLine +BABEL_OP2_304_53758_20131110_023501_outLine +BABEL_OP2_304_54040_20140207_031046_inLine +BABEL_OP2_304_54040_20140207_031046_outLine +BABEL_OP2_304_55742_20131210_035616_inLine +BABEL_OP2_304_55742_20131210_035616_outLine +BABEL_OP2_304_57650_20140228_212617_inLine +BABEL_OP2_304_57650_20140228_212617_outLine +BABEL_OP2_304_57654_20131129_021919_inLine +BABEL_OP2_304_57654_20131129_021919_outLine +BABEL_OP2_304_57922_20140212_234031_inLine +BABEL_OP2_304_57922_20140212_234031_outLine +BABEL_OP2_304_60508_20131213_013224_inLine +BABEL_OP2_304_60508_20131213_013224_outLine +BABEL_OP2_304_62434_20131204_015115_inLine +BABEL_OP2_304_62434_20131204_015115_outLine +BABEL_OP2_304_63481_20131218_054343_inLine +BABEL_OP2_304_63481_20131218_054343_outLine +BABEL_OP2_304_63484_20131108_002450_inLine +BABEL_OP2_304_63484_20131108_002450_outLine +BABEL_OP2_304_65339_20131108_025612_inLine +BABEL_OP2_304_65339_20131108_025612_outLine +BABEL_OP2_304_66967_20131211_212833_inLine +BABEL_OP2_304_66967_20131211_212833_outLine +BABEL_OP2_304_67373_20131213_035431_inLine +BABEL_OP2_304_67373_20131213_035431_outLine +BABEL_OP2_304_67726_20131021_224218_inLine +BABEL_OP2_304_67726_20131021_224218_outLine +BABEL_OP2_304_67794_20131211_225335_inLine +BABEL_OP2_304_67794_20131211_225335_outLine +BABEL_OP2_304_68823_20131020_204717_inLine +BABEL_OP2_304_68823_20131020_204717_outLine +BABEL_OP2_304_69090_20131028_014204_inLine +BABEL_OP2_304_69090_20131028_014204_outLine +BABEL_OP2_304_69574_20131114_192607_inLine 
+BABEL_OP2_304_69574_20131114_192607_outLine
+BABEL_OP2_304_70726_20131024_044755_inLine
+BABEL_OP2_304_70726_20131024_044755_outLine
+BABEL_OP2_304_71278_20131021_222320_inLine
+BABEL_OP2_304_71278_20131021_222320_outLine
+BABEL_OP2_304_73837_20131203_050134_inLine
+BABEL_OP2_304_73837_20131203_050134_outLine
+BABEL_OP2_304_74111_20140214_221515_inLine
+BABEL_OP2_304_74111_20140214_221515_outLine
+BABEL_OP2_304_74280_20131114_221312_inLine
+BABEL_OP2_304_74280_20131114_221312_outLine
+BABEL_OP2_304_75465_20140214_020356_inLine
+BABEL_OP2_304_75465_20140214_020356_outLine
+BABEL_OP2_304_76773_20131201_022925_inLine
+BABEL_OP2_304_76773_20131201_022925_outLine
+BABEL_OP2_304_77904_20131023_031446_inLine
+BABEL_OP2_304_77904_20131023_031446_outLine
+BABEL_OP2_304_77990_20131201_021431_inLine
+BABEL_OP2_304_77990_20131201_021431_outLine
+BABEL_OP2_304_78609_20140215_083334_inLine
+BABEL_OP2_304_78609_20140215_083334_outLine
+BABEL_OP2_304_78630_20131216_203357_inLine
+BABEL_OP2_304_78630_20131216_203357_outLine
+BABEL_OP2_304_78958_20131106_193325_inLine
+BABEL_OP2_304_78958_20131106_193325_outLine
+BABEL_OP2_304_78976_20131207_040932_inLine
+BABEL_OP2_304_78976_20131207_040932_outLine
+BABEL_OP2_304_80241_20131031_000650_inLine
+BABEL_OP2_304_80241_20131031_000650_outLine
+BABEL_OP2_304_83366_20140114_021841_inLine
+BABEL_OP2_304_83366_20140114_021841_outLine
+BABEL_OP2_304_83643_20131112_015611_inLine
+BABEL_OP2_304_83643_20131112_015611_outLine
+BABEL_OP2_304_83775_20140106_012027_inLine
+BABEL_OP2_304_83775_20140106_012027_outLine
+BABEL_OP2_304_83783_20140123_015127_inLine
+BABEL_OP2_304_83783_20140123_015127_outLine
+BABEL_OP2_304_84029_20131107_051843_inLine
+BABEL_OP2_304_84029_20131107_051843_outLine
+BABEL_OP2_304_85260_20131024_194755_inLine
+BABEL_OP2_304_85260_20131024_194755_outLine
+BABEL_OP2_304_86885_20131024_233222_inLine
+BABEL_OP2_304_86885_20131024_233222_outLine
+BABEL_OP2_304_89045_20131115_232122_inLine
+BABEL_OP2_304_89045_20131115_232122_outLine
+BABEL_OP2_304_89226_20131024_203728_inLine
+BABEL_OP2_304_89226_20131024_203728_outLine
+BABEL_OP2_304_89372_20131115_002102_inLine
+BABEL_OP2_304_89372_20131115_002102_outLine
+BABEL_OP2_304_90930_20131020_000019_inLine
+BABEL_OP2_304_90930_20131020_000019_outLine
+BABEL_OP2_304_90935_20131204_230914_inLine
+BABEL_OP2_304_90935_20131204_230914_outLine
+BABEL_OP2_304_91971_20131023_230515_inLine
+BABEL_OP2_304_91971_20131023_230515_outLine
+BABEL_OP2_304_92509_20131210_214423_inLine
+BABEL_OP2_304_92509_20131210_214423_outLine
+BABEL_OP2_304_92698_20140118_013836_inLine
+BABEL_OP2_304_92698_20140118_013836_outLine
+BABEL_OP2_304_93946_20140213_192924_inLine
+BABEL_OP2_304_93946_20140213_192924_outLine
+BABEL_OP2_304_94869_20131114_004423_inLine
+BABEL_OP2_304_94869_20131114_004423_outLine
+BABEL_OP2_304_95077_20140213_032447_inLine
+BABEL_OP2_304_95077_20140213_032447_outLine
+BABEL_OP2_304_96504_20131215_211136_inLine
+BABEL_OP2_304_96504_20131215_211136_outLine
+BABEL_OP2_304_96504_20131215_212158_inLine
+BABEL_OP2_304_96504_20131215_212158_outLine
+BABEL_OP2_304_97448_20131109_203008_inLine
+BABEL_OP2_304_97448_20131109_203008_outLine
+BABEL_OP2_304_97570_20140114_012633_inLine
+BABEL_OP2_304_97570_20140114_012633_outLine
+BABEL_OP2_304_97772_20131115_013811_inLine
+BABEL_OP2_304_97772_20131115_013811_outLine
+BABEL_OP2_304_98255_20131126_040940_inLine
+BABEL_OP2_304_98255_20131126_040940_outLine
+BABEL_OP2_304_98888_20140116_000206_inLine
+BABEL_OP2_304_98888_20140116_000206_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list
new file mode 100644
index 00000000000..bf4691f0f34
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list
@@ -0,0 +1,60 @@
+BABEL_OP2_304_10416_20140107_061620_inLine
+BABEL_OP2_304_10416_20140107_061620_outLine
+BABEL_OP2_304_14723_20131125_042706_inLine
+BABEL_OP2_304_14723_20131125_042706_outLine
+BABEL_OP2_304_16351_20131027_201533_inLine
+BABEL_OP2_304_16351_20131027_201533_outLine
+BABEL_OP2_304_18863_20140222_035802_inLine
+BABEL_OP2_304_18863_20140222_035802_outLine
+BABEL_OP2_304_22641_20131201_215149_inLine
+BABEL_OP2_304_22641_20131201_215149_outLine
+BABEL_OP2_304_25068_20131019_030524_inLine
+BABEL_OP2_304_25068_20131019_030524_outLine
+BABEL_OP2_304_28422_20140112_043550_inLine
+BABEL_OP2_304_28422_20140112_043550_outLine
+BABEL_OP2_304_28585_20140225_043733_inLine
+BABEL_OP2_304_28585_20140225_043733_outLine
+BABEL_OP2_304_30250_20140120_020901_inLine
+BABEL_OP2_304_30250_20140120_020901_outLine
+BABEL_OP2_304_36219_20131216_035438_inLine
+BABEL_OP2_304_36219_20131216_035438_outLine
+BABEL_OP2_304_39159_20131208_045854_inLine
+BABEL_OP2_304_39159_20131208_045854_outLine
+BABEL_OP2_304_41109_20140220_021208_inLine
+BABEL_OP2_304_41109_20140220_021208_outLine
+BABEL_OP2_304_43285_20140124_012117_inLine
+BABEL_OP2_304_43285_20140124_012117_outLine
+BABEL_OP2_304_44255_20140222_010712_inLine
+BABEL_OP2_304_44255_20140222_010712_outLine
+BABEL_OP2_304_44681_20131023_205447_inLine
+BABEL_OP2_304_44681_20131023_205447_outLine
+BABEL_OP2_304_45106_20140117_233013_inLine
+BABEL_OP2_304_45106_20140117_233013_outLine
+BABEL_OP2_304_45699_20131022_213702_inLine
+BABEL_OP2_304_45699_20131022_213702_outLine
+BABEL_OP2_304_53206_20131021_231814_inLine
+BABEL_OP2_304_53206_20131021_231814_outLine
+BABEL_OP2_304_57922_20140212_234031_inLine
+BABEL_OP2_304_57922_20140212_234031_outLine
+BABEL_OP2_304_60508_20131213_013224_inLine
+BABEL_OP2_304_60508_20131213_013224_outLine
+BABEL_OP2_304_63481_20131218_054343_inLine
+BABEL_OP2_304_63481_20131218_054343_outLine
+BABEL_OP2_304_65339_20131108_025612_inLine
+BABEL_OP2_304_65339_20131108_025612_outLine
+BABEL_OP2_304_66967_20131211_212833_inLine
+BABEL_OP2_304_66967_20131211_212833_outLine
+BABEL_OP2_304_70726_20131024_044755_inLine
+BABEL_OP2_304_70726_20131024_044755_outLine
+BABEL_OP2_304_78609_20140215_083334_inLine
+BABEL_OP2_304_78609_20140215_083334_outLine
+BABEL_OP2_304_83366_20140114_021841_inLine
+BABEL_OP2_304_83366_20140114_021841_outLine
+BABEL_OP2_304_83775_20140106_012027_inLine
+BABEL_OP2_304_83775_20140106_012027_outLine
+BABEL_OP2_304_85260_20131024_194755_inLine
+BABEL_OP2_304_85260_20131024_194755_outLine
+BABEL_OP2_304_97448_20131109_203008_inLine
+BABEL_OP2_304_97448_20131109_203008_outLine
+BABEL_OP2_304_98888_20140116_000206_inLine
+BABEL_OP2_304_98888_20140116_000206_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list
new file mode 100644
index 00000000000..858a278660f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list
@@ -0,0 +1,120 @@
+BABEL_OP2_304_10019_20131215_000700_inLine
+BABEL_OP2_304_10019_20131215_000700_outLine
+BABEL_OP2_304_11768_20131025_195124_inLine
+BABEL_OP2_304_11768_20131025_195124_outLine
+BABEL_OP2_304_13929_20131020_015822_inLine
+BABEL_OP2_304_13929_20131020_015822_outLine
+BABEL_OP2_304_15420_20131207_024154_inLine
+BABEL_OP2_304_15420_20131207_024154_outLine
+BABEL_OP2_304_17937_20131127_033509_inLine +BABEL_OP2_304_17937_20131127_033509_outLine +BABEL_OP2_304_18037_20131024_213803_inLine +BABEL_OP2_304_18037_20131024_213803_outLine +BABEL_OP2_304_18731_20131023_003305_inLine +BABEL_OP2_304_18731_20131023_003305_outLine +BABEL_OP2_304_20916_20131114_013626_inLine +BABEL_OP2_304_20916_20131114_013626_outLine +BABEL_OP2_304_21029_20131212_035937_inLine +BABEL_OP2_304_21029_20131212_035937_outLine +BABEL_OP2_304_22170_20140304_071139_inLine +BABEL_OP2_304_22170_20140304_071139_outLine +BABEL_OP2_304_23098_20131107_033644_inLine +BABEL_OP2_304_23098_20131107_033644_outLine +BABEL_OP2_304_26074_20140112_023253_inLine +BABEL_OP2_304_26074_20140112_023253_outLine +BABEL_OP2_304_34564_20140213_195420_inLine +BABEL_OP2_304_34564_20140213_195420_outLine +BABEL_OP2_304_35420_20131029_043734_inLine +BABEL_OP2_304_35420_20131029_043734_outLine +BABEL_OP2_304_35838_20131024_211303_inLine +BABEL_OP2_304_35838_20131024_211303_outLine +BABEL_OP2_304_36147_20131019_040800_inLine +BABEL_OP2_304_36147_20131019_040800_outLine +BABEL_OP2_304_39688_20131109_222248_inLine +BABEL_OP2_304_39688_20131109_222248_outLine +BABEL_OP2_304_40092_20131031_014914_inLine +BABEL_OP2_304_40092_20131031_014914_outLine +BABEL_OP2_304_41493_20131113_221501_inLine +BABEL_OP2_304_41493_20131113_221501_outLine +BABEL_OP2_304_42126_20131024_215636_inLine +BABEL_OP2_304_42126_20131024_215636_outLine +BABEL_OP2_304_46333_20131204_195151_inLine +BABEL_OP2_304_46333_20131204_195151_outLine +BABEL_OP2_304_47877_20140227_065455_inLine +BABEL_OP2_304_47877_20140227_065455_outLine +BABEL_OP2_304_48789_20140108_012933_inLine +BABEL_OP2_304_48789_20140108_012933_outLine +BABEL_OP2_304_51417_20140228_011906_inLine +BABEL_OP2_304_51417_20140228_011906_outLine +BABEL_OP2_304_52025_20131116_004427_inLine +BABEL_OP2_304_52025_20131116_004427_outLine +BABEL_OP2_304_56429_20131129_223408_inLine +BABEL_OP2_304_56429_20131129_223408_outLine +BABEL_OP2_304_56684_20140223_001031_inLine +BABEL_OP2_304_56684_20140223_001031_outLine +BABEL_OP2_304_56720_20140119_005254_inLine +BABEL_OP2_304_56720_20140119_005254_outLine +BABEL_OP2_304_56743_20131218_042118_inLine +BABEL_OP2_304_56743_20131218_042118_outLine +BABEL_OP2_304_57609_20140121_202504_inLine +BABEL_OP2_304_57609_20140121_202504_outLine +BABEL_OP2_304_58103_20131212_013517_inLine +BABEL_OP2_304_58103_20131212_013517_outLine +BABEL_OP2_304_59291_20140207_213735_inLine +BABEL_OP2_304_59291_20140207_213735_outLine +BABEL_OP2_304_60418_20140111_062723_inLine +BABEL_OP2_304_60418_20140111_062723_outLine +BABEL_OP2_304_61219_20131206_061726_inLine +BABEL_OP2_304_61219_20131206_061726_outLine +BABEL_OP2_304_61357_20140113_232629_inLine +BABEL_OP2_304_61357_20140113_232629_outLine +BABEL_OP2_304_61963_20140226_192451_inLine +BABEL_OP2_304_61963_20140226_192451_outLine +BABEL_OP2_304_62323_20131113_001039_inLine +BABEL_OP2_304_62323_20131113_001039_outLine +BABEL_OP2_304_63445_20131127_005349_inLine +BABEL_OP2_304_63445_20131127_005349_outLine +BABEL_OP2_304_64759_20140118_203442_inLine +BABEL_OP2_304_64759_20140118_203442_outLine +BABEL_OP2_304_64796_20131128_060852_inLine +BABEL_OP2_304_64796_20131128_060852_outLine +BABEL_OP2_304_65077_20131115_005739_inLine +BABEL_OP2_304_65077_20131115_005739_outLine +BABEL_OP2_304_66026_20140212_224055_inLine +BABEL_OP2_304_66026_20140212_224055_outLine +BABEL_OP2_304_68910_20131101_042132_inLine +BABEL_OP2_304_68910_20131101_042132_outLine +BABEL_OP2_304_72903_20131113_023457_inLine 
+BABEL_OP2_304_72903_20131113_023457_outLine
+BABEL_OP2_304_73042_20131214_052022_inLine
+BABEL_OP2_304_73042_20131214_052022_outLine
+BABEL_OP2_304_74455_20140224_013111_inLine
+BABEL_OP2_304_74455_20140224_013111_outLine
+BABEL_OP2_304_78360_20140301_020449_inLine
+BABEL_OP2_304_78360_20140301_020449_outLine
+BABEL_OP2_304_79723_20131023_023756_inLine
+BABEL_OP2_304_79723_20131023_023756_outLine
+BABEL_OP2_304_79820_20131214_042918_inLine
+BABEL_OP2_304_79820_20131214_042918_outLine
+BABEL_OP2_304_80721_20140213_051749_inLine
+BABEL_OP2_304_80721_20140213_051749_outLine
+BABEL_OP2_304_81427_20131211_012524_inLine
+BABEL_OP2_304_81427_20131211_012524_outLine
+BABEL_OP2_304_83813_20131028_033118_inLine
+BABEL_OP2_304_83813_20131028_033118_outLine
+BABEL_OP2_304_83851_20131203_212613_inLine
+BABEL_OP2_304_83851_20131203_212613_outLine
+BABEL_OP2_304_84125_20131115_235931_inLine
+BABEL_OP2_304_84125_20131115_235931_outLine
+BABEL_OP2_304_85179_20140214_071121_inLine
+BABEL_OP2_304_85179_20140214_071121_outLine
+BABEL_OP2_304_92252_20131022_042600_inLine
+BABEL_OP2_304_92252_20131022_042600_outLine
+BABEL_OP2_304_93443_20131127_032037_inLine
+BABEL_OP2_304_93443_20131127_032037_outLine
+BABEL_OP2_304_96205_20140107_233946_inLine
+BABEL_OP2_304_96205_20140107_233946_outLine
+BABEL_OP2_304_98565_20131023_235505_inLine
+BABEL_OP2_304_98565_20131023_235505_outLine
+BABEL_OP2_304_99920_20140211_023914_inLine
+BABEL_OP2_304_99920_20140211_023914_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list
new file mode 100644
index 00000000000..5ddd7320c00
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list
@@ -0,0 +1,364 @@
+BABEL_OP2_304_10313_20131021_235202_inLine
+BABEL_OP2_304_10313_20131021_235202_outLine
+BABEL_OP2_304_10469_20131103_031709_inLine
+BABEL_OP2_304_10469_20131103_031709_outLine
+BABEL_OP2_304_11419_20131022_014303_inLine
+BABEL_OP2_304_11419_20131022_014303_outLine
+BABEL_OP2_304_11681_20131213_001647_inLine
+BABEL_OP2_304_11681_20131213_001647_outLine
+BABEL_OP2_304_12220_20131217_183010_inLine
+BABEL_OP2_304_12220_20131217_183010_outLine
+BABEL_OP2_304_13030_20131214_223348_inLine
+BABEL_OP2_304_13030_20131214_223348_outLine
+BABEL_OP2_304_13483_20140121_014427_inLine
+BABEL_OP2_304_13483_20140121_014427_outLine
+BABEL_OP2_304_13547_20131025_230206_inLine
+BABEL_OP2_304_13547_20131025_230206_outLine
+BABEL_OP2_304_14229_20131203_213430_inLine
+BABEL_OP2_304_14229_20131203_213430_outLine
+BABEL_OP2_304_14440_20140116_035720_inLine
+BABEL_OP2_304_14440_20140116_035720_outLine
+BABEL_OP2_304_14875_20131215_025538_inLine
+BABEL_OP2_304_14875_20131215_025538_outLine
+BABEL_OP2_304_15535_20140120_031512_inLine
+BABEL_OP2_304_15535_20140120_031512_outLine
+BABEL_OP2_304_15869_20131024_035059_inLine
+BABEL_OP2_304_15869_20131024_035059_outLine
+BABEL_OP2_304_16249_20131019_215021_inLine
+BABEL_OP2_304_16249_20131019_215021_outLine
+BABEL_OP2_304_16938_20140117_232323_inLine
+BABEL_OP2_304_16938_20140117_232323_outLine
+BABEL_OP2_304_17032_20140121_010326_inLine
+BABEL_OP2_304_17032_20140121_010326_outLine
+BABEL_OP2_304_17923_20140112_012407_inLine
+BABEL_OP2_304_17923_20140112_012407_outLine
+BABEL_OP2_304_18033_20131019_011702_inLine
+BABEL_OP2_304_18033_20131019_011702_outLine
+BABEL_OP2_304_19440_20131022_001353_inLine
+BABEL_OP2_304_19440_20131022_001353_outLine
+BABEL_OP2_304_19782_20140214_025658_inLine
+BABEL_OP2_304_19782_20140214_025658_outLine +BABEL_OP2_304_20330_20140222_024609_inLine +BABEL_OP2_304_20330_20140222_024609_outLine +BABEL_OP2_304_20454_20131022_030532_inLine +BABEL_OP2_304_20454_20131022_030532_outLine +BABEL_OP2_304_20800_20140109_021508_inLine +BABEL_OP2_304_20800_20140109_021508_outLine +BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine +BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine +BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine 
+BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine +BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine +BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine +BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57935_20140211_015542_inLine 
+BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine +BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine +BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine 
+BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine +BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine +BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine +BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine +BABEL_OP2_304_93224_20140121_191942_outLine +BABEL_OP2_304_93475_20131213_025105_inLine +BABEL_OP2_304_93475_20131213_025105_outLine +BABEL_OP2_304_93681_20131202_212236_inLine +BABEL_OP2_304_93681_20131202_212236_outLine +BABEL_OP2_304_94465_20140213_013300_inLine +BABEL_OP2_304_94465_20140213_013300_outLine +BABEL_OP2_304_94923_20140212_021923_inLine 
+BABEL_OP2_304_94923_20140212_021923_outLine
+BABEL_OP2_304_95677_20131024_031406_inLine
+BABEL_OP2_304_95677_20131024_031406_outLine
+BABEL_OP2_304_96405_20131214_205112_inLine
+BABEL_OP2_304_96405_20131214_205112_outLine
+BABEL_OP2_304_98165_20131218_234422_inLine
+BABEL_OP2_304_98165_20131218_234422_outLine
+BABEL_OP2_304_99264_20140222_211846_inLine
+BABEL_OP2_304_99264_20140222_211846_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/training.list b/egs/babel/s5d/conf/lists/304-lithuanian/training.list
new file mode 100644
index 00000000000..72d421bf1a9
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/training.list
@@ -0,0 +1,484 @@
+BABEL_OP2_304_10019_20131215_000700_inLine
+BABEL_OP2_304_10019_20131215_000700_outLine
+BABEL_OP2_304_10313_20131021_235202_inLine
+BABEL_OP2_304_10313_20131021_235202_outLine
+BABEL_OP2_304_10469_20131103_031709_inLine
+BABEL_OP2_304_10469_20131103_031709_outLine
+BABEL_OP2_304_11419_20131022_014303_inLine
+BABEL_OP2_304_11419_20131022_014303_outLine
+BABEL_OP2_304_11681_20131213_001647_inLine
+BABEL_OP2_304_11681_20131213_001647_outLine
+BABEL_OP2_304_11768_20131025_195124_inLine
+BABEL_OP2_304_11768_20131025_195124_outLine
+BABEL_OP2_304_12220_20131217_183010_inLine
+BABEL_OP2_304_12220_20131217_183010_outLine
+BABEL_OP2_304_13030_20131214_223348_inLine
+BABEL_OP2_304_13030_20131214_223348_outLine
+BABEL_OP2_304_13483_20140121_014427_inLine
+BABEL_OP2_304_13483_20140121_014427_outLine
+BABEL_OP2_304_13547_20131025_230206_inLine
+BABEL_OP2_304_13547_20131025_230206_outLine
+BABEL_OP2_304_13929_20131020_015822_inLine
+BABEL_OP2_304_13929_20131020_015822_outLine
+BABEL_OP2_304_14229_20131203_213430_inLine
+BABEL_OP2_304_14229_20131203_213430_outLine
+BABEL_OP2_304_14440_20140116_035720_inLine
+BABEL_OP2_304_14440_20140116_035720_outLine
+BABEL_OP2_304_14875_20131215_025538_inLine
+BABEL_OP2_304_14875_20131215_025538_outLine
+BABEL_OP2_304_15420_20131207_024154_inLine
+BABEL_OP2_304_15420_20131207_024154_outLine
+BABEL_OP2_304_15535_20140120_031512_inLine
+BABEL_OP2_304_15535_20140120_031512_outLine
+BABEL_OP2_304_15869_20131024_035059_inLine
+BABEL_OP2_304_15869_20131024_035059_outLine
+BABEL_OP2_304_16249_20131019_215021_inLine
+BABEL_OP2_304_16249_20131019_215021_outLine
+BABEL_OP2_304_16938_20140117_232323_inLine
+BABEL_OP2_304_16938_20140117_232323_outLine
+BABEL_OP2_304_17032_20140121_010326_inLine
+BABEL_OP2_304_17032_20140121_010326_outLine
+BABEL_OP2_304_17923_20140112_012407_inLine
+BABEL_OP2_304_17923_20140112_012407_outLine
+BABEL_OP2_304_17937_20131127_033509_inLine
+BABEL_OP2_304_17937_20131127_033509_outLine
+BABEL_OP2_304_18033_20131019_011702_inLine
+BABEL_OP2_304_18033_20131019_011702_outLine
+BABEL_OP2_304_18037_20131024_213803_inLine
+BABEL_OP2_304_18037_20131024_213803_outLine
+BABEL_OP2_304_18731_20131023_003305_inLine
+BABEL_OP2_304_18731_20131023_003305_outLine
+BABEL_OP2_304_19440_20131022_001353_inLine
+BABEL_OP2_304_19440_20131022_001353_outLine
+BABEL_OP2_304_19782_20140214_025658_inLine
+BABEL_OP2_304_19782_20140214_025658_outLine
+BABEL_OP2_304_20330_20140222_024609_inLine
+BABEL_OP2_304_20330_20140222_024609_outLine
+BABEL_OP2_304_20454_20131022_030532_inLine
+BABEL_OP2_304_20454_20131022_030532_outLine
+BABEL_OP2_304_20800_20140109_021508_inLine
+BABEL_OP2_304_20800_20140109_021508_outLine
+BABEL_OP2_304_20916_20131114_013626_inLine
+BABEL_OP2_304_20916_20131114_013626_outLine
+BABEL_OP2_304_21029_20131212_035937_inLine
+BABEL_OP2_304_21029_20131212_035937_outLine
+BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22170_20140304_071139_inLine +BABEL_OP2_304_22170_20140304_071139_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23098_20131107_033644_inLine +BABEL_OP2_304_23098_20131107_033644_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine +BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26074_20140112_023253_inLine +BABEL_OP2_304_26074_20140112_023253_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34564_20140213_195420_inLine +BABEL_OP2_304_34564_20140213_195420_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_35420_20131029_043734_inLine +BABEL_OP2_304_35420_20131029_043734_outLine +BABEL_OP2_304_35838_20131024_211303_inLine +BABEL_OP2_304_35838_20131024_211303_outLine +BABEL_OP2_304_36147_20131019_040800_inLine +BABEL_OP2_304_36147_20131019_040800_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine 
+BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine +BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_39688_20131109_222248_inLine +BABEL_OP2_304_39688_20131109_222248_outLine +BABEL_OP2_304_40092_20131031_014914_inLine +BABEL_OP2_304_40092_20131031_014914_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine +BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41493_20131113_221501_inLine +BABEL_OP2_304_41493_20131113_221501_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42126_20131024_215636_inLine +BABEL_OP2_304_42126_20131024_215636_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine +BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46333_20131204_195151_inLine +BABEL_OP2_304_46333_20131204_195151_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_47877_20140227_065455_inLine +BABEL_OP2_304_47877_20140227_065455_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_48789_20140108_012933_inLine +BABEL_OP2_304_48789_20140108_012933_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51417_20140228_011906_inLine +BABEL_OP2_304_51417_20140228_011906_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52025_20131116_004427_inLine +BABEL_OP2_304_52025_20131116_004427_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine 
+BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56429_20131129_223408_inLine +BABEL_OP2_304_56429_20131129_223408_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_56684_20140223_001031_inLine +BABEL_OP2_304_56684_20140223_001031_outLine +BABEL_OP2_304_56720_20140119_005254_inLine +BABEL_OP2_304_56720_20140119_005254_outLine +BABEL_OP2_304_56743_20131218_042118_inLine +BABEL_OP2_304_56743_20131218_042118_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57609_20140121_202504_inLine +BABEL_OP2_304_57609_20140121_202504_outLine +BABEL_OP2_304_57935_20140211_015542_inLine +BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58103_20131212_013517_inLine +BABEL_OP2_304_58103_20131212_013517_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59291_20140207_213735_inLine +BABEL_OP2_304_59291_20140207_213735_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60418_20140111_062723_inLine +BABEL_OP2_304_60418_20140111_062723_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine +BABEL_OP2_304_61219_20131206_061726_inLine +BABEL_OP2_304_61219_20131206_061726_outLine +BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61357_20140113_232629_inLine +BABEL_OP2_304_61357_20140113_232629_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_61963_20140226_192451_inLine +BABEL_OP2_304_61963_20140226_192451_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_62323_20131113_001039_inLine +BABEL_OP2_304_62323_20131113_001039_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63445_20131127_005349_inLine 
+BABEL_OP2_304_63445_20131127_005349_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_64759_20140118_203442_inLine +BABEL_OP2_304_64759_20140118_203442_outLine +BABEL_OP2_304_64796_20131128_060852_inLine +BABEL_OP2_304_64796_20131128_060852_outLine +BABEL_OP2_304_65077_20131115_005739_inLine +BABEL_OP2_304_65077_20131115_005739_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine +BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66026_20140212_224055_inLine +BABEL_OP2_304_66026_20140212_224055_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_68910_20131101_042132_inLine +BABEL_OP2_304_68910_20131101_042132_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_72903_20131113_023457_inLine +BABEL_OP2_304_72903_20131113_023457_outLine +BABEL_OP2_304_73042_20131214_052022_inLine +BABEL_OP2_304_73042_20131214_052022_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_74455_20140224_013111_inLine +BABEL_OP2_304_74455_20140224_013111_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine +BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine 
+BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78360_20140301_020449_inLine +BABEL_OP2_304_78360_20140301_020449_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine +BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_79723_20131023_023756_inLine +BABEL_OP2_304_79723_20131023_023756_outLine +BABEL_OP2_304_79820_20131214_042918_inLine +BABEL_OP2_304_79820_20131214_042918_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine +BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80721_20140213_051749_inLine +BABEL_OP2_304_80721_20140213_051749_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81427_20131211_012524_inLine +BABEL_OP2_304_81427_20131211_012524_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83813_20131028_033118_inLine +BABEL_OP2_304_83813_20131028_033118_outLine +BABEL_OP2_304_83851_20131203_212613_inLine +BABEL_OP2_304_83851_20131203_212613_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84125_20131115_235931_inLine +BABEL_OP2_304_84125_20131115_235931_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85179_20140214_071121_inLine +BABEL_OP2_304_85179_20140214_071121_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92252_20131022_042600_inLine +BABEL_OP2_304_92252_20131022_042600_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine 
+BABEL_OP2_304_93224_20140121_191942_outLine
+BABEL_OP2_304_93443_20131127_032037_inLine
+BABEL_OP2_304_93443_20131127_032037_outLine
+BABEL_OP2_304_93475_20131213_025105_inLine
+BABEL_OP2_304_93475_20131213_025105_outLine
+BABEL_OP2_304_93681_20131202_212236_inLine
+BABEL_OP2_304_93681_20131202_212236_outLine
+BABEL_OP2_304_94465_20140213_013300_inLine
+BABEL_OP2_304_94465_20140213_013300_outLine
+BABEL_OP2_304_94923_20140212_021923_inLine
+BABEL_OP2_304_94923_20140212_021923_outLine
+BABEL_OP2_304_95677_20131024_031406_inLine
+BABEL_OP2_304_95677_20131024_031406_outLine
+BABEL_OP2_304_96205_20140107_233946_inLine
+BABEL_OP2_304_96205_20140107_233946_outLine
+BABEL_OP2_304_96405_20131214_205112_inLine
+BABEL_OP2_304_96405_20131214_205112_outLine
+BABEL_OP2_304_98165_20131218_234422_inLine
+BABEL_OP2_304_98165_20131218_234422_outLine
+BABEL_OP2_304_98565_20131023_235505_inLine
+BABEL_OP2_304_98565_20131023_235505_outLine
+BABEL_OP2_304_99264_20140222_211846_inLine
+BABEL_OP2_304_99264_20140222_211846_outLine
+BABEL_OP2_304_99920_20140211_023914_inLine
+BABEL_OP2_304_99920_20140211_023914_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list
new file mode 100644
index 00000000000..fe18640b4ca
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list
@@ -0,0 +1,524 @@
+BABEL_OP2_304_10319_20131123_212421_inLine
+BABEL_OP2_304_10938_20131210_232654_outLine
+BABEL_OP2_304_10974_20140112_002642_inLine
+BABEL_OP2_304_10974_20140112_002642_outLine
+BABEL_OP2_304_11663_20140228_001249_inLine
+BABEL_OP2_304_11663_20140228_001249_outLine
+BABEL_OP2_304_12036_20140212_031355_inLine
+BABEL_OP2_304_12036_20140212_031355_outLine
+BABEL_OP2_304_13184_20140226_230154_inLine
+BABEL_OP2_304_13184_20140226_230154_outLine
+BABEL_OP2_304_13490_20140115_234942_inLine
+BABEL_OP2_304_13490_20140115_234942_outLine
+BABEL_OP2_304_13709_20140301_011301_inLine
+BABEL_OP2_304_13709_20140301_011301_outLine
+BABEL_OP2_304_14179_20140213_202716_inLine
+BABEL_OP2_304_14179_20140213_202716_outLine
+BABEL_OP2_304_14228_20140214_035430_inLine
+BABEL_OP2_304_14228_20140214_035430_outLine
+BABEL_OP2_304_14350_20131215_015450_outLine
+BABEL_OP2_304_14884_20131019_215509_outLine
+BABEL_OP2_304_14929_20140120_043551_inLine
+BABEL_OP2_304_15324_20140111_022619_inLine
+BABEL_OP2_304_15324_20140111_022619_outLine
+BABEL_OP2_304_15702_20140121_182136_outLine
+BABEL_OP2_304_15926_20140114_014611_inLine
+BABEL_OP2_304_15926_20140114_014611_outLine
+BABEL_OP2_304_16149_20131218_020646_inLine
+BABEL_OP2_304_16149_20131218_020646_outLine
+BABEL_OP2_304_17165_20140115_221908_inLine
+BABEL_OP2_304_17165_20140115_221908_outLine
+BABEL_OP2_304_17440_20140210_030642_inLine
+BABEL_OP2_304_17440_20140210_030642_outLine
+BABEL_OP2_304_17496_20140124_200649_inLine
+BABEL_OP2_304_17496_20140124_200649_outLine
+BABEL_OP2_304_17567_20140112_001028_inLine
+BABEL_OP2_304_17567_20140112_001028_outLine
+BABEL_OP2_304_18370_20131019_230544_inLine
+BABEL_OP2_304_18924_20140122_013011_inLine
+BABEL_OP2_304_19134_20140211_192844_inLine
+BABEL_OP2_304_19134_20140211_192844_outLine
+BABEL_OP2_304_19621_20140111_004245_inLine
+BABEL_OP2_304_19621_20140111_004245_outLine
+BABEL_OP2_304_19672_20140111_213853_inLine
+BABEL_OP2_304_19672_20140111_213853_outLine
+BABEL_OP2_304_20682_20140212_225527_inLine
+BABEL_OP2_304_20682_20140212_225527_outLine
+BABEL_OP2_304_20721_20131126_051609_inLine
+BABEL_OP2_304_20896_20131024_162213_outLine +BABEL_OP2_304_21004_20140202_211736_inLine +BABEL_OP2_304_21004_20140202_211736_outLine +BABEL_OP2_304_21794_20140107_223934_inLine +BABEL_OP2_304_21794_20140107_223934_outLine +BABEL_OP2_304_22280_20140112_040339_inLine +BABEL_OP2_304_22280_20140112_040339_outLine +BABEL_OP2_304_22612_20140214_015154_inLine +BABEL_OP2_304_22612_20140214_015154_outLine +BABEL_OP2_304_23092_20140210_000452_inLine +BABEL_OP2_304_23092_20140210_000452_outLine +BABEL_OP2_304_23151_20140226_015000_inLine +BABEL_OP2_304_23151_20140226_015000_outLine +BABEL_OP2_304_23153_20140108_004710_inLine +BABEL_OP2_304_23153_20140108_004710_outLine +BABEL_OP2_304_23239_20140113_023420_inLine +BABEL_OP2_304_23239_20140113_023420_outLine +BABEL_OP2_304_23505_20140207_012218_inLine +BABEL_OP2_304_23505_20140207_012218_outLine +BABEL_OP2_304_24270_20140112_035305_inLine +BABEL_OP2_304_24270_20140112_035305_outLine +BABEL_OP2_304_24470_20140213_013507_inLine +BABEL_OP2_304_24470_20140213_013507_outLine +BABEL_OP2_304_26072_20140303_191300_inLine +BABEL_OP2_304_26072_20140303_191300_outLine +BABEL_OP2_304_26836_20131203_035109_inLine +BABEL_OP2_304_27203_20140114_184748_inLine +BABEL_OP2_304_27203_20140114_184748_outLine +BABEL_OP2_304_27590_20140120_000746_inLine +BABEL_OP2_304_27590_20140120_000746_outLine +BABEL_OP2_304_27841_20140224_192246_inLine +BABEL_OP2_304_27841_20140224_192246_outLine +BABEL_OP2_304_28190_20140216_014935_inLine +BABEL_OP2_304_28190_20140216_014935_outLine +BABEL_OP2_304_28303_20131204_205256_inLine +BABEL_OP2_304_28303_20131204_205256_outLine +BABEL_OP2_304_28606_20140118_214444_inLine +BABEL_OP2_304_28606_20140118_214444_outLine +BABEL_OP2_304_28814_20140221_030045_inLine +BABEL_OP2_304_28814_20140221_030045_outLine +BABEL_OP2_304_29076_20140214_031718_inLine +BABEL_OP2_304_29076_20140214_031718_outLine +BABEL_OP2_304_29323_20140224_220848_inLine +BABEL_OP2_304_29323_20140224_220848_outLine +BABEL_OP2_304_29416_20140209_221011_inLine +BABEL_OP2_304_29416_20140209_221011_outLine +BABEL_OP2_304_29685_20131203_214940_inLine +BABEL_OP2_304_29685_20131203_214940_outLine +BABEL_OP2_304_30345_20140220_044057_inLine +BABEL_OP2_304_30345_20140220_044057_outLine +BABEL_OP2_304_30432_20140114_200919_inLine +BABEL_OP2_304_30432_20140114_200919_outLine +BABEL_OP2_304_31109_20140117_222519_outLine +BABEL_OP2_304_31184_20140107_010323_inLine +BABEL_OP2_304_31184_20140107_010323_outLine +BABEL_OP2_304_31346_20140214_012447_inLine +BABEL_OP2_304_31346_20140214_012447_outLine +BABEL_OP2_304_31583_20140217_070401_inLine +BABEL_OP2_304_31583_20140217_070401_outLine +BABEL_OP2_304_31628_20140113_222259_inLine +BABEL_OP2_304_31628_20140113_222259_outLine +BABEL_OP2_304_31728_20131026_182200_outLine +BABEL_OP2_304_32380_20131202_212228_inLine +BABEL_OP2_304_32708_20131214_184617_inLine +BABEL_OP2_304_33175_20131218_055325_inLine +BABEL_OP2_304_33175_20131218_055325_outLine +BABEL_OP2_304_33229_20140224_173431_inLine +BABEL_OP2_304_33229_20140224_173431_outLine +BABEL_OP2_304_33635_20140108_055209_inLine +BABEL_OP2_304_33635_20140108_055209_outLine +BABEL_OP2_304_33913_20140208_001736_outLine +BABEL_OP2_304_33951_20140107_014839_inLine +BABEL_OP2_304_33951_20140107_014839_outLine +BABEL_OP2_304_34197_20140120_003604_inLine +BABEL_OP2_304_34197_20140120_010000_inLine +BABEL_OP2_304_34477_20131207_235614_inLine +BABEL_OP2_304_34477_20131207_235614_outLine +BABEL_OP2_304_34811_20140111_213202_inLine +BABEL_OP2_304_34811_20140111_213202_outLine 
+BABEL_OP2_304_34903_20140114_055610_outLine +BABEL_OP2_304_35000_20140214_054306_inLine +BABEL_OP2_304_35000_20140214_054306_outLine +BABEL_OP2_304_35143_20140213_194234_inLine +BABEL_OP2_304_35143_20140213_194234_outLine +BABEL_OP2_304_35583_20140221_183221_inLine +BABEL_OP2_304_35583_20140221_183221_outLine +BABEL_OP2_304_36669_20140107_024312_inLine +BABEL_OP2_304_36669_20140107_024312_outLine +BABEL_OP2_304_36894_20131214_210131_inLine +BABEL_OP2_304_36894_20131214_210131_outLine +BABEL_OP2_304_37271_20140214_210456_inLine +BABEL_OP2_304_37271_20140214_210456_outLine +BABEL_OP2_304_37281_20140112_044135_outLine +BABEL_OP2_304_37776_20131029_063539_inLine +BABEL_OP2_304_37853_20140228_011410_outLine +BABEL_OP2_304_38689_20140122_053200_inLine +BABEL_OP2_304_38689_20140122_053200_outLine +BABEL_OP2_304_39059_20140223_225026_inLine +BABEL_OP2_304_39059_20140223_225026_outLine +BABEL_OP2_304_39307_20131212_235627_outLine +BABEL_OP2_304_39426_20140222_001315_inLine +BABEL_OP2_304_39426_20140222_001315_outLine +BABEL_OP2_304_39555_20140217_001722_inLine +BABEL_OP2_304_39555_20140217_001722_outLine +BABEL_OP2_304_39744_20131205_204607_inLine +BABEL_OP2_304_39744_20131205_204607_outLine +BABEL_OP2_304_40557_20140214_015226_inLine +BABEL_OP2_304_40557_20140214_015226_outLine +BABEL_OP2_304_40648_20140105_023520_inLine +BABEL_OP2_304_40648_20140105_023520_outLine +BABEL_OP2_304_40740_20140303_034635_inLine +BABEL_OP2_304_40740_20140303_034635_outLine +BABEL_OP2_304_41038_20140120_031808_inLine +BABEL_OP2_304_41038_20140120_031808_outLine +BABEL_OP2_304_41097_20140114_065635_inLine +BABEL_OP2_304_41097_20140114_065635_outLine +BABEL_OP2_304_41100_20131212_035211_outLine +BABEL_OP2_304_41682_20131027_233404_outLine +BABEL_OP2_304_42029_20140225_042923_outLine +BABEL_OP2_304_42600_20131203_001235_inLine +BABEL_OP2_304_42619_20140124_050222_inLine +BABEL_OP2_304_42619_20140124_050222_outLine +BABEL_OP2_304_42942_20140122_011934_inLine +BABEL_OP2_304_42942_20140122_011934_outLine +BABEL_OP2_304_43788_20140113_035220_inLine +BABEL_OP2_304_43788_20140113_035220_outLine +BABEL_OP2_304_43789_20140108_205751_inLine +BABEL_OP2_304_43789_20140108_205751_outLine +BABEL_OP2_304_44477_20140116_062617_outLine +BABEL_OP2_304_44709_20140111_061727_inLine +BABEL_OP2_304_44709_20140111_061727_outLine +BABEL_OP2_304_44847_20140123_043546_inLine +BABEL_OP2_304_44847_20140123_043546_outLine +BABEL_OP2_304_46041_20140216_224708_inLine +BABEL_OP2_304_46041_20140216_224708_outLine +BABEL_OP2_304_46066_20140303_215904_inLine +BABEL_OP2_304_46066_20140303_215904_outLine +BABEL_OP2_304_46757_20140123_053540_outLine +BABEL_OP2_304_47186_20140213_022646_inLine +BABEL_OP2_304_47186_20140213_022646_outLine +BABEL_OP2_304_47283_20131208_055437_inLine +BABEL_OP2_304_47283_20131208_055437_outLine +BABEL_OP2_304_47823_20140213_200425_inLine +BABEL_OP2_304_47823_20140213_200425_outLine +BABEL_OP2_304_47959_20131206_002421_inLine +BABEL_OP2_304_47959_20131206_002421_outLine +BABEL_OP2_304_48243_20131130_191603_inLine +BABEL_OP2_304_48243_20131130_191603_outLine +BABEL_OP2_304_48422_20140226_030714_inLine +BABEL_OP2_304_48422_20140226_030714_outLine +BABEL_OP2_304_49118_20140224_035159_inLine +BABEL_OP2_304_49118_20140224_035159_outLine +BABEL_OP2_304_49502_20131128_050926_inLine +BABEL_OP2_304_49502_20131128_050926_outLine +BABEL_OP2_304_49637_20131204_225149_outLine +BABEL_OP2_304_49739_20131025_221623_outLine +BABEL_OP2_304_49812_20140221_004200_inLine +BABEL_OP2_304_49812_20140221_004200_outLine 
+BABEL_OP2_304_49870_20131026_011340_outLine +BABEL_OP2_304_49902_20131215_011726_inLine +BABEL_OP2_304_49902_20131215_011726_outLine +BABEL_OP2_304_50427_20140121_041440_inLine +BABEL_OP2_304_50427_20140121_041440_outLine +BABEL_OP2_304_50549_20140225_015442_inLine +BABEL_OP2_304_50549_20140225_015442_outLine +BABEL_OP2_304_50630_20140115_225817_outLine +BABEL_OP2_304_50958_20140107_035922_inLine +BABEL_OP2_304_50958_20140107_035922_outLine +BABEL_OP2_304_51156_20131108_034329_outLine +BABEL_OP2_304_51611_20131215_000933_outLine +BABEL_OP2_304_51611_20131215_001818_outLine +BABEL_OP2_304_51955_20131130_031610_inLine +BABEL_OP2_304_51955_20131130_031610_outLine +BABEL_OP2_304_51968_20140115_014540_outLine +BABEL_OP2_304_52404_20140111_203352_inLine +BABEL_OP2_304_52404_20140111_203352_outLine +BABEL_OP2_304_52422_20140227_022646_inLine +BABEL_OP2_304_52422_20140227_022646_outLine +BABEL_OP2_304_52442_20140111_220136_inLine +BABEL_OP2_304_52442_20140111_220136_outLine +BABEL_OP2_304_52717_20131212_005407_inLine +BABEL_OP2_304_52717_20131212_005407_outLine +BABEL_OP2_304_52932_20131203_060700_inLine +BABEL_OP2_304_53957_20140213_050235_inLine +BABEL_OP2_304_53957_20140213_050235_outLine +BABEL_OP2_304_54104_20140122_065051_inLine +BABEL_OP2_304_54162_20140117_004147_outLine +BABEL_OP2_304_54405_20140111_033306_inLine +BABEL_OP2_304_54405_20140111_033306_outLine +BABEL_OP2_304_54477_20140214_014521_outLine +BABEL_OP2_304_56198_20131207_044103_inLine +BABEL_OP2_304_56198_20131207_044103_outLine +BABEL_OP2_304_56198_20131207_044824_inLine +BABEL_OP2_304_56198_20131207_044824_outLine +BABEL_OP2_304_56370_20131210_222152_inLine +BABEL_OP2_304_56370_20131210_222152_outLine +BABEL_OP2_304_56606_20131029_001934_inLine +BABEL_OP2_304_57067_20140226_203014_inLine +BABEL_OP2_304_57067_20140226_203014_outLine +BABEL_OP2_304_57093_20140108_195135_inLine +BABEL_OP2_304_57093_20140108_195135_outLine +BABEL_OP2_304_57529_20140213_194042_inLine +BABEL_OP2_304_57529_20140213_194042_outLine +BABEL_OP2_304_57919_20131019_181730_inLine +BABEL_OP2_304_58489_20140213_201405_inLine +BABEL_OP2_304_58489_20140213_201405_outLine +BABEL_OP2_304_58821_20140122_043719_inLine +BABEL_OP2_304_58821_20140122_043719_outLine +BABEL_OP2_304_59078_20140114_043013_inLine +BABEL_OP2_304_59078_20140114_043013_outLine +BABEL_OP2_304_59301_20140220_055528_inLine +BABEL_OP2_304_59301_20140220_055528_outLine +BABEL_OP2_304_59301_20140220_061405_inLine +BABEL_OP2_304_59301_20140220_061405_outLine +BABEL_OP2_304_59509_20140111_012159_inLine +BABEL_OP2_304_59509_20140111_012159_outLine +BABEL_OP2_304_59993_20131207_052409_inLine +BABEL_OP2_304_59993_20131207_052409_outLine +BABEL_OP2_304_60650_20131104_000431_outLine +BABEL_OP2_304_61040_20140214_024448_inLine +BABEL_OP2_304_61040_20140214_024448_outLine +BABEL_OP2_304_61167_20131218_000849_inLine +BABEL_OP2_304_61167_20131218_000849_outLine +BABEL_OP2_304_61190_20131202_233122_inLine +BABEL_OP2_304_61190_20131202_233122_outLine +BABEL_OP2_304_61435_20140220_043508_inLine +BABEL_OP2_304_61435_20140220_043508_outLine +BABEL_OP2_304_62014_20140110_225736_inLine +BABEL_OP2_304_62014_20140110_225736_outLine +BABEL_OP2_304_62286_20140107_221925_inLine +BABEL_OP2_304_62286_20140107_221925_outLine +BABEL_OP2_304_62471_20131023_045947_inLine +BABEL_OP2_304_62471_20131023_045947_outLine +BABEL_OP2_304_62491_20131024_043538_outLine +BABEL_OP2_304_62734_20131211_050743_inLine +BABEL_OP2_304_62734_20131211_050743_outLine +BABEL_OP2_304_62810_20131205_000409_outLine 
+BABEL_OP2_304_62810_20131205_001411_outLine +BABEL_OP2_304_62835_20140111_023213_inLine +BABEL_OP2_304_62835_20140111_023213_outLine +BABEL_OP2_304_62852_20131217_062508_outLine +BABEL_OP2_304_63220_20140118_205257_outLine +BABEL_OP2_304_63757_20140112_050031_inLine +BABEL_OP2_304_64065_20131208_042849_inLine +BABEL_OP2_304_64065_20131208_042849_outLine +BABEL_OP2_304_64350_20140112_002927_inLine +BABEL_OP2_304_64350_20140112_002927_outLine +BABEL_OP2_304_64398_20140114_001457_inLine +BABEL_OP2_304_64635_20131031_005941_outLine +BABEL_OP2_304_64768_20131205_215037_inLine +BABEL_OP2_304_64768_20131205_215037_outLine +BABEL_OP2_304_64870_20140121_052458_inLine +BABEL_OP2_304_64870_20140121_052458_outLine +BABEL_OP2_304_65064_20140211_004709_inLine +BABEL_OP2_304_65064_20140211_004709_outLine +BABEL_OP2_304_65298_20140225_223150_outLine +BABEL_OP2_304_65367_20140223_221652_inLine +BABEL_OP2_304_65367_20140223_221652_outLine +BABEL_OP2_304_66519_20131217_221237_inLine +BABEL_OP2_304_66519_20131217_221237_outLine +BABEL_OP2_304_66959_20140214_010021_inLine +BABEL_OP2_304_66959_20140214_010021_outLine +BABEL_OP2_304_67659_20131210_070128_inLine +BABEL_OP2_304_67659_20131210_070128_outLine +BABEL_OP2_304_67842_20131209_002442_inLine +BABEL_OP2_304_67842_20131209_002442_outLine +BABEL_OP2_304_68244_20140112_003451_inLine +BABEL_OP2_304_68244_20140112_003451_outLine +BABEL_OP2_304_69578_20140116_015102_inLine +BABEL_OP2_304_69578_20140116_015102_outLine +BABEL_OP2_304_69633_20140112_001408_inLine +BABEL_OP2_304_69633_20140112_001408_outLine +BABEL_OP2_304_69636_20140114_002409_outLine +BABEL_OP2_304_70221_20140114_051222_inLine +BABEL_OP2_304_70221_20140114_051222_outLine +BABEL_OP2_304_70293_20131019_050801_inLine +BABEL_OP2_304_70343_20140123_031245_inLine +BABEL_OP2_304_70343_20140123_031245_outLine +BABEL_OP2_304_70713_20140304_052610_inLine +BABEL_OP2_304_70713_20140304_052610_outLine +BABEL_OP2_304_71038_20140227_011955_inLine +BABEL_OP2_304_71038_20140227_011955_outLine +BABEL_OP2_304_71067_20140111_025531_inLine +BABEL_OP2_304_71067_20140111_025531_outLine +BABEL_OP2_304_71282_20140303_003653_inLine +BABEL_OP2_304_71282_20140303_003653_outLine +BABEL_OP2_304_71559_20140217_031954_inLine +BABEL_OP2_304_71559_20140217_031954_outLine +BABEL_OP2_304_72324_20140111_214356_inLine +BABEL_OP2_304_72324_20140111_214356_outLine +BABEL_OP2_304_73072_20131211_203538_inLine +BABEL_OP2_304_73119_20131207_030241_inLine +BABEL_OP2_304_73119_20131207_030241_outLine +BABEL_OP2_304_73518_20140227_044044_inLine +BABEL_OP2_304_73518_20140227_044044_outLine +BABEL_OP2_304_74253_20140209_020556_outLine +BABEL_OP2_304_75261_20140214_002012_inLine +BABEL_OP2_304_75261_20140214_002012_outLine +BABEL_OP2_304_76218_20140112_030818_inLine +BABEL_OP2_304_76238_20140213_065615_outLine +BABEL_OP2_304_77391_20131206_031416_inLine +BABEL_OP2_304_77391_20131206_031416_outLine +BABEL_OP2_304_77567_20131217_032300_inLine +BABEL_OP2_304_78016_20131211_033559_inLine +BABEL_OP2_304_78016_20131211_033559_outLine +BABEL_OP2_304_78016_20131211_034555_inLine +BABEL_OP2_304_78016_20131211_034555_outLine +BABEL_OP2_304_78016_20131211_035830_inLine +BABEL_OP2_304_78016_20131211_035830_outLine +BABEL_OP2_304_78544_20140118_220548_inLine +BABEL_OP2_304_78544_20140118_220548_outLine +BABEL_OP2_304_78544_20140118_221258_inLine +BABEL_OP2_304_78544_20140118_221258_outLine +BABEL_OP2_304_78544_20140118_222525_inLine +BABEL_OP2_304_78544_20140118_222525_outLine +BABEL_OP2_304_78833_20131024_214927_outLine 
+BABEL_OP2_304_79129_20140303_004430_inLine +BABEL_OP2_304_79129_20140303_004430_outLine +BABEL_OP2_304_79139_20140117_233824_inLine +BABEL_OP2_304_79139_20140117_233824_outLine +BABEL_OP2_304_79167_20140113_043213_inLine +BABEL_OP2_304_79167_20140113_043213_outLine +BABEL_OP2_304_79571_20140115_210036_inLine +BABEL_OP2_304_79571_20140115_210036_outLine +BABEL_OP2_304_79590_20140115_194001_inLine +BABEL_OP2_304_79590_20140115_194001_outLine +BABEL_OP2_304_80136_20140221_210907_inLine +BABEL_OP2_304_80136_20140221_210907_outLine +BABEL_OP2_304_80306_20140113_211243_inLine +BABEL_OP2_304_80306_20140113_211243_outLine +BABEL_OP2_304_80781_20131207_214652_inLine +BABEL_OP2_304_80781_20131207_214652_outLine +BABEL_OP2_304_81392_20140120_040823_inLine +BABEL_OP2_304_81392_20140120_040823_outLine +BABEL_OP2_304_81435_20140122_044047_inLine +BABEL_OP2_304_81435_20140122_044047_outLine +BABEL_OP2_304_81553_20140221_190721_inLine +BABEL_OP2_304_81553_20140221_190721_outLine +BABEL_OP2_304_81622_20140115_191114_inLine +BABEL_OP2_304_81622_20140115_191114_outLine +BABEL_OP2_304_81671_20140303_000114_inLine +BABEL_OP2_304_81671_20140303_000114_outLine +BABEL_OP2_304_82138_20140108_210521_inLine +BABEL_OP2_304_82138_20140108_210521_outLine +BABEL_OP2_304_82140_20140109_010030_inLine +BABEL_OP2_304_82140_20140109_010030_outLine +BABEL_OP2_304_82966_20140212_003555_outLine +BABEL_OP2_304_82979_20131206_030414_inLine +BABEL_OP2_304_82979_20131206_030414_outLine +BABEL_OP2_304_83238_20140121_050333_inLine +BABEL_OP2_304_83238_20140121_050333_outLine +BABEL_OP2_304_83609_20131031_045140_inLine +BABEL_OP2_304_83609_20131031_045140_outLine +BABEL_OP2_304_84055_20140304_014209_inLine +BABEL_OP2_304_84055_20140304_014209_outLine +BABEL_OP2_304_84327_20140119_004436_inLine +BABEL_OP2_304_84327_20140119_004436_outLine +BABEL_OP2_304_84430_20131024_015151_inLine +BABEL_OP2_304_84467_20131030_000051_outLine +BABEL_OP2_304_84541_20131113_030920_inLine +BABEL_OP2_304_84583_20140122_023451_inLine +BABEL_OP2_304_84583_20140122_023451_outLine +BABEL_OP2_304_84715_20140225_204018_inLine +BABEL_OP2_304_84715_20140225_204018_outLine +BABEL_OP2_304_84823_20140213_070220_inLine +BABEL_OP2_304_84823_20140213_070220_outLine +BABEL_OP2_304_84936_20140115_025845_inLine +BABEL_OP2_304_84936_20140115_025845_outLine +BABEL_OP2_304_85028_20140216_043545_inLine +BABEL_OP2_304_85028_20140216_043545_outLine +BABEL_OP2_304_85048_20140213_194500_inLine +BABEL_OP2_304_85048_20140213_194500_outLine +BABEL_OP2_304_85651_20131213_231614_inLine +BABEL_OP2_304_85651_20131213_231614_outLine +BABEL_OP2_304_86191_20131208_035829_inLine +BABEL_OP2_304_86191_20131208_035829_outLine +BABEL_OP2_304_86433_20140122_053030_inLine +BABEL_OP2_304_86433_20140122_053030_outLine +BABEL_OP2_304_86676_20140109_025931_inLine +BABEL_OP2_304_86676_20140109_025931_outLine +BABEL_OP2_304_86748_20140225_213348_inLine +BABEL_OP2_304_86748_20140225_213348_outLine +BABEL_OP2_304_87073_20131114_031449_outLine +BABEL_OP2_304_87074_20131216_043306_inLine +BABEL_OP2_304_87074_20131216_043306_outLine +BABEL_OP2_304_87313_20140115_011909_inLine +BABEL_OP2_304_87313_20140115_011909_outLine +BABEL_OP2_304_87889_20140225_010303_inLine +BABEL_OP2_304_87889_20140225_010303_outLine +BABEL_OP2_304_88445_20140112_004454_inLine +BABEL_OP2_304_88445_20140112_004454_outLine +BABEL_OP2_304_88661_20140109_034129_inLine +BABEL_OP2_304_88661_20140109_034129_outLine +BABEL_OP2_304_88674_20131029_180931_outLine +BABEL_OP2_304_88686_20131124_012926_outLine 
+BABEL_OP2_304_88938_20140226_001937_inLine +BABEL_OP2_304_89575_20140213_180857_inLine +BABEL_OP2_304_89575_20140213_180857_outLine +BABEL_OP2_304_89695_20140107_232450_inLine +BABEL_OP2_304_89695_20140107_232450_outLine +BABEL_OP2_304_89718_20131112_202612_outLine +BABEL_OP2_304_89877_20140114_202105_inLine +BABEL_OP2_304_89877_20140114_202105_outLine +BABEL_OP2_304_89888_20131215_015024_outLine +BABEL_OP2_304_89943_20131214_030426_inLine +BABEL_OP2_304_89943_20131214_030426_outLine +BABEL_OP2_304_91252_20131113_035252_inLine +BABEL_OP2_304_91319_20140222_040737_inLine +BABEL_OP2_304_91319_20140222_040737_outLine +BABEL_OP2_304_91336_20140109_002119_inLine +BABEL_OP2_304_91336_20140109_002119_outLine +BABEL_OP2_304_91463_20140212_010126_inLine +BABEL_OP2_304_91463_20140212_012624_inLine +BABEL_OP2_304_91825_20131214_190413_inLine +BABEL_OP2_304_91825_20131214_190413_outLine +BABEL_OP2_304_91825_20131214_191357_inLine +BABEL_OP2_304_91825_20131214_191357_outLine +BABEL_OP2_304_92065_20140213_212712_inLine +BABEL_OP2_304_92065_20140213_212712_outLine +BABEL_OP2_304_92065_20140213_213512_inLine +BABEL_OP2_304_92065_20140213_213512_outLine +BABEL_OP2_304_92065_20140213_214440_inLine +BABEL_OP2_304_92065_20140213_214440_outLine +BABEL_OP2_304_92176_20140114_011149_inLine +BABEL_OP2_304_92176_20140114_011149_outLine +BABEL_OP2_304_92356_20140225_005836_inLine +BABEL_OP2_304_92356_20140225_005836_outLine +BABEL_OP2_304_92459_20131207_233730_outLine +BABEL_OP2_304_92527_20131217_074850_inLine +BABEL_OP2_304_92527_20131217_074850_outLine +BABEL_OP2_304_92740_20140112_014905_inLine +BABEL_OP2_304_92740_20140112_014905_outLine +BABEL_OP2_304_92809_20131213_011040_inLine +BABEL_OP2_304_93153_20140207_015821_inLine +BABEL_OP2_304_93153_20140207_015821_outLine +BABEL_OP2_304_93490_20140216_035543_inLine +BABEL_OP2_304_93490_20140216_035543_outLine +BABEL_OP2_304_93964_20140108_222311_inLine +BABEL_OP2_304_94212_20131020_203106_inLine +BABEL_OP2_304_94253_20131216_233242_inLine +BABEL_OP2_304_94253_20131216_233242_outLine +BABEL_OP2_304_94713_20131125_035926_outLine +BABEL_OP2_304_95446_20140221_224816_inLine +BABEL_OP2_304_95446_20140221_224816_outLine +BABEL_OP2_304_95937_20131108_201706_inLine +BABEL_OP2_304_96077_20131107_020023_outLine +BABEL_OP2_304_96446_20131204_220739_inLine +BABEL_OP2_304_96446_20131204_220739_outLine +BABEL_OP2_304_96525_20140216_231544_inLine +BABEL_OP2_304_96525_20140216_231544_outLine +BABEL_OP2_304_96584_20140228_045227_inLine +BABEL_OP2_304_96584_20140228_045227_outLine +BABEL_OP2_304_96910_20131203_185444_inLine +BABEL_OP2_304_96910_20131203_185444_outLine +BABEL_OP2_304_97264_20140225_193258_inLine +BABEL_OP2_304_97264_20140225_193258_outLine +BABEL_OP2_304_97376_20140121_204102_inLine +BABEL_OP2_304_97376_20140121_204102_outLine +BABEL_OP2_304_97461_20140111_022155_inLine +BABEL_OP2_304_97461_20140111_022155_outLine +BABEL_OP2_304_97988_20140212_022710_inLine +BABEL_OP2_304_97988_20140212_022710_outLine +BABEL_OP2_304_98390_20140123_203258_inLine +BABEL_OP2_304_98390_20140123_203258_outLine +BABEL_OP2_304_98580_20140115_202801_inLine +BABEL_OP2_304_98580_20140115_202801_outLine +BABEL_OP2_304_99202_20140121_200458_outLine +BABEL_OP2_304_99487_20131211_225837_outLine +BABEL_OP2_304_99516_20131202_003142_inLine +BABEL_OP2_304_99594_20140107_024518_inLine +BABEL_OP2_304_99594_20140107_024518_outLine +BABEL_OP2_304_99813_20140115_223643_inLine +BABEL_OP2_304_99813_20140115_223643_outLine +BABEL_OP2_304_99887_20140220_032712_inLine 
+BABEL_OP2_304_99887_20140220_032712_outLine +BABEL_OP2_304_99955_20140303_223109_inLine +BABEL_OP2_304_99955_20140303_223109_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine +BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine +BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine +BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine 
+BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine +BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine +BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine +BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.list b/egs/babel/s5d/conf/lists/305-guarani/dev.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine +BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine 
+BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine +BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine +BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine +BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine 
+BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine +BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/eval.list b/egs/babel/s5d/conf/lists/305-guarani/eval.list new file mode 100644 index 00000000000..517ff94f450 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_305_10036_20140807_033554_inLine +BABEL_OP3_305_10036_20140807_033554_outLine +BABEL_OP3_305_10188_20140614_030926_inLine +BABEL_OP3_305_10188_20140614_030926_outLine +BABEL_OP3_305_10482_20150214_093450_inLine +BABEL_OP3_305_10482_20150214_093450_outLine +BABEL_OP3_305_10638_20150318_093445_inLine +BABEL_OP3_305_10638_20150318_093445_outLine +BABEL_OP3_305_11352_20150219_080531_inLine +BABEL_OP3_305_11352_20150219_080531_outLine +BABEL_OP3_305_11528_20150328_055924_inLine +BABEL_OP3_305_11528_20150328_055924_outLine +BABEL_OP3_305_13126_20150222_063432_inLine +BABEL_OP3_305_13126_20150222_063432_outLine +BABEL_OP3_305_13586_20150122_061859_inLine +BABEL_OP3_305_13586_20150122_061859_outLine +BABEL_OP3_305_13744_20140615_004815_inLine +BABEL_OP3_305_13744_20140615_004815_outLine +BABEL_OP3_305_15163_20141021_042732_inLine +BABEL_OP3_305_15163_20141021_042732_outLine +BABEL_OP3_305_16475_20140910_050557_inLine +BABEL_OP3_305_16475_20140910_050557_outLine +BABEL_OP3_305_16726_20140620_054123_inLine +BABEL_OP3_305_16726_20140620_054123_outLine +BABEL_OP3_305_20724_20140612_032059_inLine +BABEL_OP3_305_20724_20140612_032059_outLine +BABEL_OP3_305_22494_20150210_082201_inLine +BABEL_OP3_305_22494_20150210_082201_outLine +BABEL_OP3_305_22643_20150331_062123_inLine +BABEL_OP3_305_22643_20150331_062123_outLine +BABEL_OP3_305_25895_20140611_072609_inLine +BABEL_OP3_305_25895_20140611_072609_outLine +BABEL_OP3_305_26206_20150212_091700_inLine +BABEL_OP3_305_26206_20150212_091700_outLine +BABEL_OP3_305_26388_20140731_014108_inLine +BABEL_OP3_305_26388_20140731_014108_outLine +BABEL_OP3_305_27082_20141016_051726_inLine +BABEL_OP3_305_27082_20141016_051726_outLine +BABEL_OP3_305_28775_20140621_055220_inLine +BABEL_OP3_305_28775_20140621_055220_outLine +BABEL_OP3_305_28945_20140906_010750_inLine +BABEL_OP3_305_28945_20140906_010750_outLine +BABEL_OP3_305_29023_20140821_052317_inLine +BABEL_OP3_305_29023_20140821_052317_outLine +BABEL_OP3_305_29023_20140821_053525_inLine +BABEL_OP3_305_29023_20140821_053525_outLine +BABEL_OP3_305_29765_20150307_085516_inLine +BABEL_OP3_305_29765_20150307_085516_outLine +BABEL_OP3_305_30461_20150310_062851_inLine +BABEL_OP3_305_30461_20150310_062851_outLine 
+BABEL_OP3_305_30653_20150219_091045_inLine +BABEL_OP3_305_30653_20150219_091045_outLine +BABEL_OP3_305_31484_20150212_074454_inLine +BABEL_OP3_305_31484_20150212_074454_outLine +BABEL_OP3_305_31628_20150208_021858_inLine +BABEL_OP3_305_31628_20150208_021858_outLine +BABEL_OP3_305_32301_20150212_011150_inLine +BABEL_OP3_305_32301_20150212_011150_outLine +BABEL_OP3_305_32328_20150122_041147_inLine +BABEL_OP3_305_32328_20150122_041147_outLine +BABEL_OP3_305_35139_20140822_065230_inLine +BABEL_OP3_305_35139_20140822_065230_outLine +BABEL_OP3_305_35181_20150221_032331_inLine +BABEL_OP3_305_35181_20150221_032331_outLine +BABEL_OP3_305_36505_20141211_012908_inLine +BABEL_OP3_305_36505_20141211_012908_outLine +BABEL_OP3_305_36505_20141211_014026_inLine +BABEL_OP3_305_36505_20141211_014026_outLine +BABEL_OP3_305_37684_20140612_021940_inLine +BABEL_OP3_305_37684_20140612_021940_outLine +BABEL_OP3_305_41174_20140904_033334_inLine +BABEL_OP3_305_41174_20140904_033334_outLine +BABEL_OP3_305_41920_20140618_052053_inLine +BABEL_OP3_305_41920_20140618_052053_outLine +BABEL_OP3_305_42600_20140731_005108_inLine +BABEL_OP3_305_42600_20140731_005108_outLine +BABEL_OP3_305_43788_20150208_044657_inLine +BABEL_OP3_305_43788_20150208_044657_outLine +BABEL_OP3_305_46315_20150214_012323_inLine +BABEL_OP3_305_46315_20150214_012323_outLine +BABEL_OP3_305_46625_20140618_065851_inLine +BABEL_OP3_305_46625_20140618_065851_outLine +BABEL_OP3_305_48758_20150220_092254_inLine +BABEL_OP3_305_48758_20150220_092254_outLine +BABEL_OP3_305_49216_20140615_041916_inLine +BABEL_OP3_305_49216_20140615_041916_outLine +BABEL_OP3_305_49637_20140619_051340_inLine +BABEL_OP3_305_49637_20140619_051340_outLine +BABEL_OP3_305_50175_20140620_063847_inLine +BABEL_OP3_305_50175_20140620_063847_outLine +BABEL_OP3_305_50630_20150211_101833_inLine +BABEL_OP3_305_50630_20150211_101833_outLine +BABEL_OP3_305_51417_20141028_072402_inLine +BABEL_OP3_305_51417_20141028_072402_outLine +BABEL_OP3_305_52438_20140621_014747_inLine +BABEL_OP3_305_52438_20140621_014747_outLine +BABEL_OP3_305_52804_20140822_074104_inLine +BABEL_OP3_305_52804_20140822_074104_outLine +BABEL_OP3_305_53758_20140611_060640_inLine +BABEL_OP3_305_53758_20140611_060640_outLine +BABEL_OP3_305_56468_20150327_024417_inLine +BABEL_OP3_305_56468_20150327_024417_outLine +BABEL_OP3_305_56677_20150226_094545_inLine +BABEL_OP3_305_56677_20150226_094545_outLine +BABEL_OP3_305_58821_20150217_093203_inLine +BABEL_OP3_305_58821_20150217_093203_outLine +BABEL_OP3_305_59163_20140614_065953_inLine +BABEL_OP3_305_59163_20140614_065953_outLine +BABEL_OP3_305_60538_20140619_023839_inLine +BABEL_OP3_305_60538_20140619_023839_outLine +BABEL_OP3_305_60661_20140822_055802_inLine +BABEL_OP3_305_60661_20140822_055802_outLine +BABEL_OP3_305_61011_20140911_035151_inLine +BABEL_OP3_305_61011_20140911_035151_outLine +BABEL_OP3_305_63484_20140614_064915_inLine +BABEL_OP3_305_63484_20140614_064915_outLine +BABEL_OP3_305_66916_20141021_002433_inLine +BABEL_OP3_305_66916_20141021_002433_outLine +BABEL_OP3_305_67152_20150228_091753_inLine +BABEL_OP3_305_67152_20150228_091753_outLine +BABEL_OP3_305_67894_20140614_021409_inLine +BABEL_OP3_305_67894_20140614_021409_outLine +BABEL_OP3_305_69633_20150211_042219_inLine +BABEL_OP3_305_69633_20150211_042219_outLine +BABEL_OP3_305_70386_20140823_041550_inLine +BABEL_OP3_305_70386_20140823_041550_outLine +BABEL_OP3_305_71614_20150220_005206_inLine +BABEL_OP3_305_71614_20150220_005206_outLine +BABEL_OP3_305_72007_20150218_073351_inLine 
+BABEL_OP3_305_72007_20150218_073351_outLine +BABEL_OP3_305_73072_20140620_003027_inLine +BABEL_OP3_305_73072_20140620_003027_outLine +BABEL_OP3_305_73622_20140731_060846_inLine +BABEL_OP3_305_73622_20140731_060846_outLine +BABEL_OP3_305_75930_20140613_043849_inLine +BABEL_OP3_305_75930_20140613_043849_outLine +BABEL_OP3_305_75930_20140613_045058_inLine +BABEL_OP3_305_75930_20140613_045058_outLine +BABEL_OP3_305_76218_20140912_034653_inLine +BABEL_OP3_305_76218_20140912_034653_outLine +BABEL_OP3_305_77112_20141017_061539_inLine +BABEL_OP3_305_77112_20141017_061539_outLine +BABEL_OP3_305_78398_20140807_031509_inLine +BABEL_OP3_305_78398_20140807_031509_outLine +BABEL_OP3_305_78543_20150307_102417_inLine +BABEL_OP3_305_78543_20150307_102417_outLine +BABEL_OP3_305_78604_20140801_052426_inLine +BABEL_OP3_305_78604_20140801_052426_outLine +BABEL_OP3_305_79107_20150301_081556_inLine +BABEL_OP3_305_79107_20150301_081556_outLine +BABEL_OP3_305_80383_20140612_015419_inLine +BABEL_OP3_305_80383_20140612_015419_outLine +BABEL_OP3_305_81404_20140821_230151_inLine +BABEL_OP3_305_81404_20140821_230151_outLine +BABEL_OP3_305_83775_20140808_011711_inLine +BABEL_OP3_305_83775_20140808_011711_outLine +BABEL_OP3_305_84370_20150301_074935_inLine +BABEL_OP3_305_84370_20150301_074935_outLine +BABEL_OP3_305_84466_20150220_080109_inLine +BABEL_OP3_305_84466_20150220_080109_outLine +BABEL_OP3_305_86676_20150207_083733_inLine +BABEL_OP3_305_86676_20150207_083733_outLine +BABEL_OP3_305_87074_20140821_062711_inLine +BABEL_OP3_305_87074_20140821_062711_outLine +BABEL_OP3_305_87693_20140913_020630_inLine +BABEL_OP3_305_87693_20140913_020630_outLine +BABEL_OP3_305_88372_20150301_060900_inLine +BABEL_OP3_305_88372_20150301_060900_outLine +BABEL_OP3_305_88661_20150207_095906_inLine +BABEL_OP3_305_88661_20150207_095906_outLine +BABEL_OP3_305_88982_20150122_052417_inLine +BABEL_OP3_305_88982_20150122_052417_outLine +BABEL_OP3_305_88988_20150318_090506_inLine +BABEL_OP3_305_88988_20150318_090506_outLine +BABEL_OP3_305_89059_20141028_073001_inLine +BABEL_OP3_305_89059_20141028_073001_outLine +BABEL_OP3_305_89226_20140614_041314_inLine +BABEL_OP3_305_89226_20140614_041314_outLine +BABEL_OP3_305_90935_20140808_010525_inLine +BABEL_OP3_305_90935_20140808_010525_outLine +BABEL_OP3_305_93007_20150311_015743_inLine +BABEL_OP3_305_93007_20150311_015743_outLine +BABEL_OP3_305_95663_20140806_050031_inLine +BABEL_OP3_305_95663_20140806_050031_outLine +BABEL_OP3_305_96910_20140911_043108_inLine +BABEL_OP3_305_96910_20140911_043108_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list new file mode 100644 index 00000000000..ff3a375b9e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list @@ -0,0 +1,134 @@ +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_20454_20140619_022112_inLine 
+BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine +BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine +BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine 
+BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine +BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_97220_20150303_234352_inLine +BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list new file mode 100644 index 00000000000..165c7e95f06 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list @@ -0,0 +1,392 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine +BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine 
+BABEL_OP3_305_18037_20140611_044623_outLine +BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine +BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine +BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_35008_20150214_095953_inLine +BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine 
+BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_46558_20140905_012017_inLine +BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine +BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine +BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine 
+BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_63084_20150207_074116_inLine +BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine +BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71566_20150217_074338_inLine +BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine +BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine 
+BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine +BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine +BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine +BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine 
+BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine +BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/training.list b/egs/babel/s5d/conf/lists/305-guarani/training.list new file mode 100644 index 00000000000..d191e6ac974 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/training.list @@ -0,0 +1,526 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine 
+BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine +BABEL_OP3_305_18037_20140611_044623_outLine +BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20454_20140619_022112_inLine +BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine +BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine 
+BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35008_20150214_095953_inLine +BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine +BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46558_20140905_012017_inLine +BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine 
+BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine +BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine 
+BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine +BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_63084_20150207_074116_inLine +BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine +BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_71566_20150217_074338_inLine 
+BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine +BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine +BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine +BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine 
+BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine +BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine +BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine +BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97220_20150303_234352_inLine 
+BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list new file mode 100644 index 00000000000..3b4e995995f --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list @@ -0,0 +1,525 @@ +BABEL_OP3_305_10002_20150327_045715_inLine +BABEL_OP3_305_10002_20150327_045715_outLine +BABEL_OP3_305_12846_20150711_092831_inLine +BABEL_OP3_305_12846_20150711_092831_outLine +BABEL_OP3_305_13561_20150122_000259_inLine +BABEL_OP3_305_13561_20150122_000259_outLine +BABEL_OP3_305_13792_20140619_010014_inLine +BABEL_OP3_305_13792_20140619_010014_outLine +BABEL_OP3_305_13909_20150709_071634_inLine +BABEL_OP3_305_13909_20150709_071634_outLine +BABEL_OP3_305_13929_20150429_060818_inLine +BABEL_OP3_305_13929_20150429_060818_outLine +BABEL_OP3_305_14179_20150211_104346_inLine +BABEL_OP3_305_14537_20150507_004514_inLine +BABEL_OP3_305_14537_20150507_004514_outLine +BABEL_OP3_305_14560_20150208_054722_inLine +BABEL_OP3_305_14575_20150501_043914_inLine +BABEL_OP3_305_14575_20150501_043914_outLine +BABEL_OP3_305_14807_20150124_062928_inLine +BABEL_OP3_305_14807_20150124_062928_outLine +BABEL_OP3_305_14875_20140808_063210_inLine +BABEL_OP3_305_14972_20150123_045130_inLine +BABEL_OP3_305_14972_20150123_045130_outLine +BABEL_OP3_305_15324_20150226_034700_inLine +BABEL_OP3_305_15324_20150226_034700_outLine +BABEL_OP3_305_15382_20150211_004401_inLine +BABEL_OP3_305_15382_20150211_004401_outLine +BABEL_OP3_305_15466_20150319_020617_inLine +BABEL_OP3_305_15466_20150319_020617_outLine +BABEL_OP3_305_15702_20150207_022910_inLine +BABEL_OP3_305_15869_20140613_063410_inLine +BABEL_OP3_305_15869_20140613_063410_outLine +BABEL_OP3_305_15985_20150712_053914_inLine +BABEL_OP3_305_15985_20150712_053914_outLine +BABEL_OP3_305_16056_20140618_060252_inLine +BABEL_OP3_305_16056_20140618_060252_outLine +BABEL_OP3_305_16802_20140613_064802_outLine +BABEL_OP3_305_16838_20150428_014210_inLine +BABEL_OP3_305_16838_20150428_014210_outLine +BABEL_OP3_305_16938_20150127_074437_inLine +BABEL_OP3_305_16938_20150127_074437_outLine +BABEL_OP3_305_17472_20150226_001559_inLine +BABEL_OP3_305_17472_20150226_001559_outLine +BABEL_OP3_305_17520_20150123_072609_inLine +BABEL_OP3_305_17520_20150123_072609_outLine +BABEL_OP3_305_17573_20150225_054303_inLine +BABEL_OP3_305_17573_20150225_054303_outLine +BABEL_OP3_305_17751_20150709_064430_inLine +BABEL_OP3_305_17751_20150709_064430_outLine +BABEL_OP3_305_19545_20150224_095516_inLine +BABEL_OP3_305_19545_20150224_095516_outLine +BABEL_OP3_305_19589_20150605_040559_inLine +BABEL_OP3_305_19589_20150605_040559_outLine +BABEL_OP3_305_19722_20140620_011143_inLine 
+BABEL_OP3_305_19722_20140620_011143_outLine +BABEL_OP3_305_19722_20140620_012427_inLine +BABEL_OP3_305_19722_20140620_012427_outLine +BABEL_OP3_305_20738_20150303_004715_inLine +BABEL_OP3_305_20738_20150303_004715_outLine +BABEL_OP3_305_21029_20140823_005012_outLine +BABEL_OP3_305_21426_20150317_013855_inLine +BABEL_OP3_305_21426_20150317_013855_outLine +BABEL_OP3_305_22170_20150219_024431_inLine +BABEL_OP3_305_22170_20150219_024431_outLine +BABEL_OP3_305_23395_20150124_020906_inLine +BABEL_OP3_305_23395_20150124_020906_outLine +BABEL_OP3_305_24037_20150408_020032_inLine +BABEL_OP3_305_24037_20150408_020032_outLine +BABEL_OP3_305_24270_20150127_065231_inLine +BABEL_OP3_305_24270_20150127_065231_outLine +BABEL_OP3_305_24648_20150720_024919_inLine +BABEL_OP3_305_24648_20150720_024919_outLine +BABEL_OP3_305_25698_20150713_041848_inLine +BABEL_OP3_305_25698_20150713_041848_outLine +BABEL_OP3_305_26074_20150123_054227_inLine +BABEL_OP3_305_26074_20150123_054227_outLine +BABEL_OP3_305_26507_20150228_085010_inLine +BABEL_OP3_305_26507_20150228_085010_outLine +BABEL_OP3_305_26869_20140611_062738_inLine +BABEL_OP3_305_26869_20140611_062738_outLine +BABEL_OP3_305_26999_20150211_213027_outLine +BABEL_OP3_305_27203_20150203_021148_inLine +BABEL_OP3_305_27203_20150203_021148_outLine +BABEL_OP3_305_28522_20150210_024545_inLine +BABEL_OP3_305_28522_20150210_024545_outLine +BABEL_OP3_305_28595_20150311_092304_inLine +BABEL_OP3_305_28595_20150311_092304_outLine +BABEL_OP3_305_28644_20150501_021643_inLine +BABEL_OP3_305_28644_20150501_021643_outLine +BABEL_OP3_305_28814_20141028_061920_inLine +BABEL_OP3_305_28814_20141028_061920_outLine +BABEL_OP3_305_29039_20150225_033135_inLine +BABEL_OP3_305_29039_20150225_033135_outLine +BABEL_OP3_305_29135_20140620_020910_inLine +BABEL_OP3_305_29135_20140620_020910_outLine +BABEL_OP3_305_29643_20150712_020443_inLine +BABEL_OP3_305_29643_20150712_020443_outLine +BABEL_OP3_305_29911_20150425_022101_inLine +BABEL_OP3_305_29911_20150425_022101_outLine +BABEL_OP3_305_30084_20150711_110851_inLine +BABEL_OP3_305_30084_20150711_110851_outLine +BABEL_OP3_305_30253_20150226_074731_inLine +BABEL_OP3_305_30345_20150801_030841_inLine +BABEL_OP3_305_30345_20150801_030841_outLine +BABEL_OP3_305_30395_20140913_031713_inLine +BABEL_OP3_305_30395_20140913_031713_outLine +BABEL_OP3_305_30395_20140913_033401_inLine +BABEL_OP3_305_30395_20140913_033401_outLine +BABEL_OP3_305_31109_20150201_061030_inLine +BABEL_OP3_305_31131_20150318_083818_inLine +BABEL_OP3_305_31184_20141016_042343_inLine +BABEL_OP3_305_31184_20141016_042343_outLine +BABEL_OP3_305_31490_20140618_043106_inLine +BABEL_OP3_305_31490_20140618_043106_outLine +BABEL_OP3_305_32171_20150313_090240_inLine +BABEL_OP3_305_32171_20150313_090240_outLine +BABEL_OP3_305_32244_20150508_010834_inLine +BABEL_OP3_305_32244_20150508_010834_outLine +BABEL_OP3_305_32630_20150508_025319_inLine +BABEL_OP3_305_32630_20150508_025319_outLine +BABEL_OP3_305_32959_20150218_010038_inLine +BABEL_OP3_305_32959_20150218_010038_outLine +BABEL_OP3_305_32961_20150312_083747_inLine +BABEL_OP3_305_32961_20150312_083747_outLine +BABEL_OP3_305_33216_20150305_093049_outLine +BABEL_OP3_305_34482_20140612_002439_inLine +BABEL_OP3_305_34482_20140612_002439_outLine +BABEL_OP3_305_34688_20140620_051303_inLine +BABEL_OP3_305_34688_20140620_051303_outLine +BABEL_OP3_305_34899_20150708_044950_inLine +BABEL_OP3_305_34899_20150708_044950_outLine +BABEL_OP3_305_34903_20150513_000213_inLine +BABEL_OP3_305_34903_20150513_000213_outLine 
+BABEL_OP3_305_35838_20150505_025409_inLine +BABEL_OP3_305_35838_20150505_025409_outLine +BABEL_OP3_305_36642_20150529_004314_inLine +BABEL_OP3_305_36642_20150529_004314_outLine +BABEL_OP3_305_37229_20150711_062628_inLine +BABEL_OP3_305_37229_20150711_062628_outLine +BABEL_OP3_305_37776_20141021_051359_inLine +BABEL_OP3_305_37776_20141021_051359_outLine +BABEL_OP3_305_38554_20140618_050525_inLine +BABEL_OP3_305_38554_20140618_050525_outLine +BABEL_OP3_305_38689_20150215_061537_inLine +BABEL_OP3_305_38689_20150215_061537_outLine +BABEL_OP3_305_38750_20150512_033350_outLine +BABEL_OP3_305_38878_20150226_001924_inLine +BABEL_OP3_305_38878_20150226_001924_outLine +BABEL_OP3_305_38979_20150222_070549_inLine +BABEL_OP3_305_38979_20150222_071202_inLine +BABEL_OP3_305_39006_20150305_001413_inLine +BABEL_OP3_305_39006_20150305_001413_outLine +BABEL_OP3_305_40330_20140613_044545_inLine +BABEL_OP3_305_40330_20140613_044545_outLine +BABEL_OP3_305_40648_20150425_034647_inLine +BABEL_OP3_305_40648_20150425_034647_outLine +BABEL_OP3_305_41720_20150327_013143_inLine +BABEL_OP3_305_42029_20141107_005557_inLine +BABEL_OP3_305_42029_20141107_005557_outLine +BABEL_OP3_305_42126_20150428_014342_inLine +BABEL_OP3_305_42126_20150428_014342_outLine +BABEL_OP3_305_42126_20150428_021652_inLine +BABEL_OP3_305_42126_20150428_021652_outLine +BABEL_OP3_305_42619_20150211_044149_inLine +BABEL_OP3_305_42619_20150211_044149_outLine +BABEL_OP3_305_42834_20150212_100155_inLine +BABEL_OP3_305_42834_20150212_100155_outLine +BABEL_OP3_305_42848_20150711_053624_inLine +BABEL_OP3_305_42848_20150711_053624_outLine +BABEL_OP3_305_43157_20150313_015446_inLine +BABEL_OP3_305_43157_20150313_015446_outLine +BABEL_OP3_305_43285_20150210_022647_inLine +BABEL_OP3_305_43323_20150719_100142_inLine +BABEL_OP3_305_43323_20150719_100142_outLine +BABEL_OP3_305_43794_20150712_055921_inLine +BABEL_OP3_305_43794_20150712_055921_outLine +BABEL_OP3_305_44309_20150221_054810_inLine +BABEL_OP3_305_44309_20150221_054810_outLine +BABEL_OP3_305_44681_20150506_011354_inLine +BABEL_OP3_305_44681_20150506_011354_outLine +BABEL_OP3_305_45699_20140621_010650_inLine +BABEL_OP3_305_45699_20140621_010650_outLine +BABEL_OP3_305_45771_20150509_034615_inLine +BABEL_OP3_305_45771_20150509_034615_outLine +BABEL_OP3_305_46974_20150214_020116_inLine +BABEL_OP3_305_46974_20150214_020116_outLine +BABEL_OP3_305_47309_20150409_072623_inLine +BABEL_OP3_305_47309_20150409_072623_outLine +BABEL_OP3_305_47405_20140612_010358_inLine +BABEL_OP3_305_47405_20140612_010358_outLine +BABEL_OP3_305_47451_20150226_004537_inLine +BABEL_OP3_305_47451_20150226_004537_outLine +BABEL_OP3_305_47866_20150221_013305_inLine +BABEL_OP3_305_47866_20150221_013305_outLine +BABEL_OP3_305_47866_20150221_014014_inLine +BABEL_OP3_305_47866_20150221_014014_outLine +BABEL_OP3_305_48016_20150306_064336_inLine +BABEL_OP3_305_48016_20150306_064336_outLine +BABEL_OP3_305_48299_20150325_094035_inLine +BABEL_OP3_305_48299_20150325_094035_outLine +BABEL_OP3_305_49775_20140618_071800_inLine +BABEL_OP3_305_49775_20140618_071800_outLine +BABEL_OP3_305_49912_20150713_052104_inLine +BABEL_OP3_305_49912_20150713_052104_outLine +BABEL_OP3_305_49945_20150507_042152_inLine +BABEL_OP3_305_49945_20150507_042152_outLine +BABEL_OP3_305_50726_20140620_231413_inLine +BABEL_OP3_305_50726_20140620_231413_outLine +BABEL_OP3_305_50779_20150124_073920_inLine +BABEL_OP3_305_50779_20150124_073920_outLine +BABEL_OP3_305_51414_20150508_035339_inLine +BABEL_OP3_305_51414_20150508_035339_outLine 
+BABEL_OP3_305_52058_20150425_021345_inLine +BABEL_OP3_305_52058_20150425_021345_outLine +BABEL_OP3_305_52070_20150708_053057_inLine +BABEL_OP3_305_52070_20150708_053057_outLine +BABEL_OP3_305_53063_20150227_005949_inLine +BABEL_OP3_305_53063_20150227_005949_outLine +BABEL_OP3_305_54066_20150314_023944_inLine +BABEL_OP3_305_55136_20150720_024100_inLine +BABEL_OP3_305_55136_20150720_024100_outLine +BABEL_OP3_305_56057_20140614_044506_inLine +BABEL_OP3_305_56057_20140614_044506_outLine +BABEL_OP3_305_56326_20150422_010519_inLine +BABEL_OP3_305_56326_20150422_010519_outLine +BABEL_OP3_305_56345_20150327_043440_inLine +BABEL_OP3_305_56465_20150306_050918_inLine +BABEL_OP3_305_56465_20150306_050918_outLine +BABEL_OP3_305_56674_20150501_040501_inLine +BABEL_OP3_305_56674_20150501_040501_outLine +BABEL_OP3_305_56684_20150801_003245_inLine +BABEL_OP3_305_56684_20150801_003245_outLine +BABEL_OP3_305_56951_20150501_022425_inLine +BABEL_OP3_305_56951_20150501_022425_outLine +BABEL_OP3_305_57093_20150122_014446_inLine +BABEL_OP3_305_57093_20150122_021223_inLine +BABEL_OP3_305_57782_20150310_044823_inLine +BABEL_OP3_305_57782_20150310_044823_outLine +BABEL_OP3_305_58047_20150124_055910_inLine +BABEL_OP3_305_58047_20150124_055910_outLine +BABEL_OP3_305_58313_20150124_015438_inLine +BABEL_OP3_305_58313_20150124_015438_outLine +BABEL_OP3_305_58489_20150217_090604_inLine +BABEL_OP3_305_58850_20141017_005516_inLine +BABEL_OP3_305_58850_20141017_010823_inLine +BABEL_OP3_305_59028_20150709_062445_inLine +BABEL_OP3_305_59028_20150709_062445_outLine +BABEL_OP3_305_59028_20150712_043120_inLine +BABEL_OP3_305_59028_20150712_043120_outLine +BABEL_OP3_305_59078_20150127_073310_inLine +BABEL_OP3_305_59078_20150127_073310_outLine +BABEL_OP3_305_59509_20150206_012130_inLine +BABEL_OP3_305_59509_20150206_013211_inLine +BABEL_OP3_305_59747_20140620_004831_inLine +BABEL_OP3_305_59747_20140620_004831_outLine +BABEL_OP3_305_60307_20150310_094538_inLine +BABEL_OP3_305_60307_20150310_094538_outLine +BABEL_OP3_305_60436_20150529_012621_inLine +BABEL_OP3_305_60436_20150529_012621_outLine +BABEL_OP3_305_60458_20150508_023847_inLine +BABEL_OP3_305_60458_20150508_023847_outLine +BABEL_OP3_305_60498_20150508_031033_inLine +BABEL_OP3_305_60498_20150508_031033_outLine +BABEL_OP3_305_60508_20140822_014453_inLine +BABEL_OP3_305_60778_20150425_005047_inLine +BABEL_OP3_305_60778_20150425_005047_outLine +BABEL_OP3_305_60836_20140809_005847_inLine +BABEL_OP3_305_61219_20140730_063954_inLine +BABEL_OP3_305_61219_20140730_063954_outLine +BABEL_OP3_305_61225_20140620_001221_inLine +BABEL_OP3_305_61225_20140620_001221_outLine +BABEL_OP3_305_61438_20150423_020808_inLine +BABEL_OP3_305_61438_20150423_020808_outLine +BABEL_OP3_305_61731_20140621_035703_inLine +BABEL_OP3_305_61731_20140621_035703_outLine +BABEL_OP3_305_61731_20140621_041145_inLine +BABEL_OP3_305_61731_20140621_041145_outLine +BABEL_OP3_305_62323_20140612_010032_inLine +BABEL_OP3_305_62323_20140612_010032_outLine +BABEL_OP3_305_62362_20150712_082552_inLine +BABEL_OP3_305_62362_20150712_082552_outLine +BABEL_OP3_305_62430_20150219_045422_inLine +BABEL_OP3_305_62430_20150219_045422_outLine +BABEL_OP3_305_62545_20150424_004115_inLine +BABEL_OP3_305_62545_20150424_004115_outLine +BABEL_OP3_305_63094_20150712_100827_inLine +BABEL_OP3_305_63094_20150712_100827_outLine +BABEL_OP3_305_63265_20140611_234727_inLine +BABEL_OP3_305_63265_20140611_234727_outLine +BABEL_OP3_305_63265_20140611_235803_inLine +BABEL_OP3_305_63265_20140611_235803_outLine +BABEL_OP3_305_63307_20150122_063418_inLine 
+BABEL_OP3_305_63307_20150122_063418_outLine +BABEL_OP3_305_63307_20150122_065933_inLine +BABEL_OP3_305_63307_20150122_065933_outLine +BABEL_OP3_305_63309_20150319_003832_inLine +BABEL_OP3_305_63309_20150319_003832_outLine +BABEL_OP3_305_63336_20140614_051945_inLine +BABEL_OP3_305_63336_20140614_051945_outLine +BABEL_OP3_305_63490_20150408_025018_inLine +BABEL_OP3_305_63490_20150408_025018_outLine +BABEL_OP3_305_63490_20150408_025711_inLine +BABEL_OP3_305_63490_20150408_025711_outLine +BABEL_OP3_305_63730_20150305_010517_inLine +BABEL_OP3_305_63730_20150305_010517_outLine +BABEL_OP3_305_63906_20150221_045610_inLine +BABEL_OP3_305_64259_20150719_092713_inLine +BABEL_OP3_305_64259_20150719_092713_outLine +BABEL_OP3_305_65077_20140801_012944_inLine +BABEL_OP3_305_65077_20140801_012944_outLine +BABEL_OP3_305_65561_20150214_033031_inLine +BABEL_OP3_305_65561_20150214_033031_outLine +BABEL_OP3_305_65639_20150428_024614_inLine +BABEL_OP3_305_65639_20150428_024614_outLine +BABEL_OP3_305_66001_20140620_042612_inLine +BABEL_OP3_305_66001_20140620_042612_outLine +BABEL_OP3_305_66361_20150320_085921_inLine +BABEL_OP3_305_66361_20150320_085921_outLine +BABEL_OP3_305_66959_20150225_060511_inLine +BABEL_OP3_305_66959_20150225_060511_outLine +BABEL_OP3_305_66971_20150507_025406_inLine +BABEL_OP3_305_66971_20150507_025406_outLine +BABEL_OP3_305_67085_20150522_035734_inLine +BABEL_OP3_305_67085_20150522_035734_outLine +BABEL_OP3_305_68924_20150203_052345_inLine +BABEL_OP3_305_68924_20150203_052345_outLine +BABEL_OP3_305_69107_20150123_023939_inLine +BABEL_OP3_305_69107_20150123_023939_outLine +BABEL_OP3_305_69474_20150217_095752_inLine +BABEL_OP3_305_69474_20150217_095752_outLine +BABEL_OP3_305_70216_20150418_044143_inLine +BABEL_OP3_305_70216_20150418_044143_outLine +BABEL_OP3_305_70216_20150418_045222_inLine +BABEL_OP3_305_70216_20150418_045222_outLine +BABEL_OP3_305_70343_20150213_010739_inLine +BABEL_OP3_305_70343_20150213_010739_outLine +BABEL_OP3_305_71067_20150206_022645_inLine +BABEL_OP3_305_71067_20150206_022645_outLine +BABEL_OP3_305_71704_20140730_042541_inLine +BABEL_OP3_305_71704_20140730_042541_outLine +BABEL_OP3_305_72040_20140905_002224_inLine +BABEL_OP3_305_72040_20140905_002224_outLine +BABEL_OP3_305_72952_20150712_063306_inLine +BABEL_OP3_305_72952_20150712_063306_outLine +BABEL_OP3_305_73299_20150712_044814_inLine +BABEL_OP3_305_73299_20150712_044814_outLine +BABEL_OP3_305_73305_20150328_030752_inLine +BABEL_OP3_305_73305_20150328_030752_outLine +BABEL_OP3_305_73814_20150207_014107_inLine +BABEL_OP3_305_74226_20150211_232229_inLine +BABEL_OP3_305_74226_20150211_232229_outLine +BABEL_OP3_305_74886_20140620_052822_inLine +BABEL_OP3_305_74886_20140620_052822_outLine +BABEL_OP3_305_75342_20150513_235657_inLine +BABEL_OP3_305_75342_20150513_235657_outLine +BABEL_OP3_305_75366_20150310_042904_inLine +BABEL_OP3_305_75366_20150310_042904_outLine +BABEL_OP3_305_75460_20150711_021713_inLine +BABEL_OP3_305_75460_20150711_021713_outLine +BABEL_OP3_305_76730_20140729_052201_inLine +BABEL_OP3_305_76773_20140823_031314_inLine +BABEL_OP3_305_76773_20140823_031314_outLine +BABEL_OP3_305_76902_20150320_043734_inLine +BABEL_OP3_305_76902_20150320_043734_outLine +BABEL_OP3_305_77730_20140730_051628_inLine +BABEL_OP3_305_77730_20140730_051628_outLine +BABEL_OP3_305_77832_20150317_003741_inLine +BABEL_OP3_305_77832_20150317_003741_outLine +BABEL_OP3_305_78116_20150213_013547_inLine +BABEL_OP3_305_78116_20150213_013547_outLine +BABEL_OP3_305_78194_20140618_010449_inLine 
+BABEL_OP3_305_78194_20140618_010449_outLine +BABEL_OP3_305_78254_20140801_003005_inLine +BABEL_OP3_305_78254_20140801_003005_outLine +BABEL_OP3_305_78454_20150127_025616_inLine +BABEL_OP3_305_78454_20150127_025616_outLine +BABEL_OP3_305_78511_20150225_034550_inLine +BABEL_OP3_305_78511_20150225_034550_outLine +BABEL_OP3_305_78877_20150428_004749_inLine +BABEL_OP3_305_78877_20150428_004749_outLine +BABEL_OP3_305_79429_20150319_013246_inLine +BABEL_OP3_305_79429_20150319_013246_outLine +BABEL_OP3_305_79660_20150712_042549_inLine +BABEL_OP3_305_79660_20150712_042549_outLine +BABEL_OP3_305_79898_20150307_091426_inLine +BABEL_OP3_305_79898_20150307_091426_outLine +BABEL_OP3_305_79898_20150307_093317_inLine +BABEL_OP3_305_79898_20150307_093317_outLine +BABEL_OP3_305_80559_20140731_042258_inLine +BABEL_OP3_305_80559_20140731_042258_outLine +BABEL_OP3_305_80989_20150712_091615_inLine +BABEL_OP3_305_80989_20150712_091615_outLine +BABEL_OP3_305_81433_20150127_070550_inLine +BABEL_OP3_305_81433_20150127_070550_outLine +BABEL_OP3_305_81810_20150208_075542_inLine +BABEL_OP3_305_81810_20150208_075542_outLine +BABEL_OP3_305_82145_20150301_063108_inLine +BABEL_OP3_305_82145_20150301_063108_outLine +BABEL_OP3_305_82145_20150301_064502_inLine +BABEL_OP3_305_82145_20150301_064502_outLine +BABEL_OP3_305_82425_20140620_053637_inLine +BABEL_OP3_305_82425_20140620_053637_outLine +BABEL_OP3_305_82742_20150124_054325_inLine +BABEL_OP3_305_82863_20141021_023356_inLine +BABEL_OP3_305_82863_20141021_023356_outLine +BABEL_OP3_305_83545_20150605_042852_inLine +BABEL_OP3_305_83545_20150605_042852_outLine +BABEL_OP3_305_83771_20150509_012937_inLine +BABEL_OP3_305_83771_20150509_012937_outLine +BABEL_OP3_305_83771_20150509_013635_inLine +BABEL_OP3_305_83771_20150509_013635_outLine +BABEL_OP3_305_83813_20150429_053518_inLine +BABEL_OP3_305_83813_20150429_053518_outLine +BABEL_OP3_305_84125_20140614_072153_inLine +BABEL_OP3_305_84125_20140614_072153_outLine +BABEL_OP3_305_84583_20150123_062012_inLine +BABEL_OP3_305_84583_20150123_062012_outLine +BABEL_OP3_305_85010_20150327_022501_inLine +BABEL_OP3_305_85010_20150327_022501_outLine +BABEL_OP3_305_85048_20150124_074706_inLine +BABEL_OP3_305_85048_20150124_074706_outLine +BABEL_OP3_305_85246_20150317_090655_inLine +BABEL_OP3_305_85246_20150317_090655_outLine +BABEL_OP3_305_85246_20150317_091545_inLine +BABEL_OP3_305_85246_20150317_091545_outLine +BABEL_OP3_305_85647_20150124_020413_inLine +BABEL_OP3_305_85647_20150124_020413_outLine +BABEL_OP3_305_85647_20150124_021612_inLine +BABEL_OP3_305_85647_20150124_021612_outLine +BABEL_OP3_305_86628_20150709_074216_inLine +BABEL_OP3_305_86628_20150709_074216_outLine +BABEL_OP3_305_86826_20150711_081659_inLine +BABEL_OP3_305_86826_20150711_081659_outLine +BABEL_OP3_305_87629_20150123_042545_inLine +BABEL_OP3_305_87629_20150123_042545_outLine +BABEL_OP3_305_87731_20150720_063702_inLine +BABEL_OP3_305_87731_20150720_063702_outLine +BABEL_OP3_305_87884_20150218_232216_inLine +BABEL_OP3_305_87884_20150218_232216_outLine +BABEL_OP3_305_88445_20150208_035054_inLine +BABEL_OP3_305_88673_20150719_085433_inLine +BABEL_OP3_305_88673_20150719_085433_outLine +BABEL_OP3_305_89203_20150802_011814_inLine +BABEL_OP3_305_89203_20150802_011814_outLine +BABEL_OP3_305_89888_20140730_034633_inLine +BABEL_OP3_305_89888_20140730_034633_outLine +BABEL_OP3_305_90417_20150721_002233_inLine +BABEL_OP3_305_90417_20150721_002233_outLine +BABEL_OP3_305_90740_20150326_035521_inLine +BABEL_OP3_305_90740_20150326_035521_outLine 
+BABEL_OP3_305_91189_20150708_000127_inLine +BABEL_OP3_305_91189_20150708_000127_outLine +BABEL_OP3_305_91336_20150122_061156_inLine +BABEL_OP3_305_91336_20150122_061156_outLine +BABEL_OP3_305_91411_20150425_040553_inLine +BABEL_OP3_305_91411_20150425_040553_outLine +BABEL_OP3_305_91760_20150508_021256_inLine +BABEL_OP3_305_91760_20150508_021256_outLine +BABEL_OP3_305_91891_20150214_084516_inLine +BABEL_OP3_305_91891_20150214_084516_outLine +BABEL_OP3_305_91930_20150219_015305_inLine +BABEL_OP3_305_91930_20150219_015305_outLine +BABEL_OP3_305_91930_20150219_015722_inLine +BABEL_OP3_305_91930_20150219_015722_outLine +BABEL_OP3_305_92065_20150227_020241_inLine +BABEL_OP3_305_92077_20150528_054240_inLine +BABEL_OP3_305_92077_20150528_054240_outLine +BABEL_OP3_305_92096_20150224_003248_inLine +BABEL_OP3_305_92096_20150224_003248_outLine +BABEL_OP3_305_92252_20140619_005416_inLine +BABEL_OP3_305_92252_20140619_005416_outLine +BABEL_OP3_305_92792_20150220_033116_inLine +BABEL_OP3_305_93222_20150325_030317_inLine +BABEL_OP3_305_93222_20150325_030317_outLine +BABEL_OP3_305_93515_20150528_011902_inLine +BABEL_OP3_305_93515_20150528_011902_outLine +BABEL_OP3_305_93604_20150522_043957_inLine +BABEL_OP3_305_93604_20150522_043957_outLine +BABEL_OP3_305_93632_20150730_105129_inLine +BABEL_OP3_305_93632_20150730_105129_outLine +BABEL_OP3_305_93964_20150122_021516_inLine +BABEL_OP3_305_93964_20150122_021516_outLine +BABEL_OP3_305_93964_20150122_024514_inLine +BABEL_OP3_305_93964_20150122_024514_outLine +BABEL_OP3_305_93964_20150122_025759_inLine +BABEL_OP3_305_93964_20150122_025759_outLine +BABEL_OP3_305_94035_20150429_013519_inLine +BABEL_OP3_305_94035_20150429_013519_outLine +BABEL_OP3_305_94212_20150425_011456_inLine +BABEL_OP3_305_94212_20150425_011456_outLine +BABEL_OP3_305_94442_20150507_034412_inLine +BABEL_OP3_305_94442_20150507_034412_outLine +BABEL_OP3_305_94803_20150317_093455_inLine +BABEL_OP3_305_94803_20150317_093455_outLine +BABEL_OP3_305_94869_20140619_054259_inLine +BABEL_OP3_305_94891_20150720_080832_inLine +BABEL_OP3_305_94891_20150720_080832_outLine +BABEL_OP3_305_94923_20150123_064032_inLine +BABEL_OP3_305_94923_20150123_064032_outLine +BABEL_OP3_305_95028_20150320_013045_inLine +BABEL_OP3_305_95028_20150320_013944_inLine +BABEL_OP3_305_95294_20150207_015416_inLine +BABEL_OP3_305_95294_20150207_020517_inLine +BABEL_OP3_305_95571_20150326_084852_inLine +BABEL_OP3_305_95571_20150326_084852_outLine +BABEL_OP3_305_96405_20140621_013139_inLine +BABEL_OP3_305_96405_20140621_013139_outLine +BABEL_OP3_305_96405_20140621_015225_inLine +BABEL_OP3_305_96405_20140621_015225_outLine +BABEL_OP3_305_96584_20141107_045031_inLine +BABEL_OP3_305_96584_20141107_045031_outLine +BABEL_OP3_305_96808_20150507_011006_inLine +BABEL_OP3_305_96808_20150507_011006_outLine +BABEL_OP3_305_96940_20150320_051125_inLine +BABEL_OP3_305_96940_20150320_051125_outLine +BABEL_OP3_305_97136_20150224_085912_inLine +BABEL_OP3_305_97136_20150224_085912_outLine +BABEL_OP3_305_97731_20150731_083617_inLine +BABEL_OP3_305_97731_20150731_083617_outLine +BABEL_OP3_305_98390_20140619_004932_inLine +BABEL_OP3_305_98390_20140619_004932_outLine +BABEL_OP3_305_99813_20150127_075030_inLine +BABEL_OP3_305_99813_20150127_075030_outLine +BABEL_OP3_305_99887_20141028_055805_inLine +BABEL_OP3_305_99887_20141028_055805_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list new file mode 100644 index 00000000000..cf0824db01d --- /dev/null +++ 
b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list @@ -0,0 +1,136 @@ +BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine +BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine +BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine +BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine 
+BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine +BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine +BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine +BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.list b/egs/babel/s5d/conf/lists/306-igbo/dev.list new file mode 100644 index 00000000000..cf0824db01d --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/dev.list @@ -0,0 +1,136 @@ +BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine 
+BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine +BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine +BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine +BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine 
+BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine +BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine +BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/eval.list b/egs/babel/s5d/conf/lists/306-igbo/eval.list new file mode 100644 index 00000000000..c9db48fb2e1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/eval.list @@ -0,0 +1,194 @@ +BABEL_OP3_306_11673_20140513_040551_inLine +BABEL_OP3_306_11673_20140513_040551_outLine +BABEL_OP3_306_12321_20141027_232351_inLine +BABEL_OP3_306_12321_20141027_232351_outLine +BABEL_OP3_306_12635_20141101_005451_inLine +BABEL_OP3_306_12635_20141101_005451_outLine +BABEL_OP3_306_13490_20140802_230433_inLine +BABEL_OP3_306_13490_20140802_230433_outLine +BABEL_OP3_306_13490_20140802_232130_inLine +BABEL_OP3_306_13490_20140802_232130_outLine +BABEL_OP3_306_13586_20140802_035824_inLine +BABEL_OP3_306_13586_20140802_035824_outLine +BABEL_OP3_306_13792_20140531_014010_inLine +BABEL_OP3_306_13792_20140531_014010_outLine +BABEL_OP3_306_14537_20150311_192951_inLine +BABEL_OP3_306_14537_20150311_192951_outLine +BABEL_OP3_306_15730_20140521_222017_inLine +BABEL_OP3_306_15730_20140521_222017_outLine +BABEL_OP3_306_15848_20140510_004027_inLine +BABEL_OP3_306_15848_20140510_004027_outLine +BABEL_OP3_306_18924_20140814_021546_inLine +BABEL_OP3_306_18924_20140814_021546_outLine +BABEL_OP3_306_20916_20140520_205947_inLine +BABEL_OP3_306_20916_20140520_205947_outLine +BABEL_OP3_306_21206_20140621_194701_inLine +BABEL_OP3_306_21206_20140621_194701_outLine +BABEL_OP3_306_22641_20150312_020316_inLine +BABEL_OP3_306_22641_20150312_020316_outLine +BABEL_OP3_306_23628_20140603_213715_inLine +BABEL_OP3_306_23628_20140603_213715_outLine +BABEL_OP3_306_26999_20140729_223316_inLine +BABEL_OP3_306_26999_20140729_223316_outLine +BABEL_OP3_306_28775_20140620_234019_inLine 
+BABEL_OP3_306_28775_20140620_234019_outLine +BABEL_OP3_306_29135_20140509_234939_inLine +BABEL_OP3_306_29135_20140509_234939_outLine +BABEL_OP3_306_29352_20150316_234927_inLine +BABEL_OP3_306_29352_20150316_234927_outLine +BABEL_OP3_306_30058_20141221_102805_inLine +BABEL_OP3_306_30058_20141221_102805_outLine +BABEL_OP3_306_30345_20141029_013617_inLine +BABEL_OP3_306_30345_20141029_013617_outLine +BABEL_OP3_306_31490_20150416_203824_inLine +BABEL_OP3_306_31490_20150416_203824_outLine +BABEL_OP3_306_32301_20140924_003519_inLine +BABEL_OP3_306_32301_20140924_003519_outLine +BABEL_OP3_306_32328_20141029_221831_inLine +BABEL_OP3_306_32328_20141029_221831_outLine +BABEL_OP3_306_33273_20141016_012203_inLine +BABEL_OP3_306_33273_20141016_012203_outLine +BABEL_OP3_306_34903_20140812_000146_inLine +BABEL_OP3_306_34903_20140812_000146_outLine +BABEL_OP3_306_35788_20150410_005320_inLine +BABEL_OP3_306_35788_20150410_005320_outLine +BABEL_OP3_306_36341_20140509_022205_inLine +BABEL_OP3_306_36341_20140509_022205_outLine +BABEL_OP3_306_36341_20140509_022936_inLine +BABEL_OP3_306_36341_20140509_022936_outLine +BABEL_OP3_306_37064_20140606_222758_inLine +BABEL_OP3_306_37064_20140606_222758_outLine +BABEL_OP3_306_38689_20141009_214007_inLine +BABEL_OP3_306_38689_20141009_214007_outLine +BABEL_OP3_306_39159_20140509_230506_inLine +BABEL_OP3_306_39159_20140509_230506_outLine +BABEL_OP3_306_39927_20150216_011520_inLine +BABEL_OP3_306_39927_20150216_011520_outLine +BABEL_OP3_306_41174_20140730_214115_inLine +BABEL_OP3_306_41174_20140730_214115_outLine +BABEL_OP3_306_41542_20141031_044512_inLine +BABEL_OP3_306_41542_20141031_044512_outLine +BABEL_OP3_306_42834_20140813_025421_inLine +BABEL_OP3_306_42834_20140813_025421_outLine +BABEL_OP3_306_42942_20141010_020223_inLine +BABEL_OP3_306_42942_20141010_020223_outLine +BABEL_OP3_306_43646_20140510_012702_inLine +BABEL_OP3_306_43646_20140510_012702_outLine +BABEL_OP3_306_46333_20150412_023828_inLine +BABEL_OP3_306_46333_20150412_023828_outLine +BABEL_OP3_306_47215_20140714_024322_inLine +BABEL_OP3_306_47215_20140714_024322_outLine +BABEL_OP3_306_48399_20140531_222338_inLine +BABEL_OP3_306_48399_20140531_222338_outLine +BABEL_OP3_306_49216_20140512_234713_inLine +BABEL_OP3_306_49216_20140512_234713_outLine +BABEL_OP3_306_51407_20140808_210301_inLine +BABEL_OP3_306_51407_20140808_210301_outLine +BABEL_OP3_306_51407_20140808_211334_inLine +BABEL_OP3_306_51407_20140808_211334_outLine +BABEL_OP3_306_51955_20140604_224650_inLine +BABEL_OP3_306_51955_20140604_224650_outLine +BABEL_OP3_306_52694_20140811_233144_inLine +BABEL_OP3_306_52694_20140811_233144_outLine +BABEL_OP3_306_53917_20141031_222826_inLine +BABEL_OP3_306_53917_20141031_222826_outLine +BABEL_OP3_306_56429_20140622_011257_inLine +BABEL_OP3_306_56429_20140622_011257_outLine +BABEL_OP3_306_56606_20150403_212810_inLine +BABEL_OP3_306_56606_20150403_212810_outLine +BABEL_OP3_306_56743_20140802_030717_inLine +BABEL_OP3_306_56743_20140802_030717_outLine +BABEL_OP3_306_57035_20150410_033837_inLine +BABEL_OP3_306_57035_20150410_033837_outLine +BABEL_OP3_306_57093_20140728_215709_inLine +BABEL_OP3_306_57093_20140728_215709_outLine +BABEL_OP3_306_57093_20140728_221243_inLine +BABEL_OP3_306_57093_20140728_221243_outLine +BABEL_OP3_306_57093_20140729_003342_inLine +BABEL_OP3_306_57093_20140729_003342_outLine +BABEL_OP3_306_57116_20140517_222852_inLine +BABEL_OP3_306_57116_20140517_222852_outLine +BABEL_OP3_306_59928_20140610_024019_inLine +BABEL_OP3_306_59928_20140610_024019_outLine 
+BABEL_OP3_306_60706_20140531_003048_inLine +BABEL_OP3_306_60706_20140531_003048_outLine +BABEL_OP3_306_61684_20150420_023032_inLine +BABEL_OP3_306_61684_20150420_023032_outLine +BABEL_OP3_306_62545_20140527_204602_inLine +BABEL_OP3_306_62545_20140527_204602_outLine +BABEL_OP3_306_62835_20140905_002934_inLine +BABEL_OP3_306_62835_20140905_002934_outLine +BABEL_OP3_306_63081_20140509_000544_inLine +BABEL_OP3_306_63081_20140509_000544_outLine +BABEL_OP3_306_63445_20140521_030723_inLine +BABEL_OP3_306_63445_20140521_030723_outLine +BABEL_OP3_306_63481_20140522_195610_inLine +BABEL_OP3_306_63481_20140522_195610_outLine +BABEL_OP3_306_64494_20140605_043852_inLine +BABEL_OP3_306_64494_20140605_043852_outLine +BABEL_OP3_306_66026_20141101_233612_inLine +BABEL_OP3_306_66026_20141101_233612_outLine +BABEL_OP3_306_67283_20140606_231809_inLine +BABEL_OP3_306_67283_20140606_231809_outLine +BABEL_OP3_306_69992_20150421_045903_inLine +BABEL_OP3_306_69992_20150421_045903_outLine +BABEL_OP3_306_70452_20140531_022425_inLine +BABEL_OP3_306_70452_20140531_022425_outLine +BABEL_OP3_306_72073_20150220_210400_inLine +BABEL_OP3_306_72073_20150220_210400_outLine +BABEL_OP3_306_73518_20141028_214326_inLine +BABEL_OP3_306_73518_20141028_214326_outLine +BABEL_OP3_306_73591_20140510_022335_inLine +BABEL_OP3_306_73591_20140510_022335_outLine +BABEL_OP3_306_73814_20140724_034710_inLine +BABEL_OP3_306_73814_20140724_034710_outLine +BABEL_OP3_306_75342_20141006_210132_inLine +BABEL_OP3_306_75342_20141006_210132_outLine +BABEL_OP3_306_75342_20141006_211900_inLine +BABEL_OP3_306_75342_20141006_211900_outLine +BABEL_OP3_306_76499_20140729_230952_inLine +BABEL_OP3_306_76499_20140729_230952_outLine +BABEL_OP3_306_78877_20140527_221925_inLine +BABEL_OP3_306_78877_20140527_221925_outLine +BABEL_OP3_306_79107_20150418_021409_inLine +BABEL_OP3_306_79107_20150418_021409_outLine +BABEL_OP3_306_84029_20150415_035216_inLine +BABEL_OP3_306_84029_20150415_035216_outLine +BABEL_OP3_306_84125_20140519_232101_inLine +BABEL_OP3_306_84125_20140519_232101_outLine +BABEL_OP3_306_84547_20140514_224528_inLine +BABEL_OP3_306_84547_20140514_224528_outLine +BABEL_OP3_306_87693_20140620_002643_inLine +BABEL_OP3_306_87693_20140620_002643_outLine +BABEL_OP3_306_88686_20150402_213711_inLine +BABEL_OP3_306_88686_20150402_213711_outLine +BABEL_OP3_306_88988_20150317_002311_inLine +BABEL_OP3_306_88988_20150317_002311_outLine +BABEL_OP3_306_90935_20140725_035705_inLine +BABEL_OP3_306_90935_20140725_035705_outLine +BABEL_OP3_306_92942_20140723_005927_inLine +BABEL_OP3_306_92942_20140723_005927_outLine +BABEL_OP3_306_93937_20150317_060204_inLine +BABEL_OP3_306_93937_20150317_060204_outLine +BABEL_OP3_306_94713_20140529_005611_inLine +BABEL_OP3_306_94713_20140529_005611_outLine +BABEL_OP3_306_95490_20140521_225751_inLine +BABEL_OP3_306_95490_20140521_225751_outLine +BABEL_OP3_306_95935_20141028_222645_inLine +BABEL_OP3_306_95935_20141028_222645_outLine +BABEL_OP3_306_96324_20140531_010613_inLine +BABEL_OP3_306_96324_20140531_010613_outLine +BABEL_OP3_306_96405_20140606_005741_inLine +BABEL_OP3_306_96405_20140606_005741_outLine +BABEL_OP3_306_96680_20140528_005805_inLine +BABEL_OP3_306_96680_20140528_005805_outLine +BABEL_OP3_306_96910_20140605_201948_inLine +BABEL_OP3_306_96910_20140605_201948_outLine +BABEL_OP3_306_96934_20140604_223915_inLine +BABEL_OP3_306_96934_20140604_223915_outLine +BABEL_OP3_306_98489_20140612_194947_inLine +BABEL_OP3_306_98489_20140612_194947_outLine +BABEL_OP3_306_98489_20140612_195637_inLine 
+BABEL_OP3_306_98489_20140612_195637_outLine +BABEL_OP3_306_99401_20140714_020007_inLine +BABEL_OP3_306_99401_20140714_020007_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list new file mode 100644 index 00000000000..f72794f4c94 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list @@ -0,0 +1,132 @@ +BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine +BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine 
+BABEL_OP3_306_70110_20140514_221144_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine +BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list new file mode 100644 index 00000000000..7ca400d26e5 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine 
+BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine +BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine +BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine +BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine +BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine 
+BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine +BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine +BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine +BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine 
+BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine +BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine +BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine +BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine 
+BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine +BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine +BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84815_20141101_002538_inLine +BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine 
+BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine +BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine +BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/training.list b/egs/babel/s5d/conf/lists/306-igbo/training.list new file mode 100644 index 00000000000..0504de58fb1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/training.list @@ -0,0 +1,512 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine +BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine 
+BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine +BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine 
+BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine +BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine +BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine +BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine 
+BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine +BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine +BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine +BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine 
+BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine +BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine +BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine +BABEL_OP3_306_70110_20140514_221144_outLine 
+BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine +BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine +BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_84815_20141101_002538_inLine 
+BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine +BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine +BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine +BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine 
+BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list new file mode 100644 index 00000000000..0369662c6a8 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list @@ -0,0 +1,537 @@ +BABEL_OP3_306_10647_20150310_235644_inLine +BABEL_OP3_306_10647_20150310_235644_outLine +BABEL_OP3_306_11310_20140523_212606_inLine +BABEL_OP3_306_11859_20150210_222000_inLine +BABEL_OP3_306_11859_20150210_222000_outLine +BABEL_OP3_306_12220_20140729_004621_inLine +BABEL_OP3_306_12220_20140729_004621_outLine +BABEL_OP3_306_13040_20140621_040421_inLine +BABEL_OP3_306_13040_20140621_040421_outLine +BABEL_OP3_306_13126_20141222_235100_inLine +BABEL_OP3_306_13126_20141222_235100_outLine +BABEL_OP3_306_13189_20141031_050830_inLine +BABEL_OP3_306_13189_20141031_050830_outLine +BABEL_OP3_306_13483_20141007_044436_inLine +BABEL_OP3_306_13483_20141007_044436_outLine +BABEL_OP3_306_14137_20140602_222616_inLine +BABEL_OP3_306_14137_20140602_222616_outLine +BABEL_OP3_306_14179_20140814_232925_inLine +BABEL_OP3_306_14179_20140814_232925_outLine +BABEL_OP3_306_14179_20140814_233948_inLine +BABEL_OP3_306_14179_20140814_233948_outLine +BABEL_OP3_306_15869_20150421_011752_inLine +BABEL_OP3_306_15869_20150421_011752_outLine +BABEL_OP3_306_16249_20150113_220219_inLine +BABEL_OP3_306_16249_20150113_220219_outLine +BABEL_OP3_306_16407_20150227_040000_inLine +BABEL_OP3_306_16407_20150227_040000_outLine +BABEL_OP3_306_16467_20141025_211050_inLine +BABEL_OP3_306_16467_20141025_211050_outLine +BABEL_OP3_306_16802_20150115_213527_inLine +BABEL_OP3_306_16802_20150115_213527_outLine +BABEL_OP3_306_16886_20140731_022140_inLine +BABEL_OP3_306_16886_20140731_022140_outLine +BABEL_OP3_306_17032_20150311_185659_inLine +BABEL_OP3_306_17032_20150311_185659_outLine +BABEL_OP3_306_17496_20140803_035835_inLine +BABEL_OP3_306_17496_20140803_035835_outLine +BABEL_OP3_306_17567_20140803_225538_inLine +BABEL_OP3_306_17567_20140803_225538_outLine +BABEL_OP3_306_17890_20140919_221004_inLine +BABEL_OP3_306_17890_20140919_221004_outLine +BABEL_OP3_306_17923_20140625_232123_inLine +BABEL_OP3_306_17923_20140625_232123_outLine +BABEL_OP3_306_18992_20150303_000227_inLine +BABEL_OP3_306_18992_20150303_000227_outLine +BABEL_OP3_306_19461_20140527_041640_inLine +BABEL_OP3_306_19621_20140803_005045_inLine +BABEL_OP3_306_19621_20140803_005045_outLine +BABEL_OP3_306_19672_20141005_040103_inLine +BABEL_OP3_306_19672_20141005_040103_outLine +BABEL_OP3_306_19672_20141005_040626_inLine +BABEL_OP3_306_19672_20141005_040626_outLine +BABEL_OP3_306_19688_20140524_223141_inLine +BABEL_OP3_306_19703_20140602_002345_inLine +BABEL_OP3_306_19703_20140602_002345_outLine +BABEL_OP3_306_20133_20140514_195807_inLine +BABEL_OP3_306_20133_20140514_195807_outLine 
+BABEL_OP3_306_20133_20140514_202548_inLine +BABEL_OP3_306_20133_20140514_202548_outLine +BABEL_OP3_306_20682_20141027_012441_inLine +BABEL_OP3_306_20768_20150306_035010_inLine +BABEL_OP3_306_20768_20150306_035010_outLine +BABEL_OP3_306_20800_20140625_005044_inLine +BABEL_OP3_306_20800_20140625_005044_outLine +BABEL_OP3_306_20800_20140625_005605_inLine +BABEL_OP3_306_20800_20140625_005605_outLine +BABEL_OP3_306_21004_20141026_004641_inLine +BABEL_OP3_306_21004_20141026_004641_outLine +BABEL_OP3_306_21029_20140614_210102_outLine +BABEL_OP3_306_21159_20150210_210334_inLine +BABEL_OP3_306_21159_20150210_210334_outLine +BABEL_OP3_306_22216_20140530_234149_inLine +BABEL_OP3_306_22280_20141009_011742_inLine +BABEL_OP3_306_22280_20141009_011742_outLine +BABEL_OP3_306_22321_20140602_210645_inLine +BABEL_OP3_306_22321_20140602_212529_inLine +BABEL_OP3_306_22446_20140531_021922_inLine +BABEL_OP3_306_22466_20140510_200019_inLine +BABEL_OP3_306_22466_20140510_200019_outLine +BABEL_OP3_306_22918_20141101_233512_inLine +BABEL_OP3_306_22918_20141101_233512_outLine +BABEL_OP3_306_22965_20140612_215959_outLine +BABEL_OP3_306_22965_20140612_220852_outLine +BABEL_OP3_306_23151_20150211_025354_inLine +BABEL_OP3_306_23151_20150211_025354_outLine +BABEL_OP3_306_23190_20140729_204900_inLine +BABEL_OP3_306_23190_20140729_204900_outLine +BABEL_OP3_306_23731_20140804_002220_inLine +BABEL_OP3_306_23731_20140804_002220_outLine +BABEL_OP3_306_24470_20141008_233522_inLine +BABEL_OP3_306_24470_20141008_233522_outLine +BABEL_OP3_306_24569_20141101_214133_inLine +BABEL_OP3_306_24569_20141101_214133_outLine +BABEL_OP3_306_24982_20140606_004556_inLine +BABEL_OP3_306_24982_20140606_004556_outLine +BABEL_OP3_306_25012_20140523_200250_inLine +BABEL_OP3_306_25012_20140523_200250_outLine +BABEL_OP3_306_25068_20150203_020803_inLine +BABEL_OP3_306_25068_20150203_020803_outLine +BABEL_OP3_306_25412_20140815_220223_inLine +BABEL_OP3_306_25412_20140815_220223_outLine +BABEL_OP3_306_25895_20150311_050131_outLine +BABEL_OP3_306_26602_20141029_024837_inLine +BABEL_OP3_306_26602_20141029_024837_outLine +BABEL_OP3_306_27203_20140725_002808_inLine +BABEL_OP3_306_27203_20140725_002808_outLine +BABEL_OP3_306_28190_20141031_000818_inLine +BABEL_OP3_306_28190_20141031_000818_outLine +BABEL_OP3_306_28280_20150316_040438_inLine +BABEL_OP3_306_28280_20150316_040438_outLine +BABEL_OP3_306_28522_20140814_224406_inLine +BABEL_OP3_306_28522_20140814_224406_outLine +BABEL_OP3_306_28585_20141028_213521_inLine +BABEL_OP3_306_28585_20141028_213521_outLine +BABEL_OP3_306_28600_20141025_233550_inLine +BABEL_OP3_306_28600_20141025_233550_outLine +BABEL_OP3_306_29076_20140815_025044_inLine +BABEL_OP3_306_29076_20140815_025044_outLine +BABEL_OP3_306_29076_20140815_030534_inLine +BABEL_OP3_306_29076_20140815_030534_outLine +BABEL_OP3_306_29168_20140520_233011_inLine +BABEL_OP3_306_29168_20140520_233011_outLine +BABEL_OP3_306_29168_20140520_234151_inLine +BABEL_OP3_306_29168_20140520_234151_outLine +BABEL_OP3_306_29168_20140520_235529_inLine +BABEL_OP3_306_29168_20140520_235529_outLine +BABEL_OP3_306_29416_20141026_232903_inLine +BABEL_OP3_306_29416_20141026_232903_outLine +BABEL_OP3_306_29482_20150314_051634_inLine +BABEL_OP3_306_29482_20150314_051634_outLine +BABEL_OP3_306_29663_20150311_033510_outLine +BABEL_OP3_306_29911_20140527_033850_inLine +BABEL_OP3_306_29911_20140527_033850_outLine +BABEL_OP3_306_30180_20140728_235122_inLine +BABEL_OP3_306_30180_20140728_235122_outLine +BABEL_OP3_306_30645_20150420_223611_inLine 
+BABEL_OP3_306_30645_20150420_223611_outLine +BABEL_OP3_306_31624_20140605_214151_inLine +BABEL_OP3_306_31624_20140605_215209_inLine +BABEL_OP3_306_31728_20150401_203654_inLine +BABEL_OP3_306_31728_20150401_203654_outLine +BABEL_OP3_306_32048_20141028_234758_inLine +BABEL_OP3_306_32048_20141028_234758_outLine +BABEL_OP3_306_32380_20150131_012448_inLine +BABEL_OP3_306_32380_20150131_012448_outLine +BABEL_OP3_306_32837_20141031_024422_inLine +BABEL_OP3_306_32837_20141031_024422_outLine +BABEL_OP3_306_33216_20150314_024409_inLine +BABEL_OP3_306_33229_20141029_230937_inLine +BABEL_OP3_306_33229_20141029_230937_outLine +BABEL_OP3_306_34482_20150223_031106_inLine +BABEL_OP3_306_34482_20150223_031106_outLine +BABEL_OP3_306_34811_20140803_015207_inLine +BABEL_OP3_306_34811_20140803_015207_outLine +BABEL_OP3_306_34826_20141028_005224_inLine +BABEL_OP3_306_34826_20141028_005224_outLine +BABEL_OP3_306_35143_20141031_235658_inLine +BABEL_OP3_306_35143_20141031_235658_outLine +BABEL_OP3_306_36039_20141116_001002_outLine +BABEL_OP3_306_37682_20140725_003616_inLine +BABEL_OP3_306_37682_20140725_003616_outLine +BABEL_OP3_306_37853_20141101_023348_inLine +BABEL_OP3_306_37853_20141101_023348_outLine +BABEL_OP3_306_38340_20140611_234929_inLine +BABEL_OP3_306_38340_20140611_234929_outLine +BABEL_OP3_306_38340_20140611_235849_inLine +BABEL_OP3_306_38340_20140611_235849_outLine +BABEL_OP3_306_38588_20140728_230958_inLine +BABEL_OP3_306_38588_20140728_230958_outLine +BABEL_OP3_306_38664_20140730_025027_inLine +BABEL_OP3_306_38664_20140730_025027_outLine +BABEL_OP3_306_38750_20141101_221241_inLine +BABEL_OP3_306_38750_20141101_221241_outLine +BABEL_OP3_306_39307_20140522_010101_inLine +BABEL_OP3_306_39426_20141102_040515_inLine +BABEL_OP3_306_39426_20141102_040515_outLine +BABEL_OP3_306_39579_20150123_014947_inLine +BABEL_OP3_306_39579_20150123_014947_outLine +BABEL_OP3_306_39638_20150418_005151_inLine +BABEL_OP3_306_39638_20150418_005151_outLine +BABEL_OP3_306_39848_20141006_034744_inLine +BABEL_OP3_306_39848_20141006_034744_outLine +BABEL_OP3_306_40557_20141101_025253_inLine +BABEL_OP3_306_40557_20141101_025253_outLine +BABEL_OP3_306_41038_20140812_205140_inLine +BABEL_OP3_306_41038_20140812_205140_outLine +BABEL_OP3_306_41100_20140718_034152_inLine +BABEL_OP3_306_41100_20140718_034152_outLine +BABEL_OP3_306_41100_20140718_040923_inLine +BABEL_OP3_306_41100_20140718_040923_outLine +BABEL_OP3_306_41442_20141026_003328_inLine +BABEL_OP3_306_41442_20141026_003328_outLine +BABEL_OP3_306_41493_20140515_044422_inLine +BABEL_OP3_306_41493_20140515_044422_outLine +BABEL_OP3_306_41609_20150418_225730_inLine +BABEL_OP3_306_41609_20150418_225730_outLine +BABEL_OP3_306_42243_20150408_003626_inLine +BABEL_OP3_306_42243_20150408_003626_outLine +BABEL_OP3_306_42434_20140724_015333_inLine +BABEL_OP3_306_42434_20140724_015333_outLine +BABEL_OP3_306_42497_20140622_023839_inLine +BABEL_OP3_306_42497_20140622_023839_outLine +BABEL_OP3_306_42991_20140808_231227_inLine +BABEL_OP3_306_42991_20140808_231227_outLine +BABEL_OP3_306_42991_20140809_015233_inLine +BABEL_OP3_306_42991_20140809_015233_outLine +BABEL_OP3_306_43285_20140814_222223_inLine +BABEL_OP3_306_43920_20141031_035638_inLine +BABEL_OP3_306_43920_20141031_035638_outLine +BABEL_OP3_306_44477_20140804_041338_inLine +BABEL_OP3_306_44477_20140804_041338_outLine +BABEL_OP3_306_44681_20140528_001629_inLine +BABEL_OP3_306_44681_20140528_001629_outLine +BABEL_OP3_306_44898_20140524_184833_inLine +BABEL_OP3_306_44898_20140524_184833_outLine 
+BABEL_OP3_306_45374_20150120_011233_inLine +BABEL_OP3_306_45374_20150120_011233_outLine +BABEL_OP3_306_45697_20141031_035336_inLine +BABEL_OP3_306_45697_20141031_035336_outLine +BABEL_OP3_306_46041_20141029_203936_inLine +BABEL_OP3_306_46041_20141029_203936_outLine +BABEL_OP3_306_46041_20141029_210843_inLine +BABEL_OP3_306_46041_20141029_210843_outLine +BABEL_OP3_306_46261_20141009_055048_inLine +BABEL_OP3_306_46261_20141009_055048_outLine +BABEL_OP3_306_46268_20150417_042038_inLine +BABEL_OP3_306_46268_20150417_042038_outLine +BABEL_OP3_306_46589_20141005_224403_inLine +BABEL_OP3_306_46589_20141005_224403_outLine +BABEL_OP3_306_46702_20140517_231741_inLine +BABEL_OP3_306_46702_20140517_231741_outLine +BABEL_OP3_306_46712_20140606_212859_inLine +BABEL_OP3_306_46712_20140606_212859_outLine +BABEL_OP3_306_46712_20140606_214018_inLine +BABEL_OP3_306_46712_20140606_214018_outLine +BABEL_OP3_306_46881_20150403_054836_inLine +BABEL_OP3_306_46881_20150403_054836_outLine +BABEL_OP3_306_48200_20141028_004545_inLine +BABEL_OP3_306_48200_20141028_004545_outLine +BABEL_OP3_306_48422_20141102_004117_inLine +BABEL_OP3_306_48422_20141102_004117_outLine +BABEL_OP3_306_48789_20141015_223422_inLine +BABEL_OP3_306_48789_20141015_223422_outLine +BABEL_OP3_306_48844_20150421_064019_inLine +BABEL_OP3_306_48844_20150421_064019_outLine +BABEL_OP3_306_49118_20141028_000839_inLine +BABEL_OP3_306_49118_20141028_000839_outLine +BABEL_OP3_306_49767_20150312_012314_inLine +BABEL_OP3_306_49767_20150312_012314_outLine +BABEL_OP3_306_49812_20141105_222036_inLine +BABEL_OP3_306_49812_20141105_222036_outLine +BABEL_OP3_306_49907_20140606_231957_inLine +BABEL_OP3_306_49907_20140606_231957_outLine +BABEL_OP3_306_50090_20140804_031708_inLine +BABEL_OP3_306_50090_20140804_031708_outLine +BABEL_OP3_306_50810_20140514_184240_inLine +BABEL_OP3_306_50958_20141016_004240_inLine +BABEL_OP3_306_50958_20141016_004240_outLine +BABEL_OP3_306_50958_20141016_005618_inLine +BABEL_OP3_306_50958_20141016_005618_outLine +BABEL_OP3_306_51484_20141026_005632_inLine +BABEL_OP3_306_51819_20140923_230818_inLine +BABEL_OP3_306_51819_20140923_230818_outLine +BABEL_OP3_306_51858_20150416_031524_inLine +BABEL_OP3_306_51858_20150416_031524_outLine +BABEL_OP3_306_52246_20140730_012314_inLine +BABEL_OP3_306_52246_20140730_012314_outLine +BABEL_OP3_306_52717_20140717_164851_inLine +BABEL_OP3_306_52717_20140717_164851_outLine +BABEL_OP3_306_52818_20140812_202317_inLine +BABEL_OP3_306_52818_20140812_202317_outLine +BABEL_OP3_306_53063_20141102_002734_inLine +BABEL_OP3_306_53063_20141102_002734_outLine +BABEL_OP3_306_54046_20141030_225348_inLine +BABEL_OP3_306_54046_20141030_225348_outLine +BABEL_OP3_306_54074_20140919_194620_inLine +BABEL_OP3_306_54074_20140919_194620_outLine +BABEL_OP3_306_54074_20140919_195619_inLine +BABEL_OP3_306_54074_20140919_195619_outLine +BABEL_OP3_306_54104_20150420_015927_inLine +BABEL_OP3_306_54104_20150420_015927_outLine +BABEL_OP3_306_54953_20140730_001818_inLine +BABEL_OP3_306_54953_20140730_001818_outLine +BABEL_OP3_306_56023_20141029_001317_inLine +BABEL_OP3_306_56023_20141029_001317_outLine +BABEL_OP3_306_56023_20141029_002053_inLine +BABEL_OP3_306_56023_20141029_002053_outLine +BABEL_OP3_306_56023_20141029_003640_inLine +BABEL_OP3_306_56023_20141029_003640_outLine +BABEL_OP3_306_56090_20140511_020343_inLine +BABEL_OP3_306_56090_20140511_020343_outLine +BABEL_OP3_306_56198_20140612_204109_inLine +BABEL_OP3_306_56198_20140612_204109_outLine +BABEL_OP3_306_56326_20140523_001458_inLine 
+BABEL_OP3_306_56326_20140523_001458_outLine +BABEL_OP3_306_56720_20141006_203142_inLine +BABEL_OP3_306_56720_20141006_203142_outLine +BABEL_OP3_306_57609_20140808_220254_inLine +BABEL_OP3_306_57609_20140808_220254_outLine +BABEL_OP3_306_58850_20140731_002418_inLine +BABEL_OP3_306_58850_20140731_002418_outLine +BABEL_OP3_306_58926_20140605_035534_inLine +BABEL_OP3_306_59549_20140621_221900_inLine +BABEL_OP3_306_59549_20140621_221900_outLine +BABEL_OP3_306_59549_20140621_223133_inLine +BABEL_OP3_306_59549_20140621_223133_outLine +BABEL_OP3_306_59747_20140530_225826_inLine +BABEL_OP3_306_59747_20140530_225826_outLine +BABEL_OP3_306_59747_20140530_231320_inLine +BABEL_OP3_306_59747_20140530_231320_outLine +BABEL_OP3_306_59993_20140606_000233_inLine +BABEL_OP3_306_60626_20140614_202445_inLine +BABEL_OP3_306_60626_20140614_202445_outLine +BABEL_OP3_306_60830_20141006_215349_inLine +BABEL_OP3_306_60830_20141006_215349_outLine +BABEL_OP3_306_61011_20140515_030617_inLine +BABEL_OP3_306_61963_20141028_202812_inLine +BABEL_OP3_306_61963_20141028_202812_outLine +BABEL_OP3_306_62014_20140804_205329_inLine +BABEL_OP3_306_62014_20140804_205329_outLine +BABEL_OP3_306_62047_20141028_035724_inLine +BABEL_OP3_306_62047_20141028_035724_outLine +BABEL_OP3_306_62434_20150414_000517_outLine +BABEL_OP3_306_62810_20150409_183507_inLine +BABEL_OP3_306_62810_20150409_183507_outLine +BABEL_OP3_306_62976_20140811_223219_inLine +BABEL_OP3_306_63084_20140809_013406_inLine +BABEL_OP3_306_63084_20140809_013406_outLine +BABEL_OP3_306_63309_20150417_061125_inLine +BABEL_OP3_306_63309_20150417_061125_outLine +BABEL_OP3_306_63336_20150221_022703_inLine +BABEL_OP3_306_63336_20150221_022703_outLine +BABEL_OP3_306_64065_20140610_210016_inLine +BABEL_OP3_306_64065_20140610_210016_outLine +BABEL_OP3_306_64768_20140604_000427_inLine +BABEL_OP3_306_64768_20140604_000427_outLine +BABEL_OP3_306_65077_20140516_204250_inLine +BABEL_OP3_306_65077_20140516_204250_outLine +BABEL_OP3_306_65692_20140802_044543_inLine +BABEL_OP3_306_65692_20140802_044543_outLine +BABEL_OP3_306_66177_20141104_024434_inLine +BABEL_OP3_306_66177_20141104_024434_outLine +BABEL_OP3_306_67659_20140602_021238_inLine +BABEL_OP3_306_68040_20140802_182145_inLine +BABEL_OP3_306_68385_20140511_024349_inLine +BABEL_OP3_306_68385_20140511_024349_outLine +BABEL_OP3_306_68385_20140511_025326_inLine +BABEL_OP3_306_68385_20140511_025326_outLine +BABEL_OP3_306_68823_20150123_213140_inLine +BABEL_OP3_306_68823_20150123_213140_outLine +BABEL_OP3_306_68910_20150311_040225_inLine +BABEL_OP3_306_68910_20150311_040225_outLine +BABEL_OP3_306_69574_20140517_001243_inLine +BABEL_OP3_306_69574_20140517_001243_outLine +BABEL_OP3_306_70601_20140725_010325_inLine +BABEL_OP3_306_70601_20140725_011335_inLine +BABEL_OP3_306_70726_20150220_234954_inLine +BABEL_OP3_306_70726_20150220_234954_outLine +BABEL_OP3_306_71419_20140526_222116_inLine +BABEL_OP3_306_71419_20140526_222116_outLine +BABEL_OP3_306_71566_20141026_022020_inLine +BABEL_OP3_306_71566_20141026_022020_outLine +BABEL_OP3_306_73022_20141102_005954_inLine +BABEL_OP3_306_73022_20141102_005954_outLine +BABEL_OP3_306_73022_20141102_010949_inLine +BABEL_OP3_306_73022_20141102_010949_outLine +BABEL_OP3_306_73072_20140603_222119_inLine +BABEL_OP3_306_73549_20150312_223219_inLine +BABEL_OP3_306_73549_20150312_223219_outLine +BABEL_OP3_306_74455_20141030_231535_inLine +BABEL_OP3_306_74455_20141030_231535_outLine +BABEL_OP3_306_74667_20140730_220428_inLine +BABEL_OP3_306_74667_20140730_220428_outLine 
+BABEL_OP3_306_74763_20150422_000233_inLine +BABEL_OP3_306_74763_20150422_000233_outLine +BABEL_OP3_306_74799_20141016_010127_inLine +BABEL_OP3_306_74799_20141016_010127_outLine +BABEL_OP3_306_74921_20140804_005230_inLine +BABEL_OP3_306_74921_20140804_005230_outLine +BABEL_OP3_306_75465_20141025_231951_inLine +BABEL_OP3_306_75465_20141025_231951_outLine +BABEL_OP3_306_76069_20150223_021350_inLine +BABEL_OP3_306_76069_20150223_021350_outLine +BABEL_OP3_306_76238_20141007_011009_inLine +BABEL_OP3_306_76238_20141007_011009_outLine +BABEL_OP3_306_76683_20140813_015005_inLine +BABEL_OP3_306_76683_20140813_015005_outLine +BABEL_OP3_306_76773_20140621_234123_inLine +BABEL_OP3_306_76773_20140621_234123_outLine +BABEL_OP3_306_77033_20141102_032017_inLine +BABEL_OP3_306_77033_20141102_032017_outLine +BABEL_OP3_306_77427_20140803_024549_inLine +BABEL_OP3_306_77427_20140803_024549_outLine +BABEL_OP3_306_78609_20141029_012144_inLine +BABEL_OP3_306_78609_20141029_012144_outLine +BABEL_OP3_306_79571_20140814_212942_inLine +BABEL_OP3_306_79571_20140814_212942_outLine +BABEL_OP3_306_79590_20141006_195244_inLine +BABEL_OP3_306_79590_20141006_195244_outLine +BABEL_OP3_306_79590_20141006_200315_inLine +BABEL_OP3_306_79590_20141006_200315_outLine +BABEL_OP3_306_80439_20140605_000944_inLine +BABEL_OP3_306_80439_20140605_000944_outLine +BABEL_OP3_306_80559_20140625_032329_inLine +BABEL_OP3_306_80559_20140625_032329_outLine +BABEL_OP3_306_80897_20140725_020057_inLine +BABEL_OP3_306_80897_20140725_020057_outLine +BABEL_OP3_306_81229_20140730_223530_inLine +BABEL_OP3_306_81229_20140730_223530_outLine +BABEL_OP3_306_81404_20140725_025731_inLine +BABEL_OP3_306_81810_20140728_223725_inLine +BABEL_OP3_306_81971_20140509_013738_inLine +BABEL_OP3_306_82138_20140730_174109_inLine +BABEL_OP3_306_82138_20140730_174109_outLine +BABEL_OP3_306_82425_20140714_035045_inLine +BABEL_OP3_306_82425_20140714_035045_outLine +BABEL_OP3_306_82496_20150418_234759_inLine +BABEL_OP3_306_82496_20150418_234759_outLine +BABEL_OP3_306_82622_20150411_050327_inLine +BABEL_OP3_306_82622_20150411_050327_outLine +BABEL_OP3_306_82979_20140612_012812_outLine +BABEL_OP3_306_83238_20140809_203535_inLine +BABEL_OP3_306_83238_20140809_203535_outLine +BABEL_OP3_306_83238_20140809_205023_inLine +BABEL_OP3_306_83238_20140809_205023_outLine +BABEL_OP3_306_83775_20140724_231716_inLine +BABEL_OP3_306_83813_20140528_211112_inLine +BABEL_OP3_306_83813_20140528_211112_outLine +BABEL_OP3_306_84061_20140730_005053_inLine +BABEL_OP3_306_84061_20140730_005053_outLine +BABEL_OP3_306_84177_20150214_011945_inLine +BABEL_OP3_306_84177_20150214_011945_outLine +BABEL_OP3_306_84327_20141006_211803_inLine +BABEL_OP3_306_84327_20141006_211803_outLine +BABEL_OP3_306_84408_20140729_231948_inLine +BABEL_OP3_306_84408_20140729_231948_outLine +BABEL_OP3_306_84737_20141031_010833_inLine +BABEL_OP3_306_84737_20141031_010833_outLine +BABEL_OP3_306_84823_20141006_034008_inLine +BABEL_OP3_306_84823_20141006_034008_outLine +BABEL_OP3_306_84838_20141029_023621_inLine +BABEL_OP3_306_84838_20141029_023621_outLine +BABEL_OP3_306_85179_20141101_012428_inLine +BABEL_OP3_306_85179_20141101_012428_outLine +BABEL_OP3_306_85322_20150420_034604_inLine +BABEL_OP3_306_85322_20150420_034604_outLine +BABEL_OP3_306_86100_20150328_002625_inLine +BABEL_OP3_306_86100_20150328_002625_outLine +BABEL_OP3_306_86830_20141031_030135_inLine +BABEL_OP3_306_86830_20141031_030135_outLine +BABEL_OP3_306_87073_20140516_232026_inLine +BABEL_OP3_306_87073_20140516_232026_outLine 
+BABEL_OP3_306_87470_20140729_214135_inLine +BABEL_OP3_306_87470_20140729_214135_outLine +BABEL_OP3_306_87796_20140816_000301_inLine +BABEL_OP3_306_87796_20140816_000301_outLine +BABEL_OP3_306_88260_20140725_033250_inLine +BABEL_OP3_306_88260_20140725_033250_outLine +BABEL_OP3_306_88394_20140525_002127_inLine +BABEL_OP3_306_88394_20140525_002127_outLine +BABEL_OP3_306_88669_20140802_011238_inLine +BABEL_OP3_306_88669_20140802_011238_outLine +BABEL_OP3_306_88669_20140802_011732_inLine +BABEL_OP3_306_88669_20140802_011732_outLine +BABEL_OP3_306_88669_20140802_012458_inLine +BABEL_OP3_306_88669_20140802_012458_outLine +BABEL_OP3_306_88673_20140731_231306_inLine +BABEL_OP3_306_88673_20140731_231306_outLine +BABEL_OP3_306_88783_20141031_212634_inLine +BABEL_OP3_306_88783_20141031_212634_outLine +BABEL_OP3_306_88938_20141102_003357_inLine +BABEL_OP3_306_88938_20141102_003357_outLine +BABEL_OP3_306_89372_20140516_004539_inLine +BABEL_OP3_306_89372_20140516_004539_outLine +BABEL_OP3_306_89560_20141029_203632_inLine +BABEL_OP3_306_89560_20141029_203632_outLine +BABEL_OP3_306_89650_20150331_011100_inLine +BABEL_OP3_306_89650_20150331_011100_outLine +BABEL_OP3_306_89665_20140725_015846_inLine +BABEL_OP3_306_89665_20140725_015846_outLine +BABEL_OP3_306_89695_20141006_020223_inLine +BABEL_OP3_306_89695_20141006_020223_outLine +BABEL_OP3_306_89794_20140813_221738_inLine +BABEL_OP3_306_89794_20140813_221738_outLine +BABEL_OP3_306_89943_20140607_005926_inLine +BABEL_OP3_306_89943_20140607_005926_outLine +BABEL_OP3_306_90347_20140814_172652_inLine +BABEL_OP3_306_90347_20140814_172652_outLine +BABEL_OP3_306_91125_20140522_213937_inLine +BABEL_OP3_306_91125_20140522_213937_outLine +BABEL_OP3_306_91125_20140522_214703_inLine +BABEL_OP3_306_91125_20140522_214703_outLine +BABEL_OP3_306_91319_20141028_013449_inLine +BABEL_OP3_306_91319_20141028_013449_outLine +BABEL_OP3_306_91971_20150331_203936_inLine +BABEL_OP3_306_91971_20150331_203936_outLine +BABEL_OP3_306_91977_20141004_202232_inLine +BABEL_OP3_306_91977_20141004_202232_outLine +BABEL_OP3_306_92252_20150327_024334_inLine +BABEL_OP3_306_92557_20141031_213221_inLine +BABEL_OP3_306_92557_20141031_213221_outLine +BABEL_OP3_306_92740_20141004_182215_inLine +BABEL_OP3_306_92740_20141004_182215_outLine +BABEL_OP3_306_92886_20140611_015551_inLine +BABEL_OP3_306_92886_20140611_015551_outLine +BABEL_OP3_306_93007_20150314_033427_outLine +BABEL_OP3_306_93475_20140625_235211_inLine +BABEL_OP3_306_93475_20140625_235211_outLine +BABEL_OP3_306_94166_20141102_014755_inLine +BABEL_OP3_306_94166_20141102_014755_outLine +BABEL_OP3_306_94333_20150418_031427_inLine +BABEL_OP3_306_94333_20150418_031427_outLine +BABEL_OP3_306_95446_20141028_001455_inLine +BABEL_OP3_306_95446_20141028_001455_outLine +BABEL_OP3_306_96077_20150327_033005_inLine +BABEL_OP3_306_96077_20150327_033005_outLine +BABEL_OP3_306_96190_20140614_223920_inLine +BABEL_OP3_306_96190_20140614_223920_outLine +BABEL_OP3_306_96584_20141104_034807_inLine +BABEL_OP3_306_97264_20141028_220710_inLine +BABEL_OP3_306_97264_20141028_220710_outLine +BABEL_OP3_306_97363_20140612_224303_inLine +BABEL_OP3_306_97363_20140612_224303_outLine +BABEL_OP3_306_97557_20140802_234323_inLine +BABEL_OP3_306_97557_20140802_234323_outLine +BABEL_OP3_306_97557_20140802_235634_inLine +BABEL_OP3_306_97557_20140802_235634_outLine +BABEL_OP3_306_97896_20140731_015336_inLine +BABEL_OP3_306_97896_20140731_015336_outLine +BABEL_OP3_306_97988_20141101_013315_inLine +BABEL_OP3_306_97988_20141101_013315_outLine 
+BABEL_OP3_306_98356_20140929_235521_inLine +BABEL_OP3_306_98356_20140929_235521_outLine +BABEL_OP3_306_98565_20150327_040438_inLine +BABEL_OP3_306_98565_20150327_040438_outLine +BABEL_OP3_306_98888_20141006_032811_inLine +BABEL_OP3_306_98888_20141006_032811_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine +BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine +BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine +BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine 
+BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine +BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine +BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine +BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.list b/egs/babel/s5d/conf/lists/307-amharic/dev.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine +BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine 
+BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine +BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine +BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine +BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine 
+BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine +BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/eval.list b/egs/babel/s5d/conf/lists/307-amharic/eval.list new file mode 100644 index 00000000000..9687fe69738 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_307_10319_20140417_201202_inLine +BABEL_OP3_307_10319_20140417_201202_outLine +BABEL_OP3_307_12846_20140820_004747_inLine +BABEL_OP3_307_12846_20140820_004747_outLine +BABEL_OP3_307_13040_20140519_010732_inLine +BABEL_OP3_307_13040_20140519_010732_outLine +BABEL_OP3_307_13427_20140517_185634_inLine +BABEL_OP3_307_13427_20140517_185634_outLine +BABEL_OP3_307_15617_20140902_211446_inLine +BABEL_OP3_307_15617_20140902_211446_outLine +BABEL_OP3_307_16056_20140403_224737_inLine +BABEL_OP3_307_16056_20140403_224737_outLine +BABEL_OP3_307_16787_20140504_192345_inLine +BABEL_OP3_307_16787_20140504_192345_outLine +BABEL_OP3_307_16787_20140504_193044_inLine +BABEL_OP3_307_16787_20140504_193044_outLine +BABEL_OP3_307_18242_20140822_194420_inLine +BABEL_OP3_307_18242_20140822_194420_outLine +BABEL_OP3_307_19672_20140610_182836_inLine +BABEL_OP3_307_19672_20140610_182836_outLine +BABEL_OP3_307_20738_20140714_223501_inLine +BABEL_OP3_307_20738_20140714_223501_outLine +BABEL_OP3_307_20800_20140501_001836_inLine +BABEL_OP3_307_20800_20140501_001836_outLine +BABEL_OP3_307_21581_20140510_172450_inLine +BABEL_OP3_307_21581_20140510_172450_outLine +BABEL_OP3_307_22641_20140417_190251_inLine +BABEL_OP3_307_22641_20140417_190251_outLine +BABEL_OP3_307_23260_20140809_221233_inLine +BABEL_OP3_307_23260_20140809_221233_outLine +BABEL_OP3_307_23983_20140814_233159_inLine +BABEL_OP3_307_23983_20140814_233159_outLine +BABEL_OP3_307_24033_20140705_202406_inLine +BABEL_OP3_307_24033_20140705_202406_outLine +BABEL_OP3_307_26072_20140707_234609_inLine +BABEL_OP3_307_26072_20140707_234609_outLine +BABEL_OP3_307_28585_20140703_170913_inLine +BABEL_OP3_307_28585_20140703_170913_outLine +BABEL_OP3_307_28606_20140617_001826_inLine +BABEL_OP3_307_28606_20140617_001826_outLine +BABEL_OP3_307_31668_20140827_172922_inLine +BABEL_OP3_307_31668_20140827_172922_outLine +BABEL_OP3_307_33635_20140508_230911_inLine +BABEL_OP3_307_33635_20140508_230911_outLine +BABEL_OP3_307_33659_20140824_234408_inLine +BABEL_OP3_307_33659_20140824_234408_outLine +BABEL_OP3_307_34486_20140824_163426_inLine +BABEL_OP3_307_34486_20140824_163426_outLine +BABEL_OP3_307_34564_20140703_183530_inLine 
+BABEL_OP3_307_34564_20140703_183530_outLine +BABEL_OP3_307_34713_20140903_004337_inLine +BABEL_OP3_307_34713_20140903_004337_outLine +BABEL_OP3_307_35202_20140609_172217_inLine +BABEL_OP3_307_35202_20140609_172217_outLine +BABEL_OP3_307_35609_20140907_195928_inLine +BABEL_OP3_307_35609_20140907_195928_outLine +BABEL_OP3_307_36017_20140811_180507_inLine +BABEL_OP3_307_36017_20140811_180507_outLine +BABEL_OP3_307_37064_20140405_195726_inLine +BABEL_OP3_307_37064_20140405_195726_outLine +BABEL_OP3_307_41745_20140508_193418_inLine +BABEL_OP3_307_41745_20140508_193418_outLine +BABEL_OP3_307_42231_20140616_222234_inLine +BABEL_OP3_307_42231_20140616_222234_outLine +BABEL_OP3_307_43285_20140607_212542_inLine +BABEL_OP3_307_43285_20140607_212542_outLine +BABEL_OP3_307_44420_20140503_221325_inLine +BABEL_OP3_307_44420_20140503_221325_outLine +BABEL_OP3_307_44847_20140527_221753_inLine +BABEL_OP3_307_44847_20140527_221753_outLine +BABEL_OP3_307_45106_20140530_183351_inLine +BABEL_OP3_307_45106_20140530_183351_outLine +BABEL_OP3_307_45777_20140506_181506_inLine +BABEL_OP3_307_45777_20140506_181506_outLine +BABEL_OP3_307_47877_20140705_224331_inLine +BABEL_OP3_307_47877_20140705_224331_outLine +BABEL_OP3_307_47959_20140505_185302_inLine +BABEL_OP3_307_47959_20140505_185302_outLine +BABEL_OP3_307_48399_20140403_003150_inLine +BABEL_OP3_307_48399_20140403_003150_outLine +BABEL_OP3_307_49637_20140417_211134_inLine +BABEL_OP3_307_49637_20140417_211134_outLine +BABEL_OP3_307_50175_20140415_222418_inLine +BABEL_OP3_307_50175_20140415_222418_outLine +BABEL_OP3_307_50630_20140609_215223_inLine +BABEL_OP3_307_50630_20140609_215223_outLine +BABEL_OP3_307_51858_20140829_174031_inLine +BABEL_OP3_307_51858_20140829_174031_outLine +BABEL_OP3_307_52694_20140519_182152_inLine +BABEL_OP3_307_52694_20140519_182152_outLine +BABEL_OP3_307_53072_20140810_001530_inLine +BABEL_OP3_307_53072_20140810_001530_outLine +BABEL_OP3_307_54405_20140517_202903_inLine +BABEL_OP3_307_54405_20140517_202903_outLine +BABEL_OP3_307_57609_20140519_194402_inLine +BABEL_OP3_307_57609_20140519_194402_outLine +BABEL_OP3_307_60307_20140907_225330_inLine +BABEL_OP3_307_60307_20140907_225330_outLine +BABEL_OP3_307_60538_20140423_174547_inLine +BABEL_OP3_307_60538_20140423_174547_outLine +BABEL_OP3_307_62362_20140824_175404_inLine +BABEL_OP3_307_62362_20140824_175404_outLine +BABEL_OP3_307_62852_20140416_014025_inLine +BABEL_OP3_307_62852_20140416_014025_outLine +BABEL_OP3_307_63309_20140828_003208_inLine +BABEL_OP3_307_63309_20140828_003208_outLine +BABEL_OP3_307_63445_20140401_225339_inLine +BABEL_OP3_307_63445_20140401_225339_outLine +BABEL_OP3_307_64494_20140430_224138_inLine +BABEL_OP3_307_64494_20140430_224138_outLine +BABEL_OP3_307_64638_20140609_213059_inLine +BABEL_OP3_307_64638_20140609_213059_outLine +BABEL_OP3_307_65252_20140813_202634_inLine +BABEL_OP3_307_65252_20140813_202634_outLine +BABEL_OP3_307_65370_20140907_174141_inLine +BABEL_OP3_307_65370_20140907_174141_outLine +BABEL_OP3_307_67794_20140430_211624_inLine +BABEL_OP3_307_67794_20140430_211624_outLine +BABEL_OP3_307_67794_20140430_212806_inLine +BABEL_OP3_307_67794_20140430_212806_outLine +BABEL_OP3_307_70110_20140414_223000_inLine +BABEL_OP3_307_70110_20140414_223000_outLine +BABEL_OP3_307_73042_20140403_013739_inLine +BABEL_OP3_307_73042_20140403_013739_outLine +BABEL_OP3_307_75460_20140821_232032_inLine +BABEL_OP3_307_75460_20140821_232032_outLine +BABEL_OP3_307_76773_20140403_224239_inLine +BABEL_OP3_307_76773_20140403_224239_outLine 
+BABEL_OP3_307_77112_20140405_232547_inLine +BABEL_OP3_307_77112_20140405_232547_outLine +BABEL_OP3_307_77391_20140404_205514_inLine +BABEL_OP3_307_77391_20140404_205514_outLine +BABEL_OP3_307_79820_20140404_235700_inLine +BABEL_OP3_307_79820_20140404_235700_outLine +BABEL_OP3_307_80897_20140605_185417_inLine +BABEL_OP3_307_80897_20140605_185417_outLine +BABEL_OP3_307_82361_20140811_190547_inLine +BABEL_OP3_307_82361_20140811_190547_outLine +BABEL_OP3_307_82966_20140704_224020_inLine +BABEL_OP3_307_82966_20140704_224020_outLine +BABEL_OP3_307_83062_20140730_214025_inLine +BABEL_OP3_307_83062_20140730_214025_outLine +BABEL_OP3_307_83366_20140529_193250_inLine +BABEL_OP3_307_83366_20140529_193250_outLine +BABEL_OP3_307_83545_20140813_230842_inLine +BABEL_OP3_307_83545_20140813_230842_outLine +BABEL_OP3_307_83775_20140510_215248_inLine +BABEL_OP3_307_83775_20140510_215248_outLine +BABEL_OP3_307_83775_20140510_220305_inLine +BABEL_OP3_307_83775_20140510_220305_outLine +BABEL_OP3_307_83851_20140404_202207_inLine +BABEL_OP3_307_83851_20140404_202207_outLine +BABEL_OP3_307_86748_20140707_202225_inLine +BABEL_OP3_307_86748_20140707_202225_outLine +BABEL_OP3_307_87073_20140327_221923_inLine +BABEL_OP3_307_87073_20140327_221923_outLine +BABEL_OP3_307_87693_20140503_194632_inLine +BABEL_OP3_307_87693_20140503_194632_outLine +BABEL_OP3_307_89045_20140519_191547_inLine +BABEL_OP3_307_89045_20140519_191547_outLine +BABEL_OP3_307_89330_20140821_234229_inLine +BABEL_OP3_307_89330_20140821_234229_outLine +BABEL_OP3_307_89794_20140531_224759_inLine +BABEL_OP3_307_89794_20140531_224759_outLine +BABEL_OP3_307_90440_20140829_001435_inLine +BABEL_OP3_307_90440_20140829_001435_outLine +BABEL_OP3_307_90935_20140508_183907_inLine +BABEL_OP3_307_90935_20140508_183907_outLine +BABEL_OP3_307_91463_20140603_203737_inLine +BABEL_OP3_307_91463_20140603_203737_outLine +BABEL_OP3_307_92060_20140814_230458_inLine +BABEL_OP3_307_92060_20140814_230458_outLine +BABEL_OP3_307_92698_20140510_215147_inLine +BABEL_OP3_307_92698_20140510_215147_outLine +BABEL_OP3_307_94587_20140614_000734_inLine +BABEL_OP3_307_94587_20140614_000734_outLine +BABEL_OP3_307_96205_20140512_195746_inLine +BABEL_OP3_307_96205_20140512_195746_outLine +BABEL_OP3_307_97264_20140705_170053_inLine +BABEL_OP3_307_97264_20140705_170053_outLine +BABEL_OP3_307_98580_20140504_195655_inLine +BABEL_OP3_307_98580_20140504_195655_outLine +BABEL_OP3_307_99487_20140518_212249_inLine +BABEL_OP3_307_99487_20140518_212249_outLine +BABEL_OP3_307_99952_20140822_185201_inLine +BABEL_OP3_307_99952_20140822_185201_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list new file mode 100644 index 00000000000..a21532c03d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine 
+BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine +BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine +BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86888_20140530_190736_inLine 
+BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_93469_20140813_214657_inLine +BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list new file mode 100644 index 00000000000..fce3045a1ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list @@ -0,0 +1,364 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine +BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine 
+BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine +BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine 
+BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49768_20140505_000629_inLine +BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine +BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine +BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine 
+BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine +BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine +BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine +BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine 
+BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_88756_20140908_011014_inLine +BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine +BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine +BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine 
+BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/training.list b/egs/babel/s5d/conf/lists/307-amharic/training.list new file mode 100644 index 00000000000..e58883c0fc7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/training.list @@ -0,0 +1,486 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine +BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine +BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine +BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine 
+BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine +BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine 
+BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine +BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_49768_20140505_000629_inLine +BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine 
+BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine +BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine +BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine 
+BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine +BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine +BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine 
+BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_86888_20140530_190736_inLine +BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_88756_20140908_011014_inLine +BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine +BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93469_20140813_214657_inLine +BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine 
+BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine +BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list new file mode 100644 index 00000000000..2015539e910 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list @@ -0,0 +1,568 @@ +BABEL_OP3_307_10019_20140510_215248_inLine +BABEL_OP3_307_10019_20140510_215248_outLine +BABEL_OP3_307_10019_20140510_220549_inLine +BABEL_OP3_307_10019_20140510_220549_outLine +BABEL_OP3_307_10188_20140414_190900_inLine +BABEL_OP3_307_10188_20140414_190900_outLine +BABEL_OP3_307_10974_20140518_232844_inLine +BABEL_OP3_307_10974_20140518_232844_outLine +BABEL_OP3_307_13586_20140517_192301_inLine +BABEL_OP3_307_13586_20140517_192301_outLine +BABEL_OP3_307_14137_20140504_224411_inLine +BABEL_OP3_307_14137_20140504_224411_outLine +BABEL_OP3_307_14141_20140729_225447_inLine +BABEL_OP3_307_14141_20140729_225447_outLine +BABEL_OP3_307_14158_20140609_183923_inLine +BABEL_OP3_307_14158_20140609_183923_outLine +BABEL_OP3_307_14719_20140630_214352_inLine +BABEL_OP3_307_14719_20140630_214352_outLine +BABEL_OP3_307_14719_20140630_215754_inLine +BABEL_OP3_307_14719_20140630_215754_outLine +BABEL_OP3_307_14807_20140603_161507_inLine +BABEL_OP3_307_14807_20140603_161507_outLine +BABEL_OP3_307_14807_20140603_163538_inLine +BABEL_OP3_307_14807_20140603_163538_outLine +BABEL_OP3_307_15163_20140505_213531_inLine +BABEL_OP3_307_15163_20140505_213531_outLine +BABEL_OP3_307_15466_20140829_012731_inLine +BABEL_OP3_307_15466_20140829_012731_outLine +BABEL_OP3_307_15638_20140613_232945_inLine +BABEL_OP3_307_15638_20140613_232945_outLine +BABEL_OP3_307_16886_20140507_225852_inLine +BABEL_OP3_307_16886_20140507_225852_outLine +BABEL_OP3_307_16938_20140518_195229_inLine +BABEL_OP3_307_16938_20140518_195229_outLine +BABEL_OP3_307_17113_20140822_183518_inLine +BABEL_OP3_307_17113_20140822_183518_outLine +BABEL_OP3_307_17127_20140710_180949_inLine +BABEL_OP3_307_17127_20140710_180949_outLine +BABEL_OP3_307_17165_20140510_233034_inLine +BABEL_OP3_307_17165_20140510_233034_outLine +BABEL_OP3_307_17165_20140520_203751_inLine +BABEL_OP3_307_17165_20140520_203751_outLine +BABEL_OP3_307_17567_20140518_203832_inLine +BABEL_OP3_307_17567_20140518_203832_outLine +BABEL_OP3_307_17582_20140822_232433_inLine +BABEL_OP3_307_17582_20140822_232433_outLine +BABEL_OP3_307_17890_20140617_183508_inLine +BABEL_OP3_307_17890_20140617_183508_outLine +BABEL_OP3_307_18863_20140629_183439_inLine +BABEL_OP3_307_18863_20140629_183439_outLine +BABEL_OP3_307_19120_20140730_221602_inLine +BABEL_OP3_307_19120_20140730_221602_outLine +BABEL_OP3_307_19703_20140504_214945_inLine +BABEL_OP3_307_19703_20140504_214945_outLine 
+BABEL_OP3_307_19749_20140718_000521_inLine +BABEL_OP3_307_19749_20140718_000521_outLine +BABEL_OP3_307_19767_20140811_181547_inLine +BABEL_OP3_307_19767_20140811_181547_outLine +BABEL_OP3_307_20330_20140716_175203_inLine +BABEL_OP3_307_20330_20140716_175203_outLine +BABEL_OP3_307_21109_20140703_171502_inLine +BABEL_OP3_307_21109_20140703_171502_outLine +BABEL_OP3_307_21109_20140703_180309_inLine +BABEL_OP3_307_21109_20140703_180309_outLine +BABEL_OP3_307_21159_20140824_225236_inLine +BABEL_OP3_307_21159_20140824_225236_outLine +BABEL_OP3_307_21159_20140901_165658_inLine +BABEL_OP3_307_21159_20140901_165658_outLine +BABEL_OP3_307_21393_20140814_190327_inLine +BABEL_OP3_307_21393_20140814_190327_outLine +BABEL_OP3_307_23395_20140605_170532_inLine +BABEL_OP3_307_23395_20140605_170532_outLine +BABEL_OP3_307_23395_20140605_171255_inLine +BABEL_OP3_307_23395_20140605_171255_outLine +BABEL_OP3_307_23681_20140823_184904_inLine +BABEL_OP3_307_23681_20140823_184904_outLine +BABEL_OP3_307_23681_20140823_190005_inLine +BABEL_OP3_307_23681_20140823_190005_outLine +BABEL_OP3_307_24290_20140717_001151_inLine +BABEL_OP3_307_24290_20140717_001151_outLine +BABEL_OP3_307_24323_20140508_012148_inLine +BABEL_OP3_307_24323_20140508_012148_outLine +BABEL_OP3_307_24323_20140508_020931_inLine +BABEL_OP3_307_24323_20140508_020931_outLine +BABEL_OP3_307_24323_20140508_022325_inLine +BABEL_OP3_307_24323_20140508_022325_outLine +BABEL_OP3_307_24605_20140403_000212_inLine +BABEL_OP3_307_24605_20140403_000212_outLine +BABEL_OP3_307_24779_20140905_004858_inLine +BABEL_OP3_307_24779_20140905_004858_outLine +BABEL_OP3_307_25085_20140822_212709_inLine +BABEL_OP3_307_25085_20140822_212709_outLine +BABEL_OP3_307_25220_20140905_000706_inLine +BABEL_OP3_307_25220_20140905_000706_outLine +BABEL_OP3_307_25412_20140604_234923_inLine +BABEL_OP3_307_25412_20140604_234923_outLine +BABEL_OP3_307_25412_20140605_000418_inLine +BABEL_OP3_307_25412_20140605_000418_outLine +BABEL_OP3_307_25961_20140403_011918_inLine +BABEL_OP3_307_25961_20140403_011918_outLine +BABEL_OP3_307_26074_20140604_165342_inLine +BABEL_OP3_307_26074_20140604_165342_outLine +BABEL_OP3_307_26478_20140824_185710_inLine +BABEL_OP3_307_26478_20140824_185710_outLine +BABEL_OP3_307_26999_20140530_190830_inLine +BABEL_OP3_307_26999_20140530_190830_outLine +BABEL_OP3_307_27042_20140630_180037_inLine +BABEL_OP3_307_27042_20140630_180037_outLine +BABEL_OP3_307_27218_20140518_235212_inLine +BABEL_OP3_307_27218_20140518_235212_outLine +BABEL_OP3_307_27478_20140808_231848_inLine +BABEL_OP3_307_27478_20140808_231848_outLine +BABEL_OP3_307_27590_20140618_185748_inLine +BABEL_OP3_307_27590_20140618_185748_outLine +BABEL_OP3_307_27590_20140618_190731_inLine +BABEL_OP3_307_27590_20140618_190731_outLine +BABEL_OP3_307_28303_20140503_225229_inLine +BABEL_OP3_307_28303_20140503_225229_outLine +BABEL_OP3_307_28422_20140607_215944_inLine +BABEL_OP3_307_28422_20140607_215944_outLine +BABEL_OP3_307_28538_20140513_205210_inLine +BABEL_OP3_307_28538_20140513_205210_outLine +BABEL_OP3_307_29352_20140824_172023_inLine +BABEL_OP3_307_29352_20140824_172023_outLine +BABEL_OP3_307_29416_20140705_233008_inLine +BABEL_OP3_307_29416_20140705_233008_outLine +BABEL_OP3_307_29777_20140705_173903_inLine +BABEL_OP3_307_29777_20140705_173903_outLine +BABEL_OP3_307_30253_20140624_003258_inLine +BABEL_OP3_307_30253_20140624_003258_outLine +BABEL_OP3_307_30497_20140809_181809_inLine +BABEL_OP3_307_30497_20140809_181809_outLine +BABEL_OP3_307_30869_20140630_183404_inLine 
+BABEL_OP3_307_30869_20140630_183404_outLine +BABEL_OP3_307_30869_20140630_184229_inLine +BABEL_OP3_307_30869_20140630_184229_outLine +BABEL_OP3_307_31109_20140518_201149_inLine +BABEL_OP3_307_31109_20140518_201149_outLine +BABEL_OP3_307_31182_20140701_221449_inLine +BABEL_OP3_307_31182_20140701_221449_outLine +BABEL_OP3_307_31182_20140702_223108_inLine +BABEL_OP3_307_31182_20140702_223108_outLine +BABEL_OP3_307_31628_20140610_230053_inLine +BABEL_OP3_307_31628_20140610_230053_outLine +BABEL_OP3_307_31979_20140512_015136_inLine +BABEL_OP3_307_31979_20140512_015136_outLine +BABEL_OP3_307_32630_20140821_180259_inLine +BABEL_OP3_307_32630_20140821_180259_outLine +BABEL_OP3_307_32630_20140821_181033_inLine +BABEL_OP3_307_32630_20140821_181033_outLine +BABEL_OP3_307_32630_20140821_182004_inLine +BABEL_OP3_307_32630_20140821_182004_outLine +BABEL_OP3_307_32959_20140621_191212_inLine +BABEL_OP3_307_32959_20140621_191212_outLine +BABEL_OP3_307_33216_20140904_190946_inLine +BABEL_OP3_307_33216_20140904_190946_outLine +BABEL_OP3_307_33704_20140705_224629_inLine +BABEL_OP3_307_33704_20140705_224629_outLine +BABEL_OP3_307_33840_20140628_232051_inLine +BABEL_OP3_307_33840_20140628_232051_outLine +BABEL_OP3_307_34328_20140511_224229_inLine +BABEL_OP3_307_34328_20140511_224229_outLine +BABEL_OP3_307_34899_20140824_175928_inLine +BABEL_OP3_307_34899_20140824_175928_outLine +BABEL_OP3_307_35885_20140717_212149_inLine +BABEL_OP3_307_35885_20140717_212149_outLine +BABEL_OP3_307_36059_20140725_170553_inLine +BABEL_OP3_307_36059_20140725_170553_outLine +BABEL_OP3_307_36059_20140725_171011_inLine +BABEL_OP3_307_36059_20140725_171011_outLine +BABEL_OP3_307_36341_20140414_233501_inLine +BABEL_OP3_307_36341_20140414_233501_outLine +BABEL_OP3_307_36341_20140415_224118_inLine +BABEL_OP3_307_36341_20140415_224118_outLine +BABEL_OP3_307_37229_20140820_214115_inLine +BABEL_OP3_307_37229_20140820_214115_outLine +BABEL_OP3_307_37229_20140820_215332_inLine +BABEL_OP3_307_37229_20140820_215332_outLine +BABEL_OP3_307_37499_20140823_233015_inLine +BABEL_OP3_307_37499_20140823_233015_outLine +BABEL_OP3_307_37598_20140602_183825_inLine +BABEL_OP3_307_37598_20140602_183825_outLine +BABEL_OP3_307_38554_20140414_233433_inLine +BABEL_OP3_307_38554_20140414_233433_outLine +BABEL_OP3_307_38979_20140711_231114_inLine +BABEL_OP3_307_38979_20140711_231114_outLine +BABEL_OP3_307_38979_20140711_232222_inLine +BABEL_OP3_307_38979_20140711_232222_outLine +BABEL_OP3_307_39099_20140814_210148_inLine +BABEL_OP3_307_39099_20140814_210148_outLine +BABEL_OP3_307_39307_20140422_002337_inLine +BABEL_OP3_307_39307_20140422_002337_outLine +BABEL_OP3_307_39680_20140710_174506_inLine +BABEL_OP3_307_39680_20140710_174506_outLine +BABEL_OP3_307_41038_20140611_180425_inLine +BABEL_OP3_307_41038_20140611_180425_outLine +BABEL_OP3_307_41233_20140629_192647_inLine +BABEL_OP3_307_41233_20140629_192647_outLine +BABEL_OP3_307_41400_20140811_174054_inLine +BABEL_OP3_307_41400_20140811_174054_outLine +BABEL_OP3_307_41692_20140824_185620_inLine +BABEL_OP3_307_41692_20140824_185620_outLine +BABEL_OP3_307_42600_20140504_183919_inLine +BABEL_OP3_307_42600_20140504_183919_outLine +BABEL_OP3_307_42619_20140527_230410_inLine +BABEL_OP3_307_42619_20140527_230410_outLine +BABEL_OP3_307_42718_20140809_190046_inLine +BABEL_OP3_307_42718_20140809_190046_outLine +BABEL_OP3_307_42942_20140508_233559_inLine +BABEL_OP3_307_42942_20140508_233559_outLine +BABEL_OP3_307_42991_20140611_192649_inLine +BABEL_OP3_307_42991_20140611_192649_outLine 
+BABEL_OP3_307_42991_20140611_193641_inLine +BABEL_OP3_307_42991_20140611_193641_outLine +BABEL_OP3_307_43239_20140703_231849_inLine +BABEL_OP3_307_43239_20140703_231849_outLine +BABEL_OP3_307_43368_20140510_180418_inLine +BABEL_OP3_307_43368_20140510_180418_outLine +BABEL_OP3_307_43368_20140510_183109_inLine +BABEL_OP3_307_43368_20140510_183109_outLine +BABEL_OP3_307_43395_20140825_200133_inLine +BABEL_OP3_307_43395_20140825_200133_outLine +BABEL_OP3_307_43646_20140414_232607_inLine +BABEL_OP3_307_43646_20140414_232607_outLine +BABEL_OP3_307_44255_20140716_232402_inLine +BABEL_OP3_307_44255_20140716_232402_outLine +BABEL_OP3_307_44255_20140716_233533_inLine +BABEL_OP3_307_44255_20140716_233533_outLine +BABEL_OP3_307_44347_20140707_214025_inLine +BABEL_OP3_307_44347_20140707_214025_outLine +BABEL_OP3_307_44531_20140724_233405_inLine +BABEL_OP3_307_44531_20140724_233405_outLine +BABEL_OP3_307_44868_20140609_224633_inLine +BABEL_OP3_307_44868_20140609_224633_outLine +BABEL_OP3_307_45201_20140811_170742_inLine +BABEL_OP3_307_45201_20140811_170742_outLine +BABEL_OP3_307_45235_20140705_174005_inLine +BABEL_OP3_307_45235_20140705_174005_outLine +BABEL_OP3_307_45559_20140725_180337_inLine +BABEL_OP3_307_45559_20140725_180337_outLine +BABEL_OP3_307_45560_20140403_200140_inLine +BABEL_OP3_307_45560_20140403_200140_outLine +BABEL_OP3_307_45642_20140417_165148_inLine +BABEL_OP3_307_45642_20140417_165148_outLine +BABEL_OP3_307_45642_20140417_171209_inLine +BABEL_OP3_307_45642_20140417_171209_outLine +BABEL_OP3_307_45697_20140723_174904_inLine +BABEL_OP3_307_45697_20140723_174904_outLine +BABEL_OP3_307_45770_20140417_234713_inLine +BABEL_OP3_307_45770_20140417_234713_outLine +BABEL_OP3_307_45770_20140418_001050_inLine +BABEL_OP3_307_45770_20140418_001050_outLine +BABEL_OP3_307_45908_20140811_183550_inLine +BABEL_OP3_307_45908_20140811_183550_outLine +BABEL_OP3_307_46008_20140811_173939_inLine +BABEL_OP3_307_46008_20140811_173939_outLine +BABEL_OP3_307_46169_20140628_213057_inLine +BABEL_OP3_307_46169_20140628_213057_outLine +BABEL_OP3_307_46315_20140613_173444_inLine +BABEL_OP3_307_46315_20140613_173444_outLine +BABEL_OP3_307_46688_20140422_234341_inLine +BABEL_OP3_307_46688_20140422_234341_outLine +BABEL_OP3_307_46881_20140416_183617_inLine +BABEL_OP3_307_46881_20140416_183617_outLine +BABEL_OP3_307_46881_20140416_184809_inLine +BABEL_OP3_307_46881_20140416_184809_outLine +BABEL_OP3_307_47215_20140421_184514_inLine +BABEL_OP3_307_47215_20140421_184514_outLine +BABEL_OP3_307_48016_20140820_165510_inLine +BABEL_OP3_307_48016_20140820_165510_outLine +BABEL_OP3_307_48663_20140811_165120_inLine +BABEL_OP3_307_48663_20140811_165120_outLine +BABEL_OP3_307_48844_20140430_185608_inLine +BABEL_OP3_307_48844_20140430_185608_outLine +BABEL_OP3_307_49197_20140503_171214_inLine +BABEL_OP3_307_49197_20140503_171214_outLine +BABEL_OP3_307_49767_20140904_203629_inLine +BABEL_OP3_307_49767_20140904_203629_outLine +BABEL_OP3_307_49775_20140329_000415_inLine +BABEL_OP3_307_49775_20140329_000415_outLine +BABEL_OP3_307_49775_20140329_002350_inLine +BABEL_OP3_307_49775_20140329_002350_outLine +BABEL_OP3_307_50726_20140404_005620_inLine +BABEL_OP3_307_50726_20140404_005620_outLine +BABEL_OP3_307_50958_20140508_000931_inLine +BABEL_OP3_307_50958_20140508_000931_outLine +BABEL_OP3_307_50962_20140430_182307_inLine +BABEL_OP3_307_50962_20140430_182307_outLine +BABEL_OP3_307_51015_20140619_234356_inLine +BABEL_OP3_307_51015_20140619_234356_outLine +BABEL_OP3_307_51414_20140824_173748_inLine 
+BABEL_OP3_307_51414_20140824_173748_outLine +BABEL_OP3_307_51414_20140824_175004_inLine +BABEL_OP3_307_51414_20140824_175004_outLine +BABEL_OP3_307_51530_20140813_230739_inLine +BABEL_OP3_307_51530_20140813_230739_outLine +BABEL_OP3_307_52025_20140520_191824_inLine +BABEL_OP3_307_52025_20140520_191824_outLine +BABEL_OP3_307_52070_20140904_203955_inLine +BABEL_OP3_307_52070_20140904_203955_outLine +BABEL_OP3_307_52442_20140516_214027_inLine +BABEL_OP3_307_52442_20140516_214027_outLine +BABEL_OP3_307_52447_20140822_171509_inLine +BABEL_OP3_307_52447_20140822_171509_outLine +BABEL_OP3_307_52447_20140822_172455_inLine +BABEL_OP3_307_52447_20140822_172455_outLine +BABEL_OP3_307_52725_20140725_190010_inLine +BABEL_OP3_307_52725_20140725_190010_outLine +BABEL_OP3_307_52725_20140725_190854_inLine +BABEL_OP3_307_52725_20140725_190854_outLine +BABEL_OP3_307_52804_20140502_184324_inLine +BABEL_OP3_307_52804_20140502_184324_outLine +BABEL_OP3_307_53144_20140807_225121_inLine +BABEL_OP3_307_53144_20140807_225121_outLine +BABEL_OP3_307_53492_20140730_174335_inLine +BABEL_OP3_307_53492_20140730_174335_outLine +BABEL_OP3_307_53665_20140809_225603_inLine +BABEL_OP3_307_53665_20140809_225603_outLine +BABEL_OP3_307_55818_20140403_203355_inLine +BABEL_OP3_307_55818_20140403_203355_outLine +BABEL_OP3_307_56019_20140716_230530_inLine +BABEL_OP3_307_56019_20140716_230530_outLine +BABEL_OP3_307_56076_20140810_005108_inLine +BABEL_OP3_307_56076_20140810_005108_outLine +BABEL_OP3_307_56213_20140621_172222_inLine +BABEL_OP3_307_56213_20140621_172222_outLine +BABEL_OP3_307_56306_20140705_225134_inLine +BABEL_OP3_307_56306_20140705_225134_outLine +BABEL_OP3_307_56684_20140630_231811_inLine +BABEL_OP3_307_56684_20140630_231811_outLine +BABEL_OP3_307_56720_20140616_224418_inLine +BABEL_OP3_307_56720_20140616_224418_outLine +BABEL_OP3_307_57542_20140720_222540_inLine +BABEL_OP3_307_57542_20140720_222540_outLine +BABEL_OP3_307_57650_20140712_172810_inLine +BABEL_OP3_307_57650_20140712_172810_outLine +BABEL_OP3_307_57922_20140603_234523_inLine +BABEL_OP3_307_57922_20140603_234523_outLine +BABEL_OP3_307_60310_20140628_231715_inLine +BABEL_OP3_307_60310_20140628_231715_outLine +BABEL_OP3_307_60436_20140730_191522_inLine +BABEL_OP3_307_60436_20140730_191522_outLine +BABEL_OP3_307_60706_20140401_190403_inLine +BABEL_OP3_307_60706_20140401_190403_outLine +BABEL_OP3_307_60836_20140405_213236_inLine +BABEL_OP3_307_60836_20140405_213236_outLine +BABEL_OP3_307_60836_20140406_002450_inLine +BABEL_OP3_307_60836_20140406_002450_outLine +BABEL_OP3_307_61219_20140404_200459_inLine +BABEL_OP3_307_61219_20140404_200459_outLine +BABEL_OP3_307_61225_20140414_220024_inLine +BABEL_OP3_307_61225_20140414_220024_outLine +BABEL_OP3_307_61963_20140710_174351_inLine +BABEL_OP3_307_61963_20140710_174351_outLine +BABEL_OP3_307_62155_20140721_233109_inLine +BABEL_OP3_307_62155_20140721_233109_outLine +BABEL_OP3_307_62289_20140811_205629_inLine +BABEL_OP3_307_62289_20140811_205629_outLine +BABEL_OP3_307_63906_20140807_235743_inLine +BABEL_OP3_307_63906_20140807_235743_outLine +BABEL_OP3_307_63938_20140715_225113_inLine +BABEL_OP3_307_63938_20140715_225113_outLine +BABEL_OP3_307_64014_20140717_232855_inLine +BABEL_OP3_307_64014_20140717_232855_outLine +BABEL_OP3_307_65298_20140718_004934_inLine +BABEL_OP3_307_65298_20140718_004934_outLine +BABEL_OP3_307_65477_20140507_213725_inLine +BABEL_OP3_307_65477_20140507_213725_outLine +BABEL_OP3_307_65477_20140507_214428_inLine +BABEL_OP3_307_65477_20140507_214428_outLine 
+BABEL_OP3_307_65477_20140507_215852_inLine +BABEL_OP3_307_65477_20140507_215852_outLine +BABEL_OP3_307_65913_20140811_185916_inLine +BABEL_OP3_307_65913_20140811_185916_outLine +BABEL_OP3_307_66026_20140622_001323_inLine +BABEL_OP3_307_66026_20140622_001323_outLine +BABEL_OP3_307_66026_20140622_003222_inLine +BABEL_OP3_307_66026_20140622_003222_outLine +BABEL_OP3_307_66837_20140622_193057_inLine +BABEL_OP3_307_66837_20140622_193057_outLine +BABEL_OP3_307_68182_20140712_230018_inLine +BABEL_OP3_307_68182_20140712_230018_outLine +BABEL_OP3_307_68306_20140619_234111_inLine +BABEL_OP3_307_68306_20140619_234111_outLine +BABEL_OP3_307_69746_20140708_002605_inLine +BABEL_OP3_307_69746_20140708_002605_outLine +BABEL_OP3_307_69885_20140809_214354_inLine +BABEL_OP3_307_69885_20140809_214354_outLine +BABEL_OP3_307_69885_20140809_221241_inLine +BABEL_OP3_307_69885_20140809_221241_outLine +BABEL_OP3_307_70221_20140531_232511_inLine +BABEL_OP3_307_70221_20140531_232511_outLine +BABEL_OP3_307_71121_20140827_212105_inLine +BABEL_OP3_307_71121_20140827_212105_outLine +BABEL_OP3_307_71282_20140712_184618_inLine +BABEL_OP3_307_71282_20140712_184618_outLine +BABEL_OP3_307_72349_20140811_213219_inLine +BABEL_OP3_307_72349_20140811_213219_outLine +BABEL_OP3_307_72844_20140414_222309_inLine +BABEL_OP3_307_72844_20140414_222309_outLine +BABEL_OP3_307_72844_20140414_223414_inLine +BABEL_OP3_307_72844_20140414_223414_outLine +BABEL_OP3_307_73549_20140905_002803_inLine +BABEL_OP3_307_73549_20140905_002803_outLine +BABEL_OP3_307_73964_20140809_233453_inLine +BABEL_OP3_307_73964_20140809_233453_outLine +BABEL_OP3_307_73964_20140809_234749_inLine +BABEL_OP3_307_73964_20140809_234749_outLine +BABEL_OP3_307_74111_20140630_190239_inLine +BABEL_OP3_307_74111_20140630_190239_outLine +BABEL_OP3_307_74253_20140621_235240_inLine +BABEL_OP3_307_74253_20140621_235240_outLine +BABEL_OP3_307_74280_20140414_183758_inLine +BABEL_OP3_307_74280_20140414_183758_outLine +BABEL_OP3_307_74455_20140715_191928_inLine +BABEL_OP3_307_74455_20140715_191928_outLine +BABEL_OP3_307_75223_20140401_234318_inLine +BABEL_OP3_307_75223_20140401_234318_outLine +BABEL_OP3_307_75223_20140401_235025_inLine +BABEL_OP3_307_75223_20140401_235025_outLine +BABEL_OP3_307_75261_20140630_231504_inLine +BABEL_OP3_307_75261_20140630_231504_outLine +BABEL_OP3_307_75342_20140617_225740_inLine +BABEL_OP3_307_75342_20140617_225740_outLine +BABEL_OP3_307_75342_20140617_231149_inLine +BABEL_OP3_307_75342_20140617_231149_outLine +BABEL_OP3_307_75359_20140812_195810_inLine +BABEL_OP3_307_75359_20140812_195810_outLine +BABEL_OP3_307_75366_20140905_004427_inLine +BABEL_OP3_307_75366_20140905_004427_outLine +BABEL_OP3_307_75465_20140629_190739_inLine +BABEL_OP3_307_75465_20140629_190739_outLine +BABEL_OP3_307_75869_20140722_003619_inLine +BABEL_OP3_307_75869_20140722_003619_outLine +BABEL_OP3_307_75981_20140730_202631_inLine +BABEL_OP3_307_75981_20140730_202631_outLine +BABEL_OP3_307_78544_20140610_183736_inLine +BABEL_OP3_307_78544_20140610_183736_outLine +BABEL_OP3_307_78609_20140702_235349_inLine +BABEL_OP3_307_78609_20140702_235349_outLine +BABEL_OP3_307_78749_20140904_210224_inLine +BABEL_OP3_307_78749_20140904_210224_outLine +BABEL_OP3_307_79139_20140510_225328_inLine +BABEL_OP3_307_79139_20140510_225328_outLine +BABEL_OP3_307_79898_20140904_214416_inLine +BABEL_OP3_307_79898_20140904_214416_outLine +BABEL_OP3_307_80577_20140715_181331_inLine +BABEL_OP3_307_80577_20140715_181331_outLine +BABEL_OP3_307_80655_20140812_230923_inLine 
+BABEL_OP3_307_80655_20140812_230923_outLine +BABEL_OP3_307_80655_20140812_233001_inLine +BABEL_OP3_307_80655_20140812_233001_outLine +BABEL_OP3_307_80721_20140621_190505_inLine +BABEL_OP3_307_80721_20140621_190505_outLine +BABEL_OP3_307_80881_20140422_202404_inLine +BABEL_OP3_307_80881_20140422_202404_outLine +BABEL_OP3_307_81674_20140826_223550_inLine +BABEL_OP3_307_81674_20140826_223550_outLine +BABEL_OP3_307_83783_20140605_230854_inLine +BABEL_OP3_307_83783_20140605_230854_outLine +BABEL_OP3_307_83783_20140605_231912_inLine +BABEL_OP3_307_83783_20140605_231912_outLine +BABEL_OP3_307_83935_20140614_224802_inLine +BABEL_OP3_307_83935_20140614_224802_outLine +BABEL_OP3_307_84061_20140511_233610_inLine +BABEL_OP3_307_84061_20140511_233610_outLine +BABEL_OP3_307_84125_20140331_234215_inLine +BABEL_OP3_307_84125_20140331_234215_outLine +BABEL_OP3_307_84370_20140820_212437_inLine +BABEL_OP3_307_84370_20140820_212437_outLine +BABEL_OP3_307_84408_20140503_212710_inLine +BABEL_OP3_307_84408_20140503_212710_outLine +BABEL_OP3_307_84737_20140708_221232_inLine +BABEL_OP3_307_84737_20140708_221232_outLine +BABEL_OP3_307_84823_20140630_223225_inLine +BABEL_OP3_307_84823_20140630_223225_outLine +BABEL_OP3_307_84936_20140531_001856_inLine +BABEL_OP3_307_84936_20140531_001856_outLine +BABEL_OP3_307_84936_20140531_002943_inLine +BABEL_OP3_307_84936_20140531_002943_outLine +BABEL_OP3_307_85048_20140605_171622_inLine +BABEL_OP3_307_85048_20140605_171622_outLine +BABEL_OP3_307_85254_20140827_191205_inLine +BABEL_OP3_307_85254_20140827_191205_outLine +BABEL_OP3_307_85340_20140430_212442_inLine +BABEL_OP3_307_85340_20140430_212442_outLine +BABEL_OP3_307_87298_20140404_234437_inLine +BABEL_OP3_307_87298_20140404_234437_outLine +BABEL_OP3_307_88982_20140517_225450_inLine +BABEL_OP3_307_88982_20140517_225450_outLine +BABEL_OP3_307_89560_20140708_181828_inLine +BABEL_OP3_307_89560_20140708_181828_outLine +BABEL_OP3_307_90080_20140730_192002_inLine +BABEL_OP3_307_90080_20140730_192002_outLine +BABEL_OP3_307_90760_20140822_233431_inLine +BABEL_OP3_307_90760_20140822_233431_outLine +BABEL_OP3_307_91125_20140417_193326_inLine +BABEL_OP3_307_91125_20140417_193326_outLine +BABEL_OP3_307_91930_20140723_214657_inLine +BABEL_OP3_307_91930_20140723_214657_outLine +BABEL_OP3_307_91977_20140609_172756_inLine +BABEL_OP3_307_91977_20140609_172756_outLine +BABEL_OP3_307_92077_20140725_182941_inLine +BABEL_OP3_307_92077_20140725_182941_outLine +BABEL_OP3_307_92096_20140720_214645_inLine +BABEL_OP3_307_92096_20140720_214645_outLine +BABEL_OP3_307_92356_20140710_165331_inLine +BABEL_OP3_307_92356_20140710_165331_outLine +BABEL_OP3_307_92736_20140628_222129_inLine +BABEL_OP3_307_92736_20140628_222129_outLine +BABEL_OP3_307_92792_20140806_183000_inLine +BABEL_OP3_307_92792_20140806_183000_outLine +BABEL_OP3_307_92792_20140806_184601_inLine +BABEL_OP3_307_92792_20140806_184601_outLine +BABEL_OP3_307_93411_20140511_171810_inLine +BABEL_OP3_307_93411_20140511_171810_outLine +BABEL_OP3_307_93411_20140511_172906_inLine +BABEL_OP3_307_93411_20140511_172906_outLine +BABEL_OP3_307_93861_20140513_195727_inLine +BABEL_OP3_307_94141_20140813_184047_inLine +BABEL_OP3_307_94141_20140813_184047_outLine +BABEL_OP3_307_94166_20140717_182459_inLine +BABEL_OP3_307_94666_20140517_180258_inLine +BABEL_OP3_307_94666_20140517_180258_outLine +BABEL_OP3_307_95399_20140514_005142_inLine +BABEL_OP3_307_95399_20140514_005142_outLine +BABEL_OP3_307_95467_20140822_201531_inLine +BABEL_OP3_307_95467_20140822_201531_outLine 
+BABEL_OP3_307_95598_20140415_012206_inLine +BABEL_OP3_307_95598_20140415_012206_outLine +BABEL_OP3_307_95935_20140702_232733_inLine +BABEL_OP3_307_95935_20140702_232733_outLine +BABEL_OP3_307_95966_20140504_202018_inLine +BABEL_OP3_307_95966_20140504_202018_outLine +BABEL_OP3_307_96247_20140721_235224_inLine +BABEL_OP3_307_96247_20140721_235224_outLine +BABEL_OP3_307_96584_20140717_173523_inLine +BABEL_OP3_307_96584_20140717_173523_outLine +BABEL_OP3_307_96842_20140725_185113_inLine +BABEL_OP3_307_96842_20140725_185113_outLine +BABEL_OP3_307_96934_20140407_232228_inLine +BABEL_OP3_307_96934_20140407_232228_outLine +BABEL_OP3_307_97136_20140731_173922_inLine +BABEL_OP3_307_97136_20140731_173922_outLine +BABEL_OP3_307_97570_20140529_233742_inLine +BABEL_OP3_307_97570_20140529_233742_outLine +BABEL_OP3_307_97836_20140730_225750_inLine +BABEL_OP3_307_97836_20140730_225750_outLine +BABEL_OP3_307_97849_20140813_181409_inLine +BABEL_OP3_307_97849_20140813_181409_outLine +BABEL_OP3_307_97911_20140904_224017_inLine +BABEL_OP3_307_97911_20140904_224017_outLine +BABEL_OP3_307_97988_20140620_223625_inLine +BABEL_OP3_307_97988_20140620_223625_outLine +BABEL_OP3_307_98489_20140404_222049_inLine +BABEL_OP3_307_98489_20140404_222049_outLine +BABEL_OP3_307_98678_20140721_224047_inLine +BABEL_OP3_307_98678_20140721_224047_outLine +BABEL_OP3_307_99401_20140422_215803_inLine +BABEL_OP3_307_99401_20140422_215803_outLine +BABEL_OP3_307_99718_20140417_190158_inLine +BABEL_OP3_307_99718_20140417_190158_outLine +BABEL_OP3_307_99732_20140630_175525_inLine +BABEL_OP3_307_99732_20140630_175525_outLine +BABEL_OP3_307_99813_20140516_235439_inLine +BABEL_OP3_307_99813_20140516_235439_outLine +BABEL_OP3_307_99920_20140404_002016_inLine +BABEL_OP3_307_99920_20140404_002016_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine +BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine 
+BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine +BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine +BABEL_OP3_401_41100_20141001_131139_outLine +BABEL_OP3_401_41741_20141002_230232_inLine +BABEL_OP3_401_41741_20141002_230232_outLine +BABEL_OP3_401_42243_20140924_154551_inLine +BABEL_OP3_401_42243_20140924_154551_outLine +BABEL_OP3_401_43368_20141016_160322_inLine +BABEL_OP3_401_43368_20141016_160322_outLine +BABEL_OP3_401_43388_20141019_234056_inLine +BABEL_OP3_401_43388_20141019_234056_outLine +BABEL_OP3_401_43789_20141020_153059_inLine +BABEL_OP3_401_43789_20141020_153059_outLine +BABEL_OP3_401_44347_20141103_201828_inLine +BABEL_OP3_401_44347_20141103_201828_outLine +BABEL_OP3_401_44420_20141014_143409_inLine +BABEL_OP3_401_44420_20141014_143409_outLine +BABEL_OP3_401_44531_20141122_231122_inLine +BABEL_OP3_401_44531_20141122_231122_outLine +BABEL_OP3_401_44619_20141003_141028_inLine +BABEL_OP3_401_44619_20141003_141028_outLine +BABEL_OP3_401_44868_20140925_153133_inLine +BABEL_OP3_401_44868_20140925_153133_outLine +BABEL_OP3_401_46625_20140919_144521_inLine +BABEL_OP3_401_46625_20140919_144521_outLine +BABEL_OP3_401_47215_20141001_143242_inLine +BABEL_OP3_401_47215_20141001_143242_outLine +BABEL_OP3_401_48200_20141104_174608_inLine +BABEL_OP3_401_48200_20141104_174608_outLine +BABEL_OP3_401_52025_20140917_170707_inLine +BABEL_OP3_401_52025_20140917_170707_outLine +BABEL_OP3_401_54046_20141105_192438_inLine +BABEL_OP3_401_54046_20141105_192438_outLine +BABEL_OP3_401_54744_20141001_143512_inLine +BABEL_OP3_401_54744_20141001_143512_outLine +BABEL_OP3_401_56090_20140917_155639_inLine +BABEL_OP3_401_56090_20140917_155639_outLine +BABEL_OP3_401_59898_20140930_142511_inLine +BABEL_OP3_401_59898_20140930_142511_outLine +BABEL_OP3_401_61011_20140919_134829_inLine +BABEL_OP3_401_61011_20140919_134829_outLine +BABEL_OP3_401_61011_20140919_141527_inLine +BABEL_OP3_401_61011_20140919_141527_outLine +BABEL_OP3_401_61678_20140919_183209_inLine +BABEL_OP3_401_61678_20140919_183209_outLine +BABEL_OP3_401_62724_20141016_200105_inLine +BABEL_OP3_401_62724_20141016_200105_outLine +BABEL_OP3_401_63081_20140919_142151_inLine +BABEL_OP3_401_63081_20140919_142151_outLine +BABEL_OP3_401_72007_20140930_173109_inLine +BABEL_OP3_401_72007_20140930_173109_outLine +BABEL_OP3_401_78544_20140924_155131_inLine 
+BABEL_OP3_401_78544_20140924_155131_outLine +BABEL_OP3_401_81424_20140927_134153_inLine +BABEL_OP3_401_81424_20140927_134153_outLine +BABEL_OP3_401_81553_20141112_153426_inLine +BABEL_OP3_401_81553_20141112_153426_outLine +BABEL_OP3_401_84815_20141014_163920_inLine +BABEL_OP3_401_84815_20141014_163920_outLine +BABEL_OP3_401_87884_20141014_190149_inLine +BABEL_OP3_401_87884_20141014_190149_outLine +BABEL_OP3_401_98506_20141124_133100_inLine +BABEL_OP3_401_98506_20141124_133100_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine +BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine +BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine +BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine 
+BABEL_OP3_401_41100_20141001_131139_outLine
+BABEL_OP3_401_41741_20141002_230232_inLine
+BABEL_OP3_401_41741_20141002_230232_outLine
+BABEL_OP3_401_42243_20140924_154551_inLine
+BABEL_OP3_401_42243_20140924_154551_outLine
+BABEL_OP3_401_43368_20141016_160322_inLine
+BABEL_OP3_401_43368_20141016_160322_outLine
+BABEL_OP3_401_43388_20141019_234056_inLine
+BABEL_OP3_401_43388_20141019_234056_outLine
+BABEL_OP3_401_43789_20141020_153059_inLine
+BABEL_OP3_401_43789_20141020_153059_outLine
+BABEL_OP3_401_44347_20141103_201828_inLine
+BABEL_OP3_401_44347_20141103_201828_outLine
+BABEL_OP3_401_44420_20141014_143409_inLine
+BABEL_OP3_401_44420_20141014_143409_outLine
+BABEL_OP3_401_44531_20141122_231122_inLine
+BABEL_OP3_401_44531_20141122_231122_outLine
+BABEL_OP3_401_44619_20141003_141028_inLine
+BABEL_OP3_401_44619_20141003_141028_outLine
+BABEL_OP3_401_44868_20140925_153133_inLine
+BABEL_OP3_401_44868_20140925_153133_outLine
+BABEL_OP3_401_46625_20140919_144521_inLine
+BABEL_OP3_401_46625_20140919_144521_outLine
+BABEL_OP3_401_47215_20141001_143242_inLine
+BABEL_OP3_401_47215_20141001_143242_outLine
+BABEL_OP3_401_48200_20141104_174608_inLine
+BABEL_OP3_401_48200_20141104_174608_outLine
+BABEL_OP3_401_52025_20140917_170707_inLine
+BABEL_OP3_401_52025_20140917_170707_outLine
+BABEL_OP3_401_54046_20141105_192438_inLine
+BABEL_OP3_401_54046_20141105_192438_outLine
+BABEL_OP3_401_54744_20141001_143512_inLine
+BABEL_OP3_401_54744_20141001_143512_outLine
+BABEL_OP3_401_56090_20140917_155639_inLine
+BABEL_OP3_401_56090_20140917_155639_outLine
+BABEL_OP3_401_59898_20140930_142511_inLine
+BABEL_OP3_401_59898_20140930_142511_outLine
+BABEL_OP3_401_61011_20140919_134829_inLine
+BABEL_OP3_401_61011_20140919_134829_outLine
+BABEL_OP3_401_61011_20140919_141527_inLine
+BABEL_OP3_401_61011_20140919_141527_outLine
+BABEL_OP3_401_61678_20140919_183209_inLine
+BABEL_OP3_401_61678_20140919_183209_outLine
+BABEL_OP3_401_62724_20141016_200105_inLine
+BABEL_OP3_401_62724_20141016_200105_outLine
+BABEL_OP3_401_63081_20140919_142151_inLine
+BABEL_OP3_401_63081_20140919_142151_outLine
+BABEL_OP3_401_72007_20140930_173109_inLine
+BABEL_OP3_401_72007_20140930_173109_outLine
+BABEL_OP3_401_78544_20140924_155131_inLine
+BABEL_OP3_401_78544_20140924_155131_outLine
+BABEL_OP3_401_81424_20140927_134153_inLine
+BABEL_OP3_401_81424_20140927_134153_outLine
+BABEL_OP3_401_81553_20141112_153426_inLine
+BABEL_OP3_401_81553_20141112_153426_outLine
+BABEL_OP3_401_84815_20141014_163920_inLine
+BABEL_OP3_401_84815_20141014_163920_outLine
+BABEL_OP3_401_87884_20141014_190149_inLine
+BABEL_OP3_401_87884_20141014_190149_outLine
+BABEL_OP3_401_98506_20141124_133100_inLine
+BABEL_OP3_401_98506_20141124_133100_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/eval.list b/egs/babel/s5d/conf/lists/401-mongolian/eval.list
new file mode 100644
index 00000000000..d6756127490
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/eval.list
@@ -0,0 +1,186 @@
+BABEL_OP3_401_10416_20141019_182621_inLine
+BABEL_OP3_401_10416_20141019_182621_outLine
+BABEL_OP3_401_10974_20141027_160927_inLine
+BABEL_OP3_401_10974_20141027_160927_outLine
+BABEL_OP3_401_13040_20141003_135314_inLine
+BABEL_OP3_401_13040_20141003_135314_outLine
+BABEL_OP3_401_14158_20140923_184703_inLine
+BABEL_OP3_401_14158_20140923_184703_outLine
+BABEL_OP3_401_15262_20141001_153004_inLine
+BABEL_OP3_401_15262_20141001_153004_outLine
+BABEL_OP3_401_16056_20140930_142245_inLine
+BABEL_OP3_401_16056_20140930_142245_outLine
+BABEL_OP3_401_16601_20140930_160550_inLine
+BABEL_OP3_401_16601_20140930_160550_outLine
+BABEL_OP3_401_16787_20141017_144614_inLine
+BABEL_OP3_401_16787_20141017_144614_outLine
+BABEL_OP3_401_17573_20141001_155309_inLine
+BABEL_OP3_401_17573_20141001_155309_outLine
+BABEL_OP3_401_18863_20141104_193022_inLine
+BABEL_OP3_401_18863_20141104_193022_outLine
+BABEL_OP3_401_19120_20141119_194530_inLine
+BABEL_OP3_401_19120_20141119_194530_outLine
+BABEL_OP3_401_21029_20141003_135457_inLine
+BABEL_OP3_401_21029_20141003_135457_outLine
+BABEL_OP3_401_21581_20141019_224155_inLine
+BABEL_OP3_401_21581_20141019_224155_outLine
+BABEL_OP3_401_21794_20141020_182533_inLine
+BABEL_OP3_401_21794_20141020_182533_outLine
+BABEL_OP3_401_22216_20140922_180034_inLine
+BABEL_OP3_401_22216_20140922_180034_outLine
+BABEL_OP3_401_22612_20141020_164557_inLine
+BABEL_OP3_401_22612_20141020_164557_outLine
+BABEL_OP3_401_22641_20140930_120916_inLine
+BABEL_OP3_401_22641_20140930_120916_outLine
+BABEL_OP3_401_23395_20141029_191310_inLine
+BABEL_OP3_401_23395_20141029_191310_outLine
+BABEL_OP3_401_23731_20141027_182446_inLine
+BABEL_OP3_401_23731_20141027_182446_outLine
+BABEL_OP3_401_23983_20141204_001919_inLine
+BABEL_OP3_401_23983_20141204_001919_outLine
+BABEL_OP3_401_24589_20141014_140038_inLine
+BABEL_OP3_401_24589_20141014_140038_outLine
+BABEL_OP3_401_27082_20141110_145114_inLine
+BABEL_OP3_401_27082_20141110_145114_outLine
+BABEL_OP3_401_27218_20141002_130035_inLine
+BABEL_OP3_401_27218_20141002_130035_outLine
+BABEL_OP3_401_28538_20141020_154852_inLine
+BABEL_OP3_401_28538_20141020_154852_outLine
+BABEL_OP3_401_28585_20141205_234451_inLine
+BABEL_OP3_401_28585_20141205_234451_outLine
+BABEL_OP3_401_28945_20141013_144506_inLine
+BABEL_OP3_401_28945_20141013_144506_outLine
+BABEL_OP3_401_30250_20140919_185656_inLine
+BABEL_OP3_401_30250_20140919_185656_outLine
+BABEL_OP3_401_30497_20141124_222137_inLine
+BABEL_OP3_401_30497_20141124_222137_outLine
+BABEL_OP3_401_31979_20141018_172147_inLine
+BABEL_OP3_401_31979_20141018_172147_outLine
+BABEL_OP3_401_32959_20141010_150730_inLine
+BABEL_OP3_401_32959_20141010_150730_outLine
+BABEL_OP3_401_34688_20140930_171613_inLine
+BABEL_OP3_401_34688_20140930_171613_outLine
+BABEL_OP3_401_35069_20141111_153956_inLine
+BABEL_OP3_401_35069_20141111_153956_outLine
+BABEL_OP3_401_36341_20140919_151216_inLine
+BABEL_OP3_401_36341_20140919_151216_outLine
+BABEL_OP3_401_37281_20141021_145639_inLine
+BABEL_OP3_401_37281_20141021_145639_outLine
+BABEL_OP3_401_38431_20141017_210308_inLine
+BABEL_OP3_401_38431_20141017_210308_outLine
+BABEL_OP3_401_39059_20141113_134730_inLine
+BABEL_OP3_401_39059_20141113_134730_outLine
+BABEL_OP3_401_39159_20140919_165446_inLine
+BABEL_OP3_401_39159_20140919_165446_outLine
+BABEL_OP3_401_39680_20141114_221332_inLine
+BABEL_OP3_401_39680_20141114_221332_outLine
+BABEL_OP3_401_41400_20141201_173539_inLine
+BABEL_OP3_401_41400_20141201_173539_outLine
+BABEL_OP3_401_41542_20141117_192728_inLine
+BABEL_OP3_401_41542_20141117_192728_outLine
+BABEL_OP3_401_41920_20141001_131923_inLine
+BABEL_OP3_401_41920_20141001_131923_outLine
+BABEL_OP3_401_42600_20141015_152342_inLine
+BABEL_OP3_401_42600_20141015_152342_outLine
+BABEL_OP3_401_42991_20140922_191649_inLine
+BABEL_OP3_401_42991_20140922_191649_outLine
+BABEL_OP3_401_43920_20141001_185918_inLine
+BABEL_OP3_401_43920_20141001_185918_outLine
+BABEL_OP3_401_44847_20141101_215443_inLine
+BABEL_OP3_401_44847_20141101_215443_outLine
+BABEL_OP3_401_45106_20141103_141740_inLine
+BABEL_OP3_401_45106_20141103_141740_outLine
+BABEL_OP3_401_45106_20141103_142537_inLine
+BABEL_OP3_401_45106_20141103_142537_outLine
+BABEL_OP3_401_45642_20141001_205602_inLine
+BABEL_OP3_401_45642_20141001_205602_outLine
+BABEL_OP3_401_45777_20141015_141952_inLine
+BABEL_OP3_401_45777_20141015_141952_outLine
+BABEL_OP3_401_46333_20140929_163346_inLine
+BABEL_OP3_401_46333_20140929_163346_outLine
+BABEL_OP3_401_46702_20140917_183418_inLine
+BABEL_OP3_401_46702_20140917_183418_outLine
+BABEL_OP3_401_47877_20141111_151410_inLine
+BABEL_OP3_401_47877_20141111_151410_outLine
+BABEL_OP3_401_48789_20141020_160327_inLine
+BABEL_OP3_401_48789_20141020_160327_outLine
+BABEL_OP3_401_50630_20140926_164312_inLine
+BABEL_OP3_401_50630_20140926_164312_outLine
+BABEL_OP3_401_50726_20140922_142113_inLine
+BABEL_OP3_401_50726_20140922_142113_outLine
+BABEL_OP3_401_50962_20141009_174305_inLine
+BABEL_OP3_401_50962_20141009_174305_outLine
+BABEL_OP3_401_51540_20141110_152608_inLine
+BABEL_OP3_401_51540_20141110_152608_outLine
+BABEL_OP3_401_52438_20141014_155319_inLine
+BABEL_OP3_401_52438_20141014_155319_outLine
+BABEL_OP3_401_52442_20141023_165129_inLine
+BABEL_OP3_401_52442_20141023_165129_outLine
+BABEL_OP3_401_53063_20141016_144707_inLine
+BABEL_OP3_401_53063_20141016_144707_outLine
+BABEL_OP3_401_53419_20141018_182244_inLine
+BABEL_OP3_401_53419_20141018_182244_outLine
+BABEL_OP3_401_56213_20141016_153651_inLine
+BABEL_OP3_401_56213_20141016_153651_outLine
+BABEL_OP3_401_57922_20141030_180727_inLine
+BABEL_OP3_401_57922_20141030_180727_outLine
+BABEL_OP3_401_58047_20141030_203452_inLine
+BABEL_OP3_401_58047_20141030_203452_outLine
+BABEL_OP3_401_58489_20141001_175646_inLine
+BABEL_OP3_401_58489_20141001_175646_outLine
+BABEL_OP3_401_59993_20141006_214918_inLine
+BABEL_OP3_401_59993_20141006_214918_outLine
+BABEL_OP3_401_62155_20141124_185836_inLine
+BABEL_OP3_401_62155_20141124_185836_outLine
+BABEL_OP3_401_62852_20140922_125106_inLine
+BABEL_OP3_401_62852_20140922_125106_outLine
+BABEL_OP3_401_63670_20141016_165949_inLine
+BABEL_OP3_401_63670_20141016_165949_outLine
+BABEL_OP3_401_64494_20141007_112731_inLine
+BABEL_OP3_401_64494_20141007_112731_outLine
+BABEL_OP3_401_66026_20141016_173200_inLine
+BABEL_OP3_401_66026_20141016_173200_outLine
+BABEL_OP3_401_67842_20141006_131608_inLine
+BABEL_OP3_401_67842_20141006_131608_outLine
+BABEL_OP3_401_68306_20140929_200051_inLine
+BABEL_OP3_401_68306_20140929_200051_outLine
+BABEL_OP3_401_69153_20141016_161457_inLine
+BABEL_OP3_401_69153_20141016_161457_outLine
+BABEL_OP3_401_70282_20141030_201700_inLine
+BABEL_OP3_401_70282_20141030_201700_outLine
+BABEL_OP3_401_77567_20140921_154030_inLine
+BABEL_OP3_401_77567_20140921_154030_outLine
+BABEL_OP3_401_78511_20141001_195118_inLine
+BABEL_OP3_401_78511_20141001_195118_outLine
+BABEL_OP3_401_79139_20141110_182604_inLine
+BABEL_OP3_401_79139_20141110_182604_outLine
+BABEL_OP3_401_80897_20141030_171507_inLine
+BABEL_OP3_401_80897_20141030_171507_outLine
+BABEL_OP3_401_81392_20140929_161849_inLine
+BABEL_OP3_401_81392_20140929_161849_outLine
+BABEL_OP3_401_81404_20141016_154459_inLine
+BABEL_OP3_401_81404_20141016_154459_outLine
+BABEL_OP3_401_81404_20141016_155649_inLine
+BABEL_OP3_401_81404_20141016_155649_outLine
+BABEL_OP3_401_83935_20140930_153105_inLine
+BABEL_OP3_401_83935_20140930_153105_outLine
+BABEL_OP3_401_84327_20140930_203221_inLine
+BABEL_OP3_401_84327_20140930_203221_outLine
+BABEL_OP3_401_84823_20141018_193727_inLine
+BABEL_OP3_401_84823_20141018_193727_outLine
+BABEL_OP3_401_87280_20141021_202831_inLine
+BABEL_OP3_401_87280_20141021_202831_outLine
+BABEL_OP3_401_88550_20141127_184443_inLine
+BABEL_OP3_401_88550_20141127_184443_outLine
+BABEL_OP3_401_89372_20140921_132733_inLine
+BABEL_OP3_401_89372_20140921_132733_outLine
+BABEL_OP3_401_91581_20141001_163329_inLine
+BABEL_OP3_401_91581_20141001_163329_outLine
+BABEL_OP3_401_93946_20141016_192913_inLine
+BABEL_OP3_401_93946_20141016_192913_outLine
+BABEL_OP3_401_94002_20141020_150022_inLine
+BABEL_OP3_401_94002_20141020_150022_outLine
+BABEL_OP3_401_96504_20141014_144817_inLine
+BABEL_OP3_401_96504_20141014_144817_outLine
+BABEL_OP3_401_99732_20141018_150700_inLine
+BABEL_OP3_401_99732_20141018_150700_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list
new file mode 100644
index 00000000000..f4b87dcaef8
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list
@@ -0,0 +1,126 @@
+BABEL_OP3_401_13030_20141015_163112_inLine
+BABEL_OP3_401_13030_20141015_163112_outLine
+BABEL_OP3_401_13324_20141002_165637_inLine
+BABEL_OP3_401_13324_20141002_165637_outLine
+BABEL_OP3_401_13586_20141023_193242_inLine
+BABEL_OP3_401_13586_20141023_193242_outLine
+BABEL_OP3_401_14560_20140922_140509_inLine
+BABEL_OP3_401_14560_20140922_140509_outLine
+BABEL_OP3_401_15902_20140930_144526_inLine
+BABEL_OP3_401_15902_20140930_144526_outLine
+BABEL_OP3_401_16475_20141016_143941_inLine
+BABEL_OP3_401_16475_20141016_143941_outLine
+BABEL_OP3_401_17567_20141023_213629_inLine
+BABEL_OP3_401_17567_20141023_213629_outLine
+BABEL_OP3_401_17923_20141002_172711_inLine
+BABEL_OP3_401_17923_20141002_172711_outLine
+BABEL_OP3_401_19101_20141029_183652_inLine
+BABEL_OP3_401_19101_20141029_183652_outLine
+BABEL_OP3_401_19722_20140930_200553_inLine
+BABEL_OP3_401_19722_20140930_200553_outLine
+BABEL_OP3_401_20916_20140922_174215_inLine
+BABEL_OP3_401_20916_20140922_174215_outLine
+BABEL_OP3_401_22321_20140929_180456_inLine
+BABEL_OP3_401_22321_20140929_180456_outLine
+BABEL_OP3_401_23893_20141125_213344_inLine
+BABEL_OP3_401_23893_20141125_213344_outLine
+BABEL_OP3_401_24290_20141124_184351_inLine
+BABEL_OP3_401_24290_20141124_184351_outLine
+BABEL_OP3_401_24323_20141017_151036_inLine
+BABEL_OP3_401_24323_20141017_151036_outLine
+BABEL_OP3_401_24470_20141029_145653_inLine
+BABEL_OP3_401_24470_20141029_145653_outLine
+BABEL_OP3_401_24605_20141001_142727_inLine
+BABEL_OP3_401_24605_20141001_142727_outLine
+BABEL_OP3_401_25961_20140929_183632_inLine
+BABEL_OP3_401_25961_20140929_183632_outLine
+BABEL_OP3_401_26072_20141112_173131_inLine
+BABEL_OP3_401_26072_20141112_173131_outLine
+BABEL_OP3_401_26398_20141204_001557_inLine
+BABEL_OP3_401_26398_20141204_001557_outLine
+BABEL_OP3_401_26574_20141103_163656_inLine
+BABEL_OP3_401_26574_20141103_163656_outLine
+BABEL_OP3_401_26999_20141101_213851_inLine
+BABEL_OP3_401_26999_20141101_213851_outLine
+BABEL_OP3_401_27042_20141017_184608_inLine
+BABEL_OP3_401_27042_20141017_184608_outLine
+BABEL_OP3_401_27841_20141113_200006_inLine
+BABEL_OP3_401_27841_20141113_200006_outLine
+BABEL_OP3_401_28775_20141003_162126_inLine
+BABEL_OP3_401_28775_20141003_162126_outLine
+BABEL_OP3_401_29076_20141031_003943_inLine
+BABEL_OP3_401_29076_20141031_003943_outLine
+BABEL_OP3_401_29404_20141121_153054_inLine
+BABEL_OP3_401_29404_20141121_153054_outLine
+BABEL_OP3_401_29685_20141019_210404_inLine
+BABEL_OP3_401_29685_20141019_210404_outLine
+BABEL_OP3_401_29685_20141019_210959_inLine
+BABEL_OP3_401_29685_20141019_210959_outLine
+BABEL_OP3_401_31624_20141003_192655_inLine
+BABEL_OP3_401_31624_20141003_192655_outLine
+BABEL_OP3_401_31628_20140923_145349_inLine
+BABEL_OP3_401_31628_20140923_145349_outLine
+BABEL_OP3_401_32708_20141003_200927_inLine
+BABEL_OP3_401_32708_20141003_200927_outLine
+BABEL_OP3_401_33111_20141122_223105_inLine
+BABEL_OP3_401_33111_20141122_223105_outLine
+BABEL_OP3_401_33672_20140930_132456_inLine
+BABEL_OP3_401_33672_20140930_132456_outLine
+BABEL_OP3_401_33672_20140930_133426_inLine
+BABEL_OP3_401_33672_20140930_133426_outLine
+BABEL_OP3_401_35143_20141010_163440_inLine
+BABEL_OP3_401_35143_20141010_163440_outLine
+BABEL_OP3_401_38878_20141031_201014_inLine
+BABEL_OP3_401_38878_20141031_201014_outLine
+BABEL_OP3_401_40713_20141003_155735_inLine
+BABEL_OP3_401_40713_20141003_155735_outLine
+BABEL_OP3_401_41618_20141028_201644_inLine
+BABEL_OP3_401_41618_20141028_201644_outLine
+BABEL_OP3_401_42619_20141104_204106_inLine
+BABEL_OP3_401_42619_20141104_204106_outLine
+BABEL_OP3_401_42834_20141103_204826_inLine
+BABEL_OP3_401_42834_20141103_204826_outLine
+BABEL_OP3_401_43646_20140917_164218_inLine
+BABEL_OP3_401_43646_20140917_164218_outLine
+BABEL_OP3_401_45560_20140930_190100_inLine
+BABEL_OP3_401_45560_20140930_190100_outLine
+BABEL_OP3_401_46881_20140922_175212_inLine
+BABEL_OP3_401_46881_20140922_175212_outLine
+BABEL_OP3_401_47283_20141006_193958_inLine
+BABEL_OP3_401_47283_20141006_193958_outLine
+BABEL_OP3_401_51407_20141027_182114_inLine
+BABEL_OP3_401_51407_20141027_182114_outLine
+BABEL_OP3_401_52725_20141123_224942_inLine
+BABEL_OP3_401_52725_20141123_224942_outLine
+BABEL_OP3_401_52818_20140922_184227_inLine
+BABEL_OP3_401_52818_20140922_184227_outLine
+BABEL_OP3_401_54162_20141107_221207_inLine
+BABEL_OP3_401_54162_20141107_221207_outLine
+BABEL_OP3_401_56677_20141020_160804_inLine
+BABEL_OP3_401_56677_20141020_160804_outLine
+BABEL_OP3_401_57065_20140924_135508_inLine
+BABEL_OP3_401_57065_20140924_135508_outLine
+BABEL_OP3_401_60310_20141017_165419_inLine
+BABEL_OP3_401_60310_20141017_165419_outLine
+BABEL_OP3_401_63906_20141124_212323_inLine
+BABEL_OP3_401_63906_20141124_212323_outLine
+BABEL_OP3_401_64398_20140922_165727_inLine
+BABEL_OP3_401_64398_20140922_165727_outLine
+BABEL_OP3_401_73022_20141111_173204_inLine
+BABEL_OP3_401_73022_20141111_173204_outLine
+BABEL_OP3_401_74921_20140924_165937_inLine
+BABEL_OP3_401_74921_20140924_165937_outLine
+BABEL_OP3_401_77744_20141014_125609_inLine
+BABEL_OP3_401_77744_20141014_125609_outLine
+BABEL_OP3_401_77744_20141014_140124_inLine
+BABEL_OP3_401_77744_20141014_140124_outLine
+BABEL_OP3_401_79167_20140925_132420_inLine
+BABEL_OP3_401_79167_20140925_132420_outLine
+BABEL_OP3_401_81287_20141001_145404_inLine
+BABEL_OP3_401_81287_20141001_145404_outLine
+BABEL_OP3_401_82224_20141111_175445_inLine
+BABEL_OP3_401_82224_20141111_175445_outLine
+BABEL_OP3_401_87073_20140917_201716_inLine
+BABEL_OP3_401_87073_20140917_201716_outLine
+BABEL_OP3_401_91977_20140925_184203_inLine
+BABEL_OP3_401_91977_20140925_184203_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list
new file mode 100644
index 00000000000..550224d6e16
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list
@@ -0,0 +1,392 @@
+BABEL_OP3_401_10524_20141119_213218_inLine
+BABEL_OP3_401_10524_20141119_213218_outLine
+BABEL_OP3_401_10647_20141119_154922_inLine
+BABEL_OP3_401_10647_20141119_154922_outLine
+BABEL_OP3_401_10901_20141021_124158_inLine
+BABEL_OP3_401_10901_20141021_124158_outLine
+BABEL_OP3_401_10966_20141019_192715_inLine
+BABEL_OP3_401_10966_20141019_192715_outLine
+BABEL_OP3_401_11581_20141030_214939_inLine
+BABEL_OP3_401_11581_20141030_214939_outLine
+BABEL_OP3_401_11663_20141105_143103_inLine
+BABEL_OP3_401_11663_20141105_143103_outLine
+BABEL_OP3_401_11673_20140917_163413_inLine
+BABEL_OP3_401_11673_20140917_163413_outLine
+BABEL_OP3_401_11797_20140929_205622_inLine
+BABEL_OP3_401_11797_20140929_205622_outLine
+BABEL_OP3_401_12036_20141002_134817_inLine
+BABEL_OP3_401_12036_20141002_134817_outLine
+BABEL_OP3_401_12242_20141014_143019_inLine
+BABEL_OP3_401_12242_20141014_143019_outLine
+BABEL_OP3_401_12635_20141117_185400_inLine
+BABEL_OP3_401_12635_20141117_185400_outLine
+BABEL_OP3_401_12767_20141001_130658_inLine
+BABEL_OP3_401_12767_20141001_130658_outLine
+BABEL_OP3_401_12851_20140919_135242_inLine
+BABEL_OP3_401_12851_20140919_135242_outLine
+BABEL_OP3_401_13184_20141110_163330_inLine
+BABEL_OP3_401_13184_20141110_163330_outLine
+BABEL_OP3_401_13184_20141110_163902_inLine
+BABEL_OP3_401_13184_20141110_163902_outLine
+BABEL_OP3_401_13490_20141110_152643_inLine
+BABEL_OP3_401_13490_20141110_152643_outLine
+BABEL_OP3_401_13561_20141027_154606_inLine
+BABEL_OP3_401_13561_20141027_154606_outLine
+BABEL_OP3_401_13664_20140922_131741_inLine
+BABEL_OP3_401_13664_20140922_131741_outLine
+BABEL_OP3_401_13709_20141118_170840_inLine
+BABEL_OP3_401_13709_20141118_170840_outLine
+BABEL_OP3_401_13744_20140919_122844_inLine
+BABEL_OP3_401_13744_20140919_122844_outLine
+BABEL_OP3_401_14719_20141017_215720_inLine
+BABEL_OP3_401_14719_20141017_215720_outLine
+BABEL_OP3_401_14725_20140929_155627_inLine
+BABEL_OP3_401_14725_20140929_155627_outLine
+BABEL_OP3_401_14807_20141030_232134_inLine
+BABEL_OP3_401_14807_20141030_232134_outLine
+BABEL_OP3_401_14814_20141014_184415_inLine
+BABEL_OP3_401_14814_20141014_184415_outLine
+BABEL_OP3_401_14972_20141028_200051_inLine
+BABEL_OP3_401_14972_20141028_200051_outLine
+BABEL_OP3_401_15702_20140923_180447_inLine
+BABEL_OP3_401_15702_20140923_180447_outLine
+BABEL_OP3_401_15730_20140924_135900_inLine
+BABEL_OP3_401_15730_20140924_135900_outLine
+BABEL_OP3_401_16749_20141112_193028_inLine
+BABEL_OP3_401_16749_20141112_193028_outLine
+BABEL_OP3_401_16839_20141110_174923_inLine
+BABEL_OP3_401_16839_20141110_174923_outLine
+BABEL_OP3_401_16886_20141017_152623_inLine
+BABEL_OP3_401_16886_20141017_152623_outLine
+BABEL_OP3_401_16924_20140923_164321_inLine
+BABEL_OP3_401_16924_20140923_164321_outLine
+BABEL_OP3_401_17320_20141125_170435_inLine
+BABEL_OP3_401_17320_20141125_170435_outLine
+BABEL_OP3_401_17420_20141118_190621_inLine
+BABEL_OP3_401_17420_20141118_190621_outLine
+BABEL_OP3_401_17615_20140924_144400_inLine
+BABEL_OP3_401_17615_20140924_144400_outLine
+BABEL_OP3_401_18078_20141113_162556_inLine
+BABEL_OP3_401_18078_20141113_162556_outLine
+BABEL_OP3_401_18380_20141023_154240_inLine
+BABEL_OP3_401_18380_20141023_154240_outLine
+BABEL_OP3_401_18566_20141120_004140_inLine
+BABEL_OP3_401_18566_20141120_004140_outLine
+BABEL_OP3_401_18924_20141030_205111_inLine
+BABEL_OP3_401_18924_20141030_205111_outLine
+BABEL_OP3_401_18939_20141001_200418_inLine
+BABEL_OP3_401_18939_20141001_200418_outLine
+BABEL_OP3_401_19134_20141030_191814_inLine
+BABEL_OP3_401_19134_20141030_191814_outLine
+BABEL_OP3_401_19134_20141030_192931_inLine
+BABEL_OP3_401_19134_20141030_192931_outLine
+BABEL_OP3_401_19773_20141101_211403_inLine
+BABEL_OP3_401_19773_20141101_211403_outLine
+BABEL_OP3_401_19818_20141103_184746_inLine
+BABEL_OP3_401_19818_20141103_184746_outLine
+BABEL_OP3_401_19818_20141103_185728_inLine
+BABEL_OP3_401_19818_20141103_185728_outLine
+BABEL_OP3_401_20133_20140919_173858_inLine
+BABEL_OP3_401_20133_20140919_173858_outLine
+BABEL_OP3_401_20922_20141110_190444_inLine
+BABEL_OP3_401_20922_20141110_190444_outLine
+BABEL_OP3_401_21206_20141003_120941_inLine
+BABEL_OP3_401_21206_20141003_120941_outLine
+BABEL_OP3_401_21206_20141003_122457_inLine
+BABEL_OP3_401_21206_20141003_122457_outLine
+BABEL_OP3_401_21327_20141020_204038_inLine
+BABEL_OP3_401_21327_20141020_204038_outLine
+BABEL_OP3_401_21807_20141029_214508_inLine
+BABEL_OP3_401_21807_20141029_214508_outLine
+BABEL_OP3_401_22446_20140929_133647_inLine
+BABEL_OP3_401_22446_20140929_133647_outLine
+BABEL_OP3_401_22624_20141001_141008_inLine
+BABEL_OP3_401_22624_20141001_141008_outLine
+BABEL_OP3_401_22918_20141114_145920_inLine
+BABEL_OP3_401_22918_20141114_145920_outLine
+BABEL_OP3_401_23006_20141014_190149_inLine
+BABEL_OP3_401_23006_20141014_190149_outLine
+BABEL_OP3_401_23046_20141014_150823_inLine
+BABEL_OP3_401_23046_20141014_150823_outLine
+BABEL_OP3_401_23092_20141010_141138_inLine
+BABEL_OP3_401_23092_20141010_141138_outLine
+BABEL_OP3_401_23153_20141018_201630_inLine
+BABEL_OP3_401_23153_20141018_201630_outLine
+BABEL_OP3_401_23980_20141018_192714_inLine
+BABEL_OP3_401_23980_20141018_192714_outLine
+BABEL_OP3_401_24270_20141030_195323_inLine
+BABEL_OP3_401_24270_20141030_195323_outLine
+BABEL_OP3_401_24569_20141016_182323_inLine
+BABEL_OP3_401_24569_20141016_182323_outLine
+BABEL_OP3_401_24586_20141117_160948_inLine
+BABEL_OP3_401_24586_20141117_160948_outLine
+BABEL_OP3_401_24590_20141017_175757_inLine
+BABEL_OP3_401_24590_20141017_175757_outLine
+BABEL_OP3_401_24679_20140919_185323_inLine
+BABEL_OP3_401_24679_20140919_185323_outLine
+BABEL_OP3_401_24982_20141008_150245_inLine
+BABEL_OP3_401_24982_20141008_150245_outLine
+BABEL_OP3_401_25412_20141031_171749_inLine
+BABEL_OP3_401_25412_20141031_171749_outLine
+BABEL_OP3_401_25719_20141110_191042_inLine
+BABEL_OP3_401_25719_20141110_191042_outLine
+BABEL_OP3_401_26507_20141118_210109_inLine
+BABEL_OP3_401_26507_20141118_210109_outLine
+BABEL_OP3_401_27203_20141030_164916_inLine
+BABEL_OP3_401_27203_20141030_164916_outLine
+BABEL_OP3_401_28522_20140927_172947_inLine
+BABEL_OP3_401_28522_20140927_172947_outLine
+BABEL_OP3_401_28600_20141021_194818_inLine
+BABEL_OP3_401_28600_20141021_194818_outLine
+BABEL_OP3_401_28814_20141112_190902_inLine
+BABEL_OP3_401_28814_20141112_190902_outLine
+BABEL_OP3_401_29021_20141118_205619_inLine
+BABEL_OP3_401_29021_20141118_205619_outLine
+BABEL_OP3_401_29323_20141113_190829_inLine
+BABEL_OP3_401_29323_20141113_190829_outLine
+BABEL_OP3_401_30013_20140927_141830_inLine
+BABEL_OP3_401_30013_20140927_141830_outLine
+BABEL_OP3_401_30058_20141118_221622_inLine
+BABEL_OP3_401_30058_20141118_221622_outLine
+BABEL_OP3_401_31346_20141103_145401_inLine
+BABEL_OP3_401_31346_20141103_145401_outLine
+BABEL_OP3_401_31992_20141001_135942_inLine
+BABEL_OP3_401_31992_20141001_135942_outLine
+BABEL_OP3_401_32122_20141016_212210_inLine
+BABEL_OP3_401_32122_20141016_212210_outLine
+BABEL_OP3_401_32328_20141018_200856_inLine
+BABEL_OP3_401_32328_20141018_200856_outLine
+BABEL_OP3_401_33273_20141021_153659_inLine
+BABEL_OP3_401_33273_20141021_153659_outLine
+BABEL_OP3_401_33355_20141001_174510_inLine
+BABEL_OP3_401_33355_20141001_174510_outLine
+BABEL_OP3_401_33497_20141106_201923_inLine
+BABEL_OP3_401_33497_20141106_201923_outLine
+BABEL_OP3_401_33913_20141020_135517_inLine
+BABEL_OP3_401_33913_20141020_135517_outLine
+BABEL_OP3_401_34197_20140919_193654_inLine
+BABEL_OP3_401_34197_20140919_193654_outLine
+BABEL_OP3_401_34328_20141020_142248_inLine
+BABEL_OP3_401_34328_20141020_142248_outLine
+BABEL_OP3_401_34679_20141006_155637_inLine
+BABEL_OP3_401_34679_20141006_155637_outLine
+BABEL_OP3_401_35139_20141002_182038_inLine
+BABEL_OP3_401_35139_20141002_182038_outLine
+BABEL_OP3_401_35467_20140919_155737_inLine
+BABEL_OP3_401_35467_20140919_155737_outLine
+BABEL_OP3_401_35467_20140919_162819_inLine
+BABEL_OP3_401_35467_20140919_162819_outLine
+BABEL_OP3_401_36894_20140921_162105_inLine
+BABEL_OP3_401_36894_20140921_162105_outLine
+BABEL_OP3_401_37285_20140929_192149_inLine
+BABEL_OP3_401_37285_20140929_192149_outLine
+BABEL_OP3_401_37290_20141031_174340_inLine
+BABEL_OP3_401_37290_20141031_174340_outLine
+BABEL_OP3_401_37598_20141031_155805_inLine
+BABEL_OP3_401_37598_20141031_155805_outLine
+BABEL_OP3_401_38340_20141003_182953_inLine
+BABEL_OP3_401_38340_20141003_182953_outLine
+BABEL_OP3_401_39307_20140922_113434_inLine
+BABEL_OP3_401_39307_20140922_113434_outLine
+BABEL_OP3_401_39426_20141114_165136_inLine
+BABEL_OP3_401_39426_20141114_165136_outLine
+BABEL_OP3_401_39920_20141118_215327_inLine
+BABEL_OP3_401_39920_20141118_215327_outLine
+BABEL_OP3_401_40557_20141014_182351_inLine
+BABEL_OP3_401_40557_20141014_182351_outLine
+BABEL_OP3_401_41592_20141020_140853_inLine
+BABEL_OP3_401_41592_20141020_140853_outLine
+BABEL_OP3_401_41598_20141113_151053_inLine
+BABEL_OP3_401_41598_20141113_151053_outLine
+BABEL_OP3_401_42029_20141113_160852_inLine
+BABEL_OP3_401_42029_20141113_160852_outLine
+BABEL_OP3_401_42155_20141028_185638_inLine
+BABEL_OP3_401_42155_20141028_185638_outLine
+BABEL_OP3_401_42434_20141019_233012_inLine
+BABEL_OP3_401_42434_20141019_233012_outLine
+BABEL_OP3_401_42497_20141002_144745_inLine
+BABEL_OP3_401_42497_20141002_144745_outLine
+BABEL_OP3_401_42771_20141028_135131_inLine
+BABEL_OP3_401_42771_20141028_135131_outLine
+BABEL_OP3_401_42942_20141018_160034_inLine
+BABEL_OP3_401_42942_20141018_160034_outLine
+BABEL_OP3_401_43286_20140923_144213_inLine
+BABEL_OP3_401_43286_20140923_144213_outLine
+BABEL_OP3_401_43784_20141008_215339_inLine
+BABEL_OP3_401_43784_20141008_215339_outLine
+BABEL_OP3_401_43788_20140925_172756_inLine
+BABEL_OP3_401_43788_20140925_172756_outLine
+BABEL_OP3_401_45201_20141127_132656_inLine
+BABEL_OP3_401_45201_20141127_132656_outLine
+BABEL_OP3_401_46261_20141021_185026_inLine
+BABEL_OP3_401_46261_20141021_185026_outLine
+BABEL_OP3_401_46310_20140930_153138_inLine
+BABEL_OP3_401_46310_20140930_153138_outLine
+BABEL_OP3_401_46550_20141006_181152_inLine
+BABEL_OP3_401_46550_20141006_181152_outLine
+BABEL_OP3_401_46558_20140924_164642_inLine
+BABEL_OP3_401_46558_20140924_164642_outLine
+BABEL_OP3_401_46589_20140924_191634_inLine
+BABEL_OP3_401_46589_20140924_191634_outLine
+BABEL_OP3_401_46681_20141002_163836_inLine
+BABEL_OP3_401_46681_20141002_163836_outLine
+BABEL_OP3_401_46688_20141001_201358_inLine
+BABEL_OP3_401_46688_20141001_201358_outLine
+BABEL_OP3_401_46770_20141111_221929_inLine
+BABEL_OP3_401_46770_20141111_221929_outLine
+BABEL_OP3_401_47487_20141016_162401_inLine
+BABEL_OP3_401_47487_20141016_162401_outLine
+BABEL_OP3_401_47866_20141124_164427_inLine
+BABEL_OP3_401_47866_20141124_164427_outLine
+BABEL_OP3_401_47878_20141030_173221_inLine
+BABEL_OP3_401_47878_20141030_173221_outLine
+BABEL_OP3_401_48243_20141006_175215_inLine
+BABEL_OP3_401_48243_20141006_175215_outLine
+BABEL_OP3_401_48610_20141001_225254_inLine
+BABEL_OP3_401_48610_20141001_225254_outLine
+BABEL_OP3_401_49001_20141014_165716_inLine
+BABEL_OP3_401_49001_20141014_165716_outLine
+BABEL_OP3_401_49306_20141124_193818_inLine
+BABEL_OP3_401_49306_20141124_193818_outLine
+BABEL_OP3_401_50427_20141028_152244_inLine
+BABEL_OP3_401_50427_20141028_152244_outLine
+BABEL_OP3_401_51968_20141019_151724_inLine
+BABEL_OP3_401_51968_20141019_151724_outLine
+BABEL_OP3_401_52404_20140924_182534_inLine
+BABEL_OP3_401_52404_20140924_182534_outLine
+BABEL_OP3_401_53957_20141020_142913_inLine
+BABEL_OP3_401_53957_20141020_142913_outLine
+BABEL_OP3_401_54074_20141021_142528_inLine
+BABEL_OP3_401_54074_20141021_142528_outLine
+BABEL_OP3_401_56331_20141124_184702_inLine
+BABEL_OP3_401_56331_20141124_184702_outLine
+BABEL_OP3_401_57529_20141017_181551_inLine
+BABEL_OP3_401_57529_20141017_181551_outLine
+BABEL_OP3_401_57542_20141122_182629_inLine
+BABEL_OP3_401_57542_20141122_182629_outLine
+BABEL_OP3_401_58006_20141124_153854_inLine
+BABEL_OP3_401_58006_20141124_153854_outLine
+BABEL_OP3_401_58006_20141124_155107_inLine
+BABEL_OP3_401_58006_20141124_155107_outLine
+BABEL_OP3_401_58734_20140930_173126_inLine
+BABEL_OP3_401_58734_20140930_173126_outLine
+BABEL_OP3_401_58821_20140930_211254_inLine
+BABEL_OP3_401_58821_20140930_211254_outLine
+BABEL_OP3_401_59078_20141030_203852_inLine
+BABEL_OP3_401_59078_20141030_203852_outLine
+BABEL_OP3_401_59078_20141030_205139_inLine
+BABEL_OP3_401_59078_20141030_205139_outLine
+BABEL_OP3_401_60026_20141002_115024_inLine
+BABEL_OP3_401_60026_20141002_115024_outLine
+BABEL_OP3_401_60474_20141015_154855_inLine
+BABEL_OP3_401_60474_20141015_154855_outLine
+BABEL_OP3_401_65077_20140917_151315_inLine
+BABEL_OP3_401_65077_20140917_151315_outLine
+BABEL_OP3_401_65367_20141111_163221_inLine
+BABEL_OP3_401_65367_20141111_163221_outLine
+BABEL_OP3_401_66472_20141027_173935_inLine
+BABEL_OP3_401_66472_20141027_173935_outLine
+BABEL_OP3_401_68068_20140925_140055_inLine
+BABEL_OP3_401_68068_20140925_140055_outLine
+BABEL_OP3_401_68384_20141020_225435_inLine
+BABEL_OP3_401_68384_20141020_225435_outLine
+BABEL_OP3_401_68385_20140919_175351_inLine
+BABEL_OP3_401_68385_20140919_175351_outLine
+BABEL_OP3_401_68748_20140925_160756_inLine
+BABEL_OP3_401_68748_20140925_160756_outLine
+BABEL_OP3_401_69474_20140930_190551_inLine
+BABEL_OP3_401_69474_20140930_190551_outLine
+BABEL_OP3_401_69636_20140924_174446_inLine
+BABEL_OP3_401_69636_20140924_174446_outLine
+BABEL_OP3_401_71566_20141001_171842_inLine
+BABEL_OP3_401_71566_20141001_171842_outLine
+BABEL_OP3_401_72040_20141009_171306_inLine
+BABEL_OP3_401_72040_20141009_171306_outLine
+BABEL_OP3_401_72110_20141001_122146_inLine
+BABEL_OP3_401_72110_20141001_122146_outLine
+BABEL_OP3_401_72844_20140919_154733_inLine
+BABEL_OP3_401_72844_20140919_154733_outLine
+BABEL_OP3_401_72844_20140919_162600_inLine
+BABEL_OP3_401_72844_20140919_162600_outLine
+BABEL_OP3_401_73430_20140930_142250_inLine
+BABEL_OP3_401_73430_20140930_142250_outLine
+BABEL_OP3_401_73591_20140904_190044_inLine
+BABEL_OP3_401_73591_20140904_190044_outLine
+BABEL_OP3_401_74667_20141017_173017_inLine
+BABEL_OP3_401_74667_20141017_173017_outLine
+BABEL_OP3_401_74799_20141030_203910_inLine
+BABEL_OP3_401_74799_20141030_203910_outLine
+BABEL_OP3_401_75505_20140917_155231_inLine
+BABEL_OP3_401_75505_20140917_155231_outLine
+BABEL_OP3_401_76126_20141018_171804_inLine
+BABEL_OP3_401_76126_20141018_171804_outLine
+BABEL_OP3_401_76437_20140904_161741_inLine
+BABEL_OP3_401_76437_20140904_161741_outLine
+BABEL_OP3_401_76444_20141014_203500_inLine
+BABEL_OP3_401_76444_20141014_203500_outLine
+BABEL_OP3_401_76499_20141022_151625_inLine
+BABEL_OP3_401_76499_20141022_151625_outLine
+BABEL_OP3_401_78482_20141104_155857_inLine
+BABEL_OP3_401_78482_20141104_155857_outLine
+BABEL_OP3_401_79080_20141112_120644_inLine
+BABEL_OP3_401_79080_20141112_120644_outLine
+BABEL_OP3_401_79131_20141125_193444_inLine
+BABEL_OP3_401_79131_20141125_193444_outLine
+BABEL_OP3_401_79995_20141020_232746_inLine
+BABEL_OP3_401_79995_20141020_232746_outLine
+BABEL_OP3_401_80136_20141112_134414_inLine
+BABEL_OP3_401_80136_20141112_134414_outLine
+BABEL_OP3_401_80306_20141110_184642_inLine
+BABEL_OP3_401_80306_20141110_184642_outLine
+BABEL_OP3_401_80439_20141015_141847_inLine
+BABEL_OP3_401_80439_20141015_141847_outLine
+BABEL_OP3_401_80559_20141003_131820_inLine
+BABEL_OP3_401_80559_20141003_131820_outLine
+BABEL_OP3_401_81433_20141027_184533_inLine
+BABEL_OP3_401_81433_20141027_184533_outLine
+BABEL_OP3_401_81622_20141021_162012_inLine
+BABEL_OP3_401_81622_20141021_162012_outLine
+BABEL_OP3_401_82035_20141030_173356_inLine
+BABEL_OP3_401_82035_20141030_173356_outLine
+BABEL_OP3_401_82035_20141030_174442_inLine
+BABEL_OP3_401_82035_20141030_174442_outLine
+BABEL_OP3_401_84547_20140917_192745_inLine
+BABEL_OP3_401_84547_20140917_192745_outLine
+BABEL_OP3_401_84547_20140917_194346_inLine
+BABEL_OP3_401_84547_20140917_194346_outLine
+BABEL_OP3_401_86676_20140924_200749_inLine
+BABEL_OP3_401_86676_20140924_200749_outLine
+BABEL_OP3_401_87871_20141018_185934_inLine
+BABEL_OP3_401_87871_20141018_185934_outLine
+BABEL_OP3_401_87921_20141010_173551_inLine
+BABEL_OP3_401_87921_20141010_173551_outLine
+BABEL_OP3_401_88783_20141022_171250_inLine
+BABEL_OP3_401_88783_20141022_171250_outLine
+BABEL_OP3_401_90737_20141020_180826_inLine
+BABEL_OP3_401_90737_20141020_180826_outLine
+BABEL_OP3_401_91891_20141001_130023_inLine
+BABEL_OP3_401_91891_20141001_130023_outLine
+BABEL_OP3_401_92065_20141017_191557_inLine
+BABEL_OP3_401_92065_20141017_191557_outLine
+BABEL_OP3_401_92736_20141017_194915_inLine
+BABEL_OP3_401_92736_20141017_194915_outLine
+BABEL_OP3_401_92740_20140926_150615_inLine
+BABEL_OP3_401_92740_20140926_150615_outLine
+BABEL_OP3_401_93490_20141106_171428_inLine
+BABEL_OP3_401_93490_20141106_171428_outLine
+BABEL_OP3_401_94745_20140923_154933_inLine
+BABEL_OP3_401_94745_20140923_154933_outLine
+BABEL_OP3_401_95077_20141010_153959_inLine
+BABEL_OP3_401_95077_20141010_153959_outLine
+BABEL_OP3_401_95294_20140923_173007_inLine
+BABEL_OP3_401_95294_20140923_173007_outLine
+BABEL_OP3_401_95446_20141112_154248_inLine
+BABEL_OP3_401_95446_20141112_154248_outLine
+BABEL_OP3_401_95663_20140917_182410_inLine
+BABEL_OP3_401_95663_20140917_182410_outLine
+BABEL_OP3_401_96324_20140921_170922_inLine
+BABEL_OP3_401_96324_20140921_170922_outLine
+BABEL_OP3_401_97376_20140929_154000_inLine
+BABEL_OP3_401_97376_20140929_154000_outLine
+BABEL_OP3_401_97772_20140917_144539_inLine
+BABEL_OP3_401_97772_20140917_144539_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/training.list b/egs/babel/s5d/conf/lists/401-mongolian/training.list
new file mode 100644
index 00000000000..ebad291922b
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/training.list
@@ -0,0 +1,518 @@
+BABEL_OP3_401_10524_20141119_213218_inLine
+BABEL_OP3_401_10524_20141119_213218_outLine
+BABEL_OP3_401_10647_20141119_154922_inLine
+BABEL_OP3_401_10647_20141119_154922_outLine
+BABEL_OP3_401_10901_20141021_124158_inLine
+BABEL_OP3_401_10901_20141021_124158_outLine
+BABEL_OP3_401_10966_20141019_192715_inLine
+BABEL_OP3_401_10966_20141019_192715_outLine
+BABEL_OP3_401_11581_20141030_214939_inLine
+BABEL_OP3_401_11581_20141030_214939_outLine
+BABEL_OP3_401_11663_20141105_143103_inLine
+BABEL_OP3_401_11663_20141105_143103_outLine
+BABEL_OP3_401_11673_20140917_163413_inLine
+BABEL_OP3_401_11673_20140917_163413_outLine
+BABEL_OP3_401_11797_20140929_205622_inLine
+BABEL_OP3_401_11797_20140929_205622_outLine
+BABEL_OP3_401_12036_20141002_134817_inLine
+BABEL_OP3_401_12036_20141002_134817_outLine
+BABEL_OP3_401_12242_20141014_143019_inLine
+BABEL_OP3_401_12242_20141014_143019_outLine
+BABEL_OP3_401_12635_20141117_185400_inLine
+BABEL_OP3_401_12635_20141117_185400_outLine
+BABEL_OP3_401_12767_20141001_130658_inLine
+BABEL_OP3_401_12767_20141001_130658_outLine
+BABEL_OP3_401_12851_20140919_135242_inLine
+BABEL_OP3_401_12851_20140919_135242_outLine
+BABEL_OP3_401_13030_20141015_163112_inLine
+BABEL_OP3_401_13030_20141015_163112_outLine
+BABEL_OP3_401_13184_20141110_163330_inLine
+BABEL_OP3_401_13184_20141110_163330_outLine
+BABEL_OP3_401_13184_20141110_163902_inLine
+BABEL_OP3_401_13184_20141110_163902_outLine
+BABEL_OP3_401_13324_20141002_165637_inLine
+BABEL_OP3_401_13324_20141002_165637_outLine
+BABEL_OP3_401_13490_20141110_152643_inLine
+BABEL_OP3_401_13490_20141110_152643_outLine
+BABEL_OP3_401_13561_20141027_154606_inLine
+BABEL_OP3_401_13561_20141027_154606_outLine
+BABEL_OP3_401_13586_20141023_193242_inLine
+BABEL_OP3_401_13586_20141023_193242_outLine
+BABEL_OP3_401_13664_20140922_131741_inLine
+BABEL_OP3_401_13664_20140922_131741_outLine
+BABEL_OP3_401_13709_20141118_170840_inLine
+BABEL_OP3_401_13709_20141118_170840_outLine
+BABEL_OP3_401_13744_20140919_122844_inLine
+BABEL_OP3_401_13744_20140919_122844_outLine
+BABEL_OP3_401_14560_20140922_140509_inLine
+BABEL_OP3_401_14560_20140922_140509_outLine
+BABEL_OP3_401_14719_20141017_215720_inLine
+BABEL_OP3_401_14719_20141017_215720_outLine
+BABEL_OP3_401_14725_20140929_155627_inLine
+BABEL_OP3_401_14725_20140929_155627_outLine
+BABEL_OP3_401_14807_20141030_232134_inLine
+BABEL_OP3_401_14807_20141030_232134_outLine
+BABEL_OP3_401_14814_20141014_184415_inLine
+BABEL_OP3_401_14814_20141014_184415_outLine
+BABEL_OP3_401_14972_20141028_200051_inLine
+BABEL_OP3_401_14972_20141028_200051_outLine
+BABEL_OP3_401_15702_20140923_180447_inLine
+BABEL_OP3_401_15702_20140923_180447_outLine
+BABEL_OP3_401_15730_20140924_135900_inLine
+BABEL_OP3_401_15730_20140924_135900_outLine
+BABEL_OP3_401_15902_20140930_144526_inLine
+BABEL_OP3_401_15902_20140930_144526_outLine
+BABEL_OP3_401_16475_20141016_143941_inLine
+BABEL_OP3_401_16475_20141016_143941_outLine
+BABEL_OP3_401_16749_20141112_193028_inLine
+BABEL_OP3_401_16749_20141112_193028_outLine
+BABEL_OP3_401_16839_20141110_174923_inLine
+BABEL_OP3_401_16839_20141110_174923_outLine
+BABEL_OP3_401_16886_20141017_152623_inLine
+BABEL_OP3_401_16886_20141017_152623_outLine
+BABEL_OP3_401_16924_20140923_164321_inLine
+BABEL_OP3_401_16924_20140923_164321_outLine
+BABEL_OP3_401_17320_20141125_170435_inLine
+BABEL_OP3_401_17320_20141125_170435_outLine
+BABEL_OP3_401_17420_20141118_190621_inLine
+BABEL_OP3_401_17420_20141118_190621_outLine
+BABEL_OP3_401_17567_20141023_213629_inLine
+BABEL_OP3_401_17567_20141023_213629_outLine
+BABEL_OP3_401_17615_20140924_144400_inLine
+BABEL_OP3_401_17615_20140924_144400_outLine
+BABEL_OP3_401_17923_20141002_172711_inLine
+BABEL_OP3_401_17923_20141002_172711_outLine
+BABEL_OP3_401_18078_20141113_162556_inLine
+BABEL_OP3_401_18078_20141113_162556_outLine
+BABEL_OP3_401_18380_20141023_154240_inLine
+BABEL_OP3_401_18380_20141023_154240_outLine
+BABEL_OP3_401_18566_20141120_004140_inLine
+BABEL_OP3_401_18566_20141120_004140_outLine
+BABEL_OP3_401_18924_20141030_205111_inLine
+BABEL_OP3_401_18924_20141030_205111_outLine
+BABEL_OP3_401_18939_20141001_200418_inLine
+BABEL_OP3_401_18939_20141001_200418_outLine
+BABEL_OP3_401_19101_20141029_183652_inLine
+BABEL_OP3_401_19101_20141029_183652_outLine
+BABEL_OP3_401_19134_20141030_191814_inLine
+BABEL_OP3_401_19134_20141030_191814_outLine
+BABEL_OP3_401_19134_20141030_192931_inLine
+BABEL_OP3_401_19134_20141030_192931_outLine
+BABEL_OP3_401_19722_20140930_200553_inLine
+BABEL_OP3_401_19722_20140930_200553_outLine
+BABEL_OP3_401_19773_20141101_211403_inLine
+BABEL_OP3_401_19773_20141101_211403_outLine
+BABEL_OP3_401_19818_20141103_184746_inLine
+BABEL_OP3_401_19818_20141103_184746_outLine
+BABEL_OP3_401_19818_20141103_185728_inLine
+BABEL_OP3_401_19818_20141103_185728_outLine
+BABEL_OP3_401_20133_20140919_173858_inLine
+BABEL_OP3_401_20133_20140919_173858_outLine
+BABEL_OP3_401_20916_20140922_174215_inLine
+BABEL_OP3_401_20916_20140922_174215_outLine
+BABEL_OP3_401_20922_20141110_190444_inLine
+BABEL_OP3_401_20922_20141110_190444_outLine
+BABEL_OP3_401_21206_20141003_120941_inLine
+BABEL_OP3_401_21206_20141003_120941_outLine
+BABEL_OP3_401_21206_20141003_122457_inLine
+BABEL_OP3_401_21206_20141003_122457_outLine
+BABEL_OP3_401_21327_20141020_204038_inLine
+BABEL_OP3_401_21327_20141020_204038_outLine
+BABEL_OP3_401_21807_20141029_214508_inLine
+BABEL_OP3_401_21807_20141029_214508_outLine
+BABEL_OP3_401_22321_20140929_180456_inLine
+BABEL_OP3_401_22321_20140929_180456_outLine
+BABEL_OP3_401_22446_20140929_133647_inLine
+BABEL_OP3_401_22446_20140929_133647_outLine
+BABEL_OP3_401_22624_20141001_141008_inLine
+BABEL_OP3_401_22624_20141001_141008_outLine
+BABEL_OP3_401_22918_20141114_145920_inLine
+BABEL_OP3_401_22918_20141114_145920_outLine
+BABEL_OP3_401_23006_20141014_190149_inLine
+BABEL_OP3_401_23006_20141014_190149_outLine
+BABEL_OP3_401_23046_20141014_150823_inLine
+BABEL_OP3_401_23046_20141014_150823_outLine
+BABEL_OP3_401_23092_20141010_141138_inLine
+BABEL_OP3_401_23092_20141010_141138_outLine
+BABEL_OP3_401_23153_20141018_201630_inLine
+BABEL_OP3_401_23153_20141018_201630_outLine
+BABEL_OP3_401_23893_20141125_213344_inLine
+BABEL_OP3_401_23893_20141125_213344_outLine
+BABEL_OP3_401_23980_20141018_192714_inLine
+BABEL_OP3_401_23980_20141018_192714_outLine
+BABEL_OP3_401_24270_20141030_195323_inLine
+BABEL_OP3_401_24270_20141030_195323_outLine
+BABEL_OP3_401_24290_20141124_184351_inLine
+BABEL_OP3_401_24290_20141124_184351_outLine
+BABEL_OP3_401_24323_20141017_151036_inLine
+BABEL_OP3_401_24323_20141017_151036_outLine
+BABEL_OP3_401_24470_20141029_145653_inLine
+BABEL_OP3_401_24470_20141029_145653_outLine
+BABEL_OP3_401_24569_20141016_182323_inLine
+BABEL_OP3_401_24569_20141016_182323_outLine
+BABEL_OP3_401_24586_20141117_160948_inLine
+BABEL_OP3_401_24586_20141117_160948_outLine
+BABEL_OP3_401_24590_20141017_175757_inLine
+BABEL_OP3_401_24590_20141017_175757_outLine
+BABEL_OP3_401_24605_20141001_142727_inLine
+BABEL_OP3_401_24605_20141001_142727_outLine
+BABEL_OP3_401_24679_20140919_185323_inLine
+BABEL_OP3_401_24679_20140919_185323_outLine
+BABEL_OP3_401_24982_20141008_150245_inLine
+BABEL_OP3_401_24982_20141008_150245_outLine
+BABEL_OP3_401_25412_20141031_171749_inLine
+BABEL_OP3_401_25412_20141031_171749_outLine
+BABEL_OP3_401_25719_20141110_191042_inLine
+BABEL_OP3_401_25719_20141110_191042_outLine
+BABEL_OP3_401_25961_20140929_183632_inLine
+BABEL_OP3_401_25961_20140929_183632_outLine
+BABEL_OP3_401_26072_20141112_173131_inLine
+BABEL_OP3_401_26072_20141112_173131_outLine
+BABEL_OP3_401_26398_20141204_001557_inLine
+BABEL_OP3_401_26398_20141204_001557_outLine
+BABEL_OP3_401_26507_20141118_210109_inLine
+BABEL_OP3_401_26507_20141118_210109_outLine
+BABEL_OP3_401_26574_20141103_163656_inLine
+BABEL_OP3_401_26574_20141103_163656_outLine
+BABEL_OP3_401_26999_20141101_213851_inLine
+BABEL_OP3_401_26999_20141101_213851_outLine
+BABEL_OP3_401_27042_20141017_184608_inLine
+BABEL_OP3_401_27042_20141017_184608_outLine
+BABEL_OP3_401_27203_20141030_164916_inLine
+BABEL_OP3_401_27203_20141030_164916_outLine
+BABEL_OP3_401_27841_20141113_200006_inLine
+BABEL_OP3_401_27841_20141113_200006_outLine
+BABEL_OP3_401_28522_20140927_172947_inLine
+BABEL_OP3_401_28522_20140927_172947_outLine
+BABEL_OP3_401_28600_20141021_194818_inLine
+BABEL_OP3_401_28600_20141021_194818_outLine
+BABEL_OP3_401_28775_20141003_162126_inLine
+BABEL_OP3_401_28775_20141003_162126_outLine
+BABEL_OP3_401_28814_20141112_190902_inLine
+BABEL_OP3_401_28814_20141112_190902_outLine
+BABEL_OP3_401_29021_20141118_205619_inLine
+BABEL_OP3_401_29021_20141118_205619_outLine
+BABEL_OP3_401_29076_20141031_003943_inLine
+BABEL_OP3_401_29076_20141031_003943_outLine
+BABEL_OP3_401_29323_20141113_190829_inLine
+BABEL_OP3_401_29323_20141113_190829_outLine
+BABEL_OP3_401_29404_20141121_153054_inLine
+BABEL_OP3_401_29404_20141121_153054_outLine
+BABEL_OP3_401_29685_20141019_210404_inLine
+BABEL_OP3_401_29685_20141019_210404_outLine
+BABEL_OP3_401_29685_20141019_210959_inLine
+BABEL_OP3_401_29685_20141019_210959_outLine
+BABEL_OP3_401_30013_20140927_141830_inLine
+BABEL_OP3_401_30013_20140927_141830_outLine
+BABEL_OP3_401_30058_20141118_221622_inLine
+BABEL_OP3_401_30058_20141118_221622_outLine
+BABEL_OP3_401_31346_20141103_145401_inLine
+BABEL_OP3_401_31346_20141103_145401_outLine
+BABEL_OP3_401_31624_20141003_192655_inLine
+BABEL_OP3_401_31624_20141003_192655_outLine
+BABEL_OP3_401_31628_20140923_145349_inLine
+BABEL_OP3_401_31628_20140923_145349_outLine
+BABEL_OP3_401_31992_20141001_135942_inLine
+BABEL_OP3_401_31992_20141001_135942_outLine
+BABEL_OP3_401_32122_20141016_212210_inLine
+BABEL_OP3_401_32122_20141016_212210_outLine
+BABEL_OP3_401_32328_20141018_200856_inLine
+BABEL_OP3_401_32328_20141018_200856_outLine
+BABEL_OP3_401_32708_20141003_200927_inLine
+BABEL_OP3_401_32708_20141003_200927_outLine
+BABEL_OP3_401_33111_20141122_223105_inLine
+BABEL_OP3_401_33111_20141122_223105_outLine
+BABEL_OP3_401_33273_20141021_153659_inLine
+BABEL_OP3_401_33273_20141021_153659_outLine
+BABEL_OP3_401_33355_20141001_174510_inLine
+BABEL_OP3_401_33355_20141001_174510_outLine
+BABEL_OP3_401_33497_20141106_201923_inLine
+BABEL_OP3_401_33497_20141106_201923_outLine
+BABEL_OP3_401_33672_20140930_132456_inLine
+BABEL_OP3_401_33672_20140930_132456_outLine
+BABEL_OP3_401_33672_20140930_133426_inLine
+BABEL_OP3_401_33672_20140930_133426_outLine
+BABEL_OP3_401_33913_20141020_135517_inLine
+BABEL_OP3_401_33913_20141020_135517_outLine
+BABEL_OP3_401_34197_20140919_193654_inLine
+BABEL_OP3_401_34197_20140919_193654_outLine
+BABEL_OP3_401_34328_20141020_142248_inLine
+BABEL_OP3_401_34328_20141020_142248_outLine
+BABEL_OP3_401_34679_20141006_155637_inLine
+BABEL_OP3_401_34679_20141006_155637_outLine
+BABEL_OP3_401_35139_20141002_182038_inLine
+BABEL_OP3_401_35139_20141002_182038_outLine
+BABEL_OP3_401_35143_20141010_163440_inLine
+BABEL_OP3_401_35143_20141010_163440_outLine
+BABEL_OP3_401_35467_20140919_155737_inLine
+BABEL_OP3_401_35467_20140919_155737_outLine
+BABEL_OP3_401_35467_20140919_162819_inLine
+BABEL_OP3_401_35467_20140919_162819_outLine
+BABEL_OP3_401_36894_20140921_162105_inLine
+BABEL_OP3_401_36894_20140921_162105_outLine
+BABEL_OP3_401_37285_20140929_192149_inLine
+BABEL_OP3_401_37285_20140929_192149_outLine
+BABEL_OP3_401_37290_20141031_174340_inLine
+BABEL_OP3_401_37290_20141031_174340_outLine
+BABEL_OP3_401_37598_20141031_155805_inLine
+BABEL_OP3_401_37598_20141031_155805_outLine
+BABEL_OP3_401_38340_20141003_182953_inLine
+BABEL_OP3_401_38340_20141003_182953_outLine
+BABEL_OP3_401_38878_20141031_201014_inLine
+BABEL_OP3_401_38878_20141031_201014_outLine
+BABEL_OP3_401_39307_20140922_113434_inLine
+BABEL_OP3_401_39307_20140922_113434_outLine
+BABEL_OP3_401_39426_20141114_165136_inLine
+BABEL_OP3_401_39426_20141114_165136_outLine
+BABEL_OP3_401_39920_20141118_215327_inLine
+BABEL_OP3_401_39920_20141118_215327_outLine
+BABEL_OP3_401_40557_20141014_182351_inLine
+BABEL_OP3_401_40557_20141014_182351_outLine
+BABEL_OP3_401_40713_20141003_155735_inLine
+BABEL_OP3_401_40713_20141003_155735_outLine
+BABEL_OP3_401_41592_20141020_140853_inLine
+BABEL_OP3_401_41592_20141020_140853_outLine
+BABEL_OP3_401_41598_20141113_151053_inLine
+BABEL_OP3_401_41598_20141113_151053_outLine
+BABEL_OP3_401_41618_20141028_201644_inLine
+BABEL_OP3_401_41618_20141028_201644_outLine
+BABEL_OP3_401_42029_20141113_160852_inLine
+BABEL_OP3_401_42029_20141113_160852_outLine
+BABEL_OP3_401_42155_20141028_185638_inLine
+BABEL_OP3_401_42155_20141028_185638_outLine
+BABEL_OP3_401_42434_20141019_233012_inLine
+BABEL_OP3_401_42434_20141019_233012_outLine
+BABEL_OP3_401_42497_20141002_144745_inLine
+BABEL_OP3_401_42497_20141002_144745_outLine
+BABEL_OP3_401_42619_20141104_204106_inLine
+BABEL_OP3_401_42619_20141104_204106_outLine
+BABEL_OP3_401_42771_20141028_135131_inLine
+BABEL_OP3_401_42771_20141028_135131_outLine
+BABEL_OP3_401_42834_20141103_204826_inLine
+BABEL_OP3_401_42834_20141103_204826_outLine
+BABEL_OP3_401_42942_20141018_160034_inLine
+BABEL_OP3_401_42942_20141018_160034_outLine
+BABEL_OP3_401_43286_20140923_144213_inLine
+BABEL_OP3_401_43286_20140923_144213_outLine
+BABEL_OP3_401_43646_20140917_164218_inLine
+BABEL_OP3_401_43646_20140917_164218_outLine
+BABEL_OP3_401_43784_20141008_215339_inLine
+BABEL_OP3_401_43784_20141008_215339_outLine
+BABEL_OP3_401_43788_20140925_172756_inLine
+BABEL_OP3_401_43788_20140925_172756_outLine
+BABEL_OP3_401_45201_20141127_132656_inLine
+BABEL_OP3_401_45201_20141127_132656_outLine
+BABEL_OP3_401_45560_20140930_190100_inLine
+BABEL_OP3_401_45560_20140930_190100_outLine
+BABEL_OP3_401_46261_20141021_185026_inLine
+BABEL_OP3_401_46261_20141021_185026_outLine
+BABEL_OP3_401_46310_20140930_153138_inLine
+BABEL_OP3_401_46310_20140930_153138_outLine
+BABEL_OP3_401_46550_20141006_181152_inLine
+BABEL_OP3_401_46550_20141006_181152_outLine
+BABEL_OP3_401_46558_20140924_164642_inLine
+BABEL_OP3_401_46558_20140924_164642_outLine
+BABEL_OP3_401_46589_20140924_191634_inLine
+BABEL_OP3_401_46589_20140924_191634_outLine
+BABEL_OP3_401_46681_20141002_163836_inLine
+BABEL_OP3_401_46681_20141002_163836_outLine
+BABEL_OP3_401_46688_20141001_201358_inLine
+BABEL_OP3_401_46688_20141001_201358_outLine
+BABEL_OP3_401_46770_20141111_221929_inLine
+BABEL_OP3_401_46770_20141111_221929_outLine
+BABEL_OP3_401_46881_20140922_175212_inLine
+BABEL_OP3_401_46881_20140922_175212_outLine
+BABEL_OP3_401_47283_20141006_193958_inLine
+BABEL_OP3_401_47283_20141006_193958_outLine
+BABEL_OP3_401_47487_20141016_162401_inLine
+BABEL_OP3_401_47487_20141016_162401_outLine
+BABEL_OP3_401_47866_20141124_164427_inLine
+BABEL_OP3_401_47866_20141124_164427_outLine
+BABEL_OP3_401_47878_20141030_173221_inLine
+BABEL_OP3_401_47878_20141030_173221_outLine
+BABEL_OP3_401_48243_20141006_175215_inLine
+BABEL_OP3_401_48243_20141006_175215_outLine
+BABEL_OP3_401_48610_20141001_225254_inLine
+BABEL_OP3_401_48610_20141001_225254_outLine
+BABEL_OP3_401_49001_20141014_165716_inLine
+BABEL_OP3_401_49001_20141014_165716_outLine
+BABEL_OP3_401_49306_20141124_193818_inLine
+BABEL_OP3_401_49306_20141124_193818_outLine
+BABEL_OP3_401_50427_20141028_152244_inLine
+BABEL_OP3_401_50427_20141028_152244_outLine
+BABEL_OP3_401_51407_20141027_182114_inLine
+BABEL_OP3_401_51407_20141027_182114_outLine
+BABEL_OP3_401_51968_20141019_151724_inLine
+BABEL_OP3_401_51968_20141019_151724_outLine
+BABEL_OP3_401_52404_20140924_182534_inLine
+BABEL_OP3_401_52404_20140924_182534_outLine
+BABEL_OP3_401_52725_20141123_224942_inLine
+BABEL_OP3_401_52725_20141123_224942_outLine
+BABEL_OP3_401_52818_20140922_184227_inLine
+BABEL_OP3_401_52818_20140922_184227_outLine
+BABEL_OP3_401_53957_20141020_142913_inLine
+BABEL_OP3_401_53957_20141020_142913_outLine
+BABEL_OP3_401_54074_20141021_142528_inLine
+BABEL_OP3_401_54074_20141021_142528_outLine
+BABEL_OP3_401_54162_20141107_221207_inLine
+BABEL_OP3_401_54162_20141107_221207_outLine
+BABEL_OP3_401_56331_20141124_184702_inLine
+BABEL_OP3_401_56331_20141124_184702_outLine
+BABEL_OP3_401_56677_20141020_160804_inLine
+BABEL_OP3_401_56677_20141020_160804_outLine
+BABEL_OP3_401_57065_20140924_135508_inLine
+BABEL_OP3_401_57065_20140924_135508_outLine
+BABEL_OP3_401_57529_20141017_181551_inLine
+BABEL_OP3_401_57529_20141017_181551_outLine
+BABEL_OP3_401_57542_20141122_182629_inLine
+BABEL_OP3_401_57542_20141122_182629_outLine
+BABEL_OP3_401_58006_20141124_153854_inLine
+BABEL_OP3_401_58006_20141124_153854_outLine
+BABEL_OP3_401_58006_20141124_155107_inLine
+BABEL_OP3_401_58006_20141124_155107_outLine
+BABEL_OP3_401_58734_20140930_173126_inLine
+BABEL_OP3_401_58734_20140930_173126_outLine
+BABEL_OP3_401_58821_20140930_211254_inLine
+BABEL_OP3_401_58821_20140930_211254_outLine
+BABEL_OP3_401_59078_20141030_203852_inLine
+BABEL_OP3_401_59078_20141030_203852_outLine
+BABEL_OP3_401_59078_20141030_205139_inLine
+BABEL_OP3_401_59078_20141030_205139_outLine
+BABEL_OP3_401_60026_20141002_115024_inLine
+BABEL_OP3_401_60026_20141002_115024_outLine
+BABEL_OP3_401_60310_20141017_165419_inLine
+BABEL_OP3_401_60310_20141017_165419_outLine
+BABEL_OP3_401_60474_20141015_154855_inLine
+BABEL_OP3_401_60474_20141015_154855_outLine
+BABEL_OP3_401_63906_20141124_212323_inLine
+BABEL_OP3_401_63906_20141124_212323_outLine
+BABEL_OP3_401_64398_20140922_165727_inLine
+BABEL_OP3_401_64398_20140922_165727_outLine
+BABEL_OP3_401_65077_20140917_151315_inLine
+BABEL_OP3_401_65077_20140917_151315_outLine
+BABEL_OP3_401_65367_20141111_163221_inLine
+BABEL_OP3_401_65367_20141111_163221_outLine
+BABEL_OP3_401_66472_20141027_173935_inLine
+BABEL_OP3_401_66472_20141027_173935_outLine
+BABEL_OP3_401_68068_20140925_140055_inLine
+BABEL_OP3_401_68068_20140925_140055_outLine
+BABEL_OP3_401_68384_20141020_225435_inLine
+BABEL_OP3_401_68384_20141020_225435_outLine
+BABEL_OP3_401_68385_20140919_175351_inLine
+BABEL_OP3_401_68385_20140919_175351_outLine
+BABEL_OP3_401_68748_20140925_160756_inLine
+BABEL_OP3_401_68748_20140925_160756_outLine
+BABEL_OP3_401_69474_20140930_190551_inLine
+BABEL_OP3_401_69474_20140930_190551_outLine
+BABEL_OP3_401_69636_20140924_174446_inLine
+BABEL_OP3_401_69636_20140924_174446_outLine
+BABEL_OP3_401_71566_20141001_171842_inLine
+BABEL_OP3_401_71566_20141001_171842_outLine
+BABEL_OP3_401_72040_20141009_171306_inLine
+BABEL_OP3_401_72040_20141009_171306_outLine
+BABEL_OP3_401_72110_20141001_122146_inLine
+BABEL_OP3_401_72110_20141001_122146_outLine
+BABEL_OP3_401_72844_20140919_154733_inLine
+BABEL_OP3_401_72844_20140919_154733_outLine
+BABEL_OP3_401_72844_20140919_162600_inLine
+BABEL_OP3_401_72844_20140919_162600_outLine
+BABEL_OP3_401_73022_20141111_173204_inLine
+BABEL_OP3_401_73022_20141111_173204_outLine
+BABEL_OP3_401_73430_20140930_142250_inLine
+BABEL_OP3_401_73430_20140930_142250_outLine
+BABEL_OP3_401_73591_20140904_190044_inLine
+BABEL_OP3_401_73591_20140904_190044_outLine
+BABEL_OP3_401_74667_20141017_173017_inLine
+BABEL_OP3_401_74667_20141017_173017_outLine
+BABEL_OP3_401_74799_20141030_203910_inLine
+BABEL_OP3_401_74799_20141030_203910_outLine
+BABEL_OP3_401_74921_20140924_165937_inLine
+BABEL_OP3_401_74921_20140924_165937_outLine
+BABEL_OP3_401_75505_20140917_155231_inLine
+BABEL_OP3_401_75505_20140917_155231_outLine
+BABEL_OP3_401_76126_20141018_171804_inLine
+BABEL_OP3_401_76126_20141018_171804_outLine
+BABEL_OP3_401_76437_20140904_161741_inLine
+BABEL_OP3_401_76437_20140904_161741_outLine
+BABEL_OP3_401_76444_20141014_203500_inLine
+BABEL_OP3_401_76444_20141014_203500_outLine
+BABEL_OP3_401_76499_20141022_151625_inLine
+BABEL_OP3_401_76499_20141022_151625_outLine
+BABEL_OP3_401_77744_20141014_125609_inLine
+BABEL_OP3_401_77744_20141014_125609_outLine
+BABEL_OP3_401_77744_20141014_140124_inLine
+BABEL_OP3_401_77744_20141014_140124_outLine
+BABEL_OP3_401_78482_20141104_155857_inLine
+BABEL_OP3_401_78482_20141104_155857_outLine
+BABEL_OP3_401_79080_20141112_120644_inLine
+BABEL_OP3_401_79080_20141112_120644_outLine
+BABEL_OP3_401_79131_20141125_193444_inLine
+BABEL_OP3_401_79131_20141125_193444_outLine
+BABEL_OP3_401_79167_20140925_132420_inLine
+BABEL_OP3_401_79167_20140925_132420_outLine
+BABEL_OP3_401_79995_20141020_232746_inLine
+BABEL_OP3_401_79995_20141020_232746_outLine
+BABEL_OP3_401_80136_20141112_134414_inLine
+BABEL_OP3_401_80136_20141112_134414_outLine
+BABEL_OP3_401_80306_20141110_184642_inLine
+BABEL_OP3_401_80306_20141110_184642_outLine
+BABEL_OP3_401_80439_20141015_141847_inLine
+BABEL_OP3_401_80439_20141015_141847_outLine
+BABEL_OP3_401_80559_20141003_131820_inLine
+BABEL_OP3_401_80559_20141003_131820_outLine
+BABEL_OP3_401_81287_20141001_145404_inLine
+BABEL_OP3_401_81287_20141001_145404_outLine
+BABEL_OP3_401_81433_20141027_184533_inLine
+BABEL_OP3_401_81433_20141027_184533_outLine
+BABEL_OP3_401_81622_20141021_162012_inLine
+BABEL_OP3_401_81622_20141021_162012_outLine
+BABEL_OP3_401_82035_20141030_173356_inLine
+BABEL_OP3_401_82035_20141030_173356_outLine
+BABEL_OP3_401_82035_20141030_174442_inLine
+BABEL_OP3_401_82035_20141030_174442_outLine
+BABEL_OP3_401_82224_20141111_175445_inLine
+BABEL_OP3_401_82224_20141111_175445_outLine
+BABEL_OP3_401_84547_20140917_192745_inLine
+BABEL_OP3_401_84547_20140917_192745_outLine
+BABEL_OP3_401_84547_20140917_194346_inLine
+BABEL_OP3_401_84547_20140917_194346_outLine
+BABEL_OP3_401_86676_20140924_200749_inLine
+BABEL_OP3_401_86676_20140924_200749_outLine
+BABEL_OP3_401_87073_20140917_201716_inLine
+BABEL_OP3_401_87073_20140917_201716_outLine
+BABEL_OP3_401_87871_20141018_185934_inLine
+BABEL_OP3_401_87871_20141018_185934_outLine
+BABEL_OP3_401_87921_20141010_173551_inLine
+BABEL_OP3_401_87921_20141010_173551_outLine
+BABEL_OP3_401_88783_20141022_171250_inLine
+BABEL_OP3_401_88783_20141022_171250_outLine
+BABEL_OP3_401_90737_20141020_180826_inLine
+BABEL_OP3_401_90737_20141020_180826_outLine
+BABEL_OP3_401_91891_20141001_130023_inLine
+BABEL_OP3_401_91891_20141001_130023_outLine
+BABEL_OP3_401_91977_20140925_184203_inLine
+BABEL_OP3_401_91977_20140925_184203_outLine
+BABEL_OP3_401_92065_20141017_191557_inLine
+BABEL_OP3_401_92065_20141017_191557_outLine
+BABEL_OP3_401_92736_20141017_194915_inLine
+BABEL_OP3_401_92736_20141017_194915_outLine
+BABEL_OP3_401_92740_20140926_150615_inLine
+BABEL_OP3_401_92740_20140926_150615_outLine
+BABEL_OP3_401_93490_20141106_171428_inLine
+BABEL_OP3_401_93490_20141106_171428_outLine
+BABEL_OP3_401_94745_20140923_154933_inLine
+BABEL_OP3_401_94745_20140923_154933_outLine
+BABEL_OP3_401_95077_20141010_153959_inLine
+BABEL_OP3_401_95077_20141010_153959_outLine
+BABEL_OP3_401_95294_20140923_173007_inLine
+BABEL_OP3_401_95294_20140923_173007_outLine
+BABEL_OP3_401_95446_20141112_154248_inLine
+BABEL_OP3_401_95446_20141112_154248_outLine
+BABEL_OP3_401_95663_20140917_182410_inLine
+BABEL_OP3_401_95663_20140917_182410_outLine
+BABEL_OP3_401_96324_20140921_170922_inLine
+BABEL_OP3_401_96324_20140921_170922_outLine
+BABEL_OP3_401_97376_20140929_154000_inLine
+BABEL_OP3_401_97376_20140929_154000_outLine
+BABEL_OP3_401_97772_20140917_144539_inLine
+BABEL_OP3_401_97772_20140917_144539_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list
new file mode 100644
index 00000000000..45d13cc017e
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list
@@ -0,0 +1,530 @@
+BABEL_OP3_401_10184_20141027_150129_inLine
+BABEL_OP3_401_10184_20141027_150129_outLine
+BABEL_OP3_401_12321_20141101_210546_inLine
+BABEL_OP3_401_12321_20141101_210546_outLine
+BABEL_OP3_401_13178_20141101_181249_inLine
+BABEL_OP3_401_13178_20141101_181249_outLine
+BABEL_OP3_401_13189_20141114_170101_inLine
+BABEL_OP3_401_13189_20141114_170101_outLine
+BABEL_OP3_401_13189_20141114_174825_inLine
+BABEL_OP3_401_13189_20141114_174825_outLine
+BABEL_OP3_401_13427_20141027_145236_inLine
+BABEL_OP3_401_13427_20141027_145236_outLine
+BABEL_OP3_401_13792_20141001_135314_inLine
+BABEL_OP3_401_13792_20141001_135314_outLine
+BABEL_OP3_401_14440_20141101_191122_inLine
+BABEL_OP3_401_14440_20141101_191122_outLine
+BABEL_OP3_401_15042_20141125_181147_inLine
+BABEL_OP3_401_15042_20141125_181147_outLine
+BABEL_OP3_401_17280_20141016_160258_inLine
+BABEL_OP3_401_17280_20141016_160258_outLine
+BABEL_OP3_401_17496_20141103_155636_inLine
+BABEL_OP3_401_17496_20141103_155636_outLine
+BABEL_OP3_401_17496_20141103_160636_inLine
+BABEL_OP3_401_17496_20141103_160636_outLine
+BABEL_OP3_401_18118_20141125_212628_inLine
+BABEL_OP3_401_18118_20141125_212628_outLine
+BABEL_OP3_401_19130_20141125_202758_inLine
+BABEL_OP3_401_19130_20141125_202758_outLine
+BABEL_OP3_401_20738_20141118_152747_inLine
+BABEL_OP3_401_20738_20141118_152747_outLine
+BABEL_OP3_401_20800_20141002_124944_inLine
+BABEL_OP3_401_20800_20141002_124944_outLine
+BABEL_OP3_401_21435_20141121_182922_inLine
+BABEL_OP3_401_21435_20141121_182922_outLine
+BABEL_OP3_401_21624_20141124_180637_inLine
+BABEL_OP3_401_21624_20141124_180637_outLine
+BABEL_OP3_401_22170_20141118_234144_inLine
+BABEL_OP3_401_22170_20141118_234144_outLine
+BABEL_OP3_401_22280_20141029_152053_inLine
+BABEL_OP3_401_22280_20141029_152053_outLine
+BABEL_OP3_401_23151_20141113_164930_inLine
+BABEL_OP3_401_23151_20141113_164930_outLine
+BABEL_OP3_401_23151_20141113_165631_inLine
+BABEL_OP3_401_23151_20141113_165631_outLine
+BABEL_OP3_401_24017_20141104_165843_inLine
+BABEL_OP3_401_24017_20141104_165843_outLine
+BABEL_OP3_401_24017_20141104_170453_inLine
+BABEL_OP3_401_24017_20141104_170453_outLine
+BABEL_OP3_401_24033_20141111_211829_inLine
+BABEL_OP3_401_24033_20141111_211829_outLine
+BABEL_OP3_401_24239_20141207_015922_inLine
+BABEL_OP3_401_24239_20141207_015922_outLine
+BABEL_OP3_401_24501_20141117_164716_inLine
+BABEL_OP3_401_24501_20141117_164716_outLine
+BABEL_OP3_401_25015_20141125_191714_inLine
+BABEL_OP3_401_25015_20141125_191714_outLine
+BABEL_OP3_401_30653_20141119_225659_inLine
+BABEL_OP3_401_30653_20141119_225659_outLine
+BABEL_OP3_401_30869_20141105_163908_inLine
+BABEL_OP3_401_30869_20141105_163908_outLine
+BABEL_OP3_401_30869_20141105_165054_inLine
+BABEL_OP3_401_30869_20141105_165054_outLine
+BABEL_OP3_401_31182_20141103_193721_inLine
+BABEL_OP3_401_31182_20141103_193721_outLine
+BABEL_OP3_401_31583_20141017_202348_inLine
+BABEL_OP3_401_31583_20141017_202348_outLine
+BABEL_OP3_401_32832_20141106_155802_inLine
+BABEL_OP3_401_32832_20141106_155802_outLine
+BABEL_OP3_401_33635_20141018_183504_inLine
+BABEL_OP3_401_33635_20141018_183504_outLine
+BABEL_OP3_401_33635_20141018_204625_inLine
+BABEL_OP3_401_33635_20141018_204625_outLine
+BABEL_OP3_401_33840_20141105_190509_inLine
+BABEL_OP3_401_33840_20141105_190509_outLine
+BABEL_OP3_401_34336_20141015_173115_inLine
+BABEL_OP3_401_34336_20141015_173115_outLine
+BABEL_OP3_401_35706_20141123_232430_inLine
+BABEL_OP3_401_35706_20141123_232430_outLine
+BABEL_OP3_401_36059_20141120_202614_inLine
+BABEL_OP3_401_36059_20141120_202614_outLine
+BABEL_OP3_401_36594_20141118_225937_inLine
+BABEL_OP3_401_36594_20141118_225937_outLine
+BABEL_OP3_401_36669_20141028_143332_inLine
+BABEL_OP3_401_36669_20141028_143332_outLine
+BABEL_OP3_401_36900_20141105_173543_inLine
+BABEL_OP3_401_36900_20141105_173543_outLine
+BABEL_OP3_401_38076_20141101_181606_inLine
+BABEL_OP3_401_38076_20141101_181606_outLine
+BABEL_OP3_401_38125_20141207_005829_inLine
+BABEL_OP3_401_38125_20141207_005829_outLine
+BABEL_OP3_401_38125_20141207_010858_inLine
+BABEL_OP3_401_38125_20141207_010858_outLine
+BABEL_OP3_401_38588_20141016_200521_inLine
+BABEL_OP3_401_38588_20141016_200521_outLine
+BABEL_OP3_401_39099_20141204_002759_inLine
+BABEL_OP3_401_39099_20141204_002759_outLine
+BABEL_OP3_401_40740_20141110_180540_inLine
+BABEL_OP3_401_40740_20141110_180540_outLine
+BABEL_OP3_401_41109_20141111_174909_inLine
+BABEL_OP3_401_41109_20141111_174909_outLine
+BABEL_OP3_401_41233_20141104_180556_inLine
+BABEL_OP3_401_41233_20141104_180556_outLine
+BABEL_OP3_401_41609_20140930_160252_inLine
+BABEL_OP3_401_41609_20140930_160252_outLine
+BABEL_OP3_401_42718_20141203_164339_inLine
+BABEL_OP3_401_42718_20141203_164339_outLine
+BABEL_OP3_401_42718_20141203_165811_inLine
+BABEL_OP3_401_42718_20141203_165811_outLine
+BABEL_OP3_401_44255_20141114_190226_inLine
+BABEL_OP3_401_44255_20141114_190226_outLine
+BABEL_OP3_401_44290_20141125_182137_inLine
+BABEL_OP3_401_44290_20141125_182137_outLine
+BABEL_OP3_401_45770_20140930_173734_inLine
+BABEL_OP3_401_45770_20140930_173734_outLine
+BABEL_OP3_401_46008_20141127_210910_inLine
+BABEL_OP3_401_46008_20141127_210910_outLine
+BABEL_OP3_401_46763_20141119_173306_inLine
+BABEL_OP3_401_46763_20141119_173306_outLine
+BABEL_OP3_401_48907_20141125_203242_inLine
+BABEL_OP3_401_48907_20141125_203242_outLine
+BABEL_OP3_401_49287_20141102_150144_inLine
+BABEL_OP3_401_49287_20141102_150144_outLine
+BABEL_OP3_401_49502_20140924_135047_inLine
+BABEL_OP3_401_49502_20140924_135047_outLine
+BABEL_OP3_401_49637_20140929_203313_inLine
+BABEL_OP3_401_49637_20140929_203313_outLine
+BABEL_OP3_401_49775_20140917_162425_inLine
+BABEL_OP3_401_49775_20140917_162425_outLine
+BABEL_OP3_401_49812_20141111_182212_inLine
+BABEL_OP3_401_49812_20141111_182212_outLine
+BABEL_OP3_401_49902_20141015_154547_inLine
+BABEL_OP3_401_49902_20141015_154547_outLine
+BABEL_OP3_401_50090_20141031_162652_inLine
+BABEL_OP3_401_50090_20141031_162652_outLine
+BABEL_OP3_401_50175_20140923_130231_inLine
+BABEL_OP3_401_50175_20140923_130231_outLine
+BABEL_OP3_401_50958_20141018_223514_inLine
+BABEL_OP3_401_50958_20141018_223514_outLine
+BABEL_OP3_401_51417_20141110_191727_inLine
+BABEL_OP3_401_51417_20141110_191727_outLine
+BABEL_OP3_401_51530_20141204_001348_inLine
+BABEL_OP3_401_51530_20141204_001348_outLine
+BABEL_OP3_401_53072_20141127_201357_inLine
+BABEL_OP3_401_53072_20141127_201357_outLine
+BABEL_OP3_401_53415_20141118_232010_inLine
+BABEL_OP3_401_53415_20141118_232010_outLine
+BABEL_OP3_401_53492_20141124_201111_inLine
+BABEL_OP3_401_53492_20141124_201111_outLine
+BABEL_OP3_401_53665_20141125_180322_inLine
+BABEL_OP3_401_53665_20141125_180322_outLine
+BABEL_OP3_401_54160_20140930_215406_inLine
+BABEL_OP3_401_54160_20140930_215406_outLine
+BABEL_OP3_401_54405_20141027_133437_inLine
+BABEL_OP3_401_54405_20141027_133437_outLine
+BABEL_OP3_401_55742_20141003_153216_inLine
+BABEL_OP3_401_55742_20141003_153216_outLine
+BABEL_OP3_401_55818_20140930_191724_inLine
+BABEL_OP3_401_55818_20140930_191724_outLine
+BABEL_OP3_401_55950_20141125_195752_inLine
+BABEL_OP3_401_55950_20141125_195752_outLine
+BABEL_OP3_401_56019_20141118_211141_inLine
+BABEL_OP3_401_56019_20141118_211141_outLine
+BABEL_OP3_401_56523_20141017_152325_inLine
+BABEL_OP3_401_56523_20141017_152325_outLine
+BABEL_OP3_401_56743_20141016_193127_inLine
+BABEL_OP3_401_56743_20141016_193127_outLine
+BABEL_OP3_401_57067_20141110_211445_inLine
+BABEL_OP3_401_57067_20141110_211445_outLine
+BABEL_OP3_401_57609_20141028_162956_inLine
+BABEL_OP3_401_57609_20141028_162956_outLine
+BABEL_OP3_401_57650_20141117_142921_inLine
+BABEL_OP3_401_57650_20141117_142921_outLine
+BABEL_OP3_401_57654_20141002_120228_inLine
+BABEL_OP3_401_57654_20141002_120228_outLine
+BABEL_OP3_401_57678_20141015_161604_inLine
+BABEL_OP3_401_57678_20141015_161604_outLine
+BABEL_OP3_401_58585_20141112_192259_inLine
+BABEL_OP3_401_58585_20141112_192259_outLine
+BABEL_OP3_401_58850_20141017_141308_inLine
+BABEL_OP3_401_58850_20141017_141308_outLine
+BABEL_OP3_401_58926_20141003_143419_inLine
+BABEL_OP3_401_58926_20141003_143419_outLine
+BABEL_OP3_401_59291_20141017_162350_inLine
+BABEL_OP3_401_59291_20141017_162350_outLine
+BABEL_OP3_401_59864_20141206_195010_inLine
+BABEL_OP3_401_59864_20141206_195010_outLine
+BABEL_OP3_401_60626_20141003_151111_inLine
+BABEL_OP3_401_60626_20141003_151111_outLine
+BABEL_OP3_401_60661_20141002_182507_inLine
+BABEL_OP3_401_60661_20141002_182507_outLine
+BABEL_OP3_401_60836_20141013_164932_inLine
+BABEL_OP3_401_60836_20141013_164932_outLine
+BABEL_OP3_401_61219_20141015_175439_inLine
+BABEL_OP3_401_61219_20141015_175439_outLine
+BABEL_OP3_401_61357_20141113_164017_inLine
+BABEL_OP3_401_61357_20141113_164017_outLine
+BABEL_OP3_401_61435_20141104_205806_inLine
+BABEL_OP3_401_61435_20141104_205806_outLine
+BABEL_OP3_401_62177_20141114_161832_inLine
+BABEL_OP3_401_62177_20141114_161832_outLine
+BABEL_OP3_401_62289_20141204_011459_inLine
+BABEL_OP3_401_62289_20141204_011459_outLine
+BABEL_OP3_401_62289_20141204_012356_inLine
+BABEL_OP3_401_62289_20141204_012356_outLine
+BABEL_OP3_401_62430_20141117_174830_inLine
+BABEL_OP3_401_62430_20141117_174830_outLine
+BABEL_OP3_401_62835_20141020_153234_inLine
+BABEL_OP3_401_62835_20141020_153234_outLine
+BABEL_OP3_401_63220_20141101_205612_inLine
+BABEL_OP3_401_63220_20141101_205612_outLine
+BABEL_OP3_401_63523_20141204_010313_inLine
+BABEL_OP3_401_63523_20141204_010313_outLine
+BABEL_OP3_401_63757_20141029_150937_inLine
+BABEL_OP3_401_63757_20141029_150937_outLine
+BABEL_OP3_401_63938_20141114_163623_inLine
+BABEL_OP3_401_63938_20141114_163623_outLine
+BABEL_OP3_401_64350_20141002_131743_inLine
+BABEL_OP3_401_64350_20141002_131743_outLine
+BABEL_OP3_401_64638_20140923_193255_inLine
+BABEL_OP3_401_64638_20140923_193255_outLine
+BABEL_OP3_401_64759_20140930_133630_inLine
+BABEL_OP3_401_64759_20140930_133630_outLine
+BABEL_OP3_401_64768_20141015_185430_inLine
+BABEL_OP3_401_64768_20141015_185430_outLine
+BABEL_OP3_401_64796_20140922_122936_inLine
+BABEL_OP3_401_64796_20140922_122936_outLine
+BABEL_OP3_401_65298_20141113_154021_inLine
+BABEL_OP3_401_65298_20141113_154021_outLine
+BABEL_OP3_401_65477_20141017_155857_inLine
+BABEL_OP3_401_65477_20141017_155857_outLine
+BABEL_OP3_401_65882_20141003_133913_inLine
+BABEL_OP3_401_65882_20141003_133913_outLine
+BABEL_OP3_401_66045_20141023_123024_inLine
+BABEL_OP3_401_66045_20141023_123024_outLine
+BABEL_OP3_401_66177_20141118_200110_inLine
+BABEL_OP3_401_66177_20141118_200110_outLine
+BABEL_OP3_401_66967_20140929_190454_inLine
+BABEL_OP3_401_66967_20140929_190454_outLine
+BABEL_OP3_401_67373_20141003_140545_inLine
+BABEL_OP3_401_67373_20141003_140545_outLine
+BABEL_OP3_401_67592_20141102_134846_outLine
+BABEL_OP3_401_67794_20141003_133705_inLine
+BABEL_OP3_401_67794_20141003_133705_outLine
+BABEL_OP3_401_67964_20141201_174143_inLine
+BABEL_OP3_401_67964_20141201_174143_outLine
+BABEL_OP3_401_67999_20141111_153758_inLine
+BABEL_OP3_401_67999_20141111_153758_outLine
+BABEL_OP3_401_68182_20141119_114536_inLine
+BABEL_OP3_401_68182_20141119_114536_outLine
+BABEL_OP3_401_68182_20141119_115542_inLine
+BABEL_OP3_401_68182_20141119_115542_outLine
+BABEL_OP3_401_69992_20140930_195445_inLine
+BABEL_OP3_401_69992_20140930_195445_outLine
+BABEL_OP3_401_70110_20140917_141249_inLine
+BABEL_OP3_401_70110_20140917_141249_outLine
+BABEL_OP3_401_70386_20141015_182629_inLine
+BABEL_OP3_401_70386_20141015_182629_outLine
+BABEL_OP3_401_70601_20141016_160902_inLine
+BABEL_OP3_401_70601_20141016_160902_outLine
+BABEL_OP3_401_70713_20141118_164200_inLine
+BABEL_OP3_401_70713_20141118_164200_outLine
+BABEL_OP3_401_71038_20141112_182205_inLine
+BABEL_OP3_401_71038_20141112_182205_outLine
+BABEL_OP3_401_71038_20141112_183801_inLine
+BABEL_OP3_401_71038_20141112_183801_outLine
+BABEL_OP3_401_71038_20141112_184910_inLine
+BABEL_OP3_401_71038_20141112_184910_outLine
+BABEL_OP3_401_71282_20141113_172102_inLine
+BABEL_OP3_401_71282_20141113_172102_outLine
+BABEL_OP3_401_71333_20141014_190834_inLine
+BABEL_OP3_401_71333_20141014_190834_outLine
+BABEL_OP3_401_71704_20141002_173424_inLine
+BABEL_OP3_401_71704_20141002_173424_outLine
+BABEL_OP3_401_71780_20141006_202842_inLine
+BABEL_OP3_401_71780_20141006_202842_outLine
+BABEL_OP3_401_72349_20141125_020034_inLine
+BABEL_OP3_401_72349_20141125_020034_outLine
+BABEL_OP3_401_72587_20141107_174322_inLine
+BABEL_OP3_401_72587_20141107_174322_outLine
+BABEL_OP3_401_72733_20141126_185701_inLine
+BABEL_OP3_401_72733_20141126_185701_outLine
+BABEL_OP3_401_73072_20141001_214124_inLine
+BABEL_OP3_401_73072_20141001_214124_outLine
+BABEL_OP3_401_73119_20141016_201748_inLine
+BABEL_OP3_401_73119_20141016_201748_outLine
+BABEL_OP3_401_73301_20141014_154044_inLine
+BABEL_OP3_401_73301_20141014_154044_outLine
+BABEL_OP3_401_73622_20141001_214706_inLine
+BABEL_OP3_401_73622_20141001_214706_outLine
+BABEL_OP3_401_73757_20141022_145713_inLine
+BABEL_OP3_401_73757_20141022_145713_outLine
+BABEL_OP3_401_73837_20141014_174244_inLine
+BABEL_OP3_401_73837_20141014_174244_outLine
+BABEL_OP3_401_74111_20141102_152314_inLine
+BABEL_OP3_401_74280_20140917_171519_inLine
+BABEL_OP3_401_74280_20140917_171519_outLine
+BABEL_OP3_401_74455_20141113_142847_inLine
+BABEL_OP3_401_74455_20141113_142847_outLine
+BABEL_OP3_401_74641_20141029_170835_inLine
+BABEL_OP3_401_74641_20141029_170835_outLine
+BABEL_OP3_401_74728_20141125_185810_inLine
+BABEL_OP3_401_74728_20141125_185810_outLine
+BABEL_OP3_401_75223_20140929_144010_inLine
+BABEL_OP3_401_75223_20140929_144010_outLine
+BABEL_OP3_401_75869_20141122_162915_inLine
+BABEL_OP3_401_75869_20141122_162915_outLine
+BABEL_OP3_401_75869_20141122_163817_inLine
+BABEL_OP3_401_75869_20141122_163817_outLine
+BABEL_OP3_401_75993_20141003_155108_inLine
+BABEL_OP3_401_75993_20141003_155108_outLine
+BABEL_OP3_401_76155_20141018_235119_inLine
+BABEL_OP3_401_76155_20141018_235119_outLine
+BABEL_OP3_401_76372_20141122_205123_inLine
+BABEL_OP3_401_76372_20141122_205123_outLine
+BABEL_OP3_401_76756_20141031_190329_inLine
+BABEL_OP3_401_76756_20141031_190329_outLine
+BABEL_OP3_401_76773_20141002_161621_inLine
+BABEL_OP3_401_76773_20141002_161621_outLine
+BABEL_OP3_401_77112_20141008_135410_inLine
+BABEL_OP3_401_77112_20141008_135410_outLine
+BABEL_OP3_401_77391_20141014_202916_inLine
+BABEL_OP3_401_77391_20141014_202916_outLine
+BABEL_OP3_401_77391_20141014_204156_inLine
+BABEL_OP3_401_77391_20141014_204156_outLine
+BABEL_OP3_401_77427_20141019_151638_inLine
+BABEL_OP3_401_77427_20141019_151638_outLine
+BABEL_OP3_401_78360_20141112_174704_inLine
+BABEL_OP3_401_78360_20141112_174704_outLine
+BABEL_OP3_401_78454_20141030_190417_inLine
+BABEL_OP3_401_78454_20141030_190417_outLine
+BABEL_OP3_401_78609_20141101_190650_inLine
+BABEL_OP3_401_78609_20141101_190650_outLine
+BABEL_OP3_401_78609_20141101_191730_inLine
+BABEL_OP3_401_78609_20141101_191730_outLine
+BABEL_OP3_401_78943_20141015_141252_inLine
+BABEL_OP3_401_78943_20141015_141252_outLine
+BABEL_OP3_401_78976_20141016_202006_inLine
+BABEL_OP3_401_78976_20141016_202006_outLine
+BABEL_OP3_401_79505_20141125_151308_inLine
+BABEL_OP3_401_79505_20141125_151308_outLine
+BABEL_OP3_401_79590_20141019_151813_inLine
+BABEL_OP3_401_79590_20141019_151813_outLine
+BABEL_OP3_401_79820_20141015_191402_inLine
+BABEL_OP3_401_79820_20141015_191402_outLine
+BABEL_OP3_401_79858_20140930_180452_inLine
+BABEL_OP3_401_79858_20140930_180452_outLine
+BABEL_OP3_401_80577_20141124_151617_inLine +BABEL_OP3_401_80577_20141124_151617_outLine +BABEL_OP3_401_80622_20141031_193633_inLine +BABEL_OP3_401_80622_20141031_193633_outLine +BABEL_OP3_401_81229_20141017_145439_inLine +BABEL_OP3_401_81229_20141017_145439_outLine +BABEL_OP3_401_82030_20141126_190214_inLine +BABEL_OP3_401_82030_20141126_190214_outLine +BABEL_OP3_401_82637_20140922_152004_inLine +BABEL_OP3_401_82637_20140922_152004_outLine +BABEL_OP3_401_82863_20141020_125644_inLine +BABEL_OP3_401_82863_20141020_125644_outLine +BABEL_OP3_401_82979_20141016_150329_inLine +BABEL_OP3_401_82979_20141016_150329_outLine +BABEL_OP3_401_83062_20141124_210713_inLine +BABEL_OP3_401_83062_20141124_210713_outLine +BABEL_OP3_401_83366_20141107_185153_inLine +BABEL_OP3_401_83366_20141107_185153_outLine +BABEL_OP3_401_83775_20141016_165202_inLine +BABEL_OP3_401_83775_20141016_165202_outLine +BABEL_OP3_401_83783_20141029_142056_inLine +BABEL_OP3_401_83783_20141029_142056_outLine +BABEL_OP3_401_84055_20141118_213900_inLine +BABEL_OP3_401_84055_20141118_213900_outLine +BABEL_OP3_401_84061_20141019_160653_inLine +BABEL_OP3_401_84061_20141019_160653_outLine +BABEL_OP3_401_84125_20140919_142411_inLine +BABEL_OP3_401_84125_20140919_142411_outLine +BABEL_OP3_401_84583_20141028_135606_inLine +BABEL_OP3_401_84583_20141028_135606_outLine +BABEL_OP3_401_84605_20141013_223927_inLine +BABEL_OP3_401_84605_20141013_223927_outLine +BABEL_OP3_401_84737_20141114_223714_inLine +BABEL_OP3_401_84737_20141114_223714_outLine +BABEL_OP3_401_84768_20141001_160652_inLine +BABEL_OP3_401_84768_20141001_160652_outLine +BABEL_OP3_401_85048_20141030_163324_inLine +BABEL_OP3_401_85048_20141030_163324_outLine +BABEL_OP3_401_85179_20141105_155540_inLine +BABEL_OP3_401_85179_20141105_155540_outLine +BABEL_OP3_401_85248_20141114_150825_inLine +BABEL_OP3_401_85248_20141114_150825_outLine +BABEL_OP3_401_85248_20141114_152742_inLine +BABEL_OP3_401_85248_20141114_152742_outLine +BABEL_OP3_401_85325_20141127_141209_inLine +BABEL_OP3_401_85325_20141127_141209_outLine +BABEL_OP3_401_85340_20141006_165058_inLine +BABEL_OP3_401_85340_20141006_165058_outLine +BABEL_OP3_401_86472_20140924_120802_inLine +BABEL_OP3_401_86472_20140924_120802_outLine +BABEL_OP3_401_86748_20141117_205420_inLine +BABEL_OP3_401_86748_20141117_205420_outLine +BABEL_OP3_401_86860_20141204_001000_inLine +BABEL_OP3_401_86860_20141204_001000_outLine +BABEL_OP3_401_86888_20141101_175833_inLine +BABEL_OP3_401_86888_20141101_175833_outLine +BABEL_OP3_401_86952_20141003_103859_inLine +BABEL_OP3_401_86952_20141003_103859_outLine +BABEL_OP3_401_87074_20141006_143605_inLine +BABEL_OP3_401_87074_20141006_143605_outLine +BABEL_OP3_401_87489_20141118_173238_inLine +BABEL_OP3_401_87489_20141118_173238_outLine +BABEL_OP3_401_87545_20141204_001833_inLine +BABEL_OP3_401_87545_20141204_001833_outLine +BABEL_OP3_401_87629_20141028_191608_inLine +BABEL_OP3_401_87629_20141028_191608_outLine +BABEL_OP3_401_87693_20141003_190102_inLine +BABEL_OP3_401_87693_20141003_190102_outLine +BABEL_OP3_401_88372_20141125_142302_inLine +BABEL_OP3_401_88372_20141125_142302_outLine +BABEL_OP3_401_88601_20141023_164043_inLine +BABEL_OP3_401_88601_20141023_164043_outLine +BABEL_OP3_401_88669_20141031_182135_inLine +BABEL_OP3_401_88669_20141031_182135_outLine +BABEL_OP3_401_88812_20141203_173638_inLine +BABEL_OP3_401_88812_20141203_173638_outLine +BABEL_OP3_401_88812_20141203_180453_inLine +BABEL_OP3_401_88812_20141203_180453_outLine +BABEL_OP3_401_89045_20140917_131337_inLine 
+BABEL_OP3_401_89045_20140917_131337_outLine +BABEL_OP3_401_89059_20141111_185303_inLine +BABEL_OP3_401_89059_20141111_185303_outLine +BABEL_OP3_401_89457_20141020_143004_inLine +BABEL_OP3_401_89457_20141020_143004_outLine +BABEL_OP3_401_89560_20141102_161259_inLine +BABEL_OP3_401_89560_20141102_161259_outLine +BABEL_OP3_401_89888_20141002_173642_inLine +BABEL_OP3_401_89888_20141002_173642_outLine +BABEL_OP3_401_89888_20141002_175247_inLine +BABEL_OP3_401_89888_20141002_175247_outLine +BABEL_OP3_401_89943_20141014_163254_inLine +BABEL_OP3_401_89943_20141014_163254_outLine +BABEL_OP3_401_89943_20141014_165144_inLine +BABEL_OP3_401_89943_20141014_165144_outLine +BABEL_OP3_401_90080_20141124_210928_inLine +BABEL_OP3_401_90080_20141124_210928_outLine +BABEL_OP3_401_91080_20141107_184614_inLine +BABEL_OP3_401_91080_20141107_184614_outLine +BABEL_OP3_401_91336_20141022_164858_inLine +BABEL_OP3_401_91336_20141022_164858_outLine +BABEL_OP3_401_91372_20141126_174359_inLine +BABEL_OP3_401_91372_20141126_174359_outLine +BABEL_OP3_401_91825_20140930_140910_inLine +BABEL_OP3_401_91825_20140930_140910_outLine +BABEL_OP3_401_91825_20140930_142615_inLine +BABEL_OP3_401_91825_20140930_142615_outLine +BABEL_OP3_401_91930_20141117_203237_inLine +BABEL_OP3_401_91930_20141117_203237_outLine +BABEL_OP3_401_91944_20141002_002457_inLine +BABEL_OP3_401_91944_20141002_002457_outLine +BABEL_OP3_401_92096_20141122_181058_inLine +BABEL_OP3_401_92096_20141122_181058_outLine +BABEL_OP3_401_92176_20141022_194334_inLine +BABEL_OP3_401_92176_20141022_194334_outLine +BABEL_OP3_401_92356_20141113_184902_inLine +BABEL_OP3_401_92356_20141113_184902_outLine +BABEL_OP3_401_92509_20140919_170134_inLine +BABEL_OP3_401_92509_20140919_170134_outLine +BABEL_OP3_401_92557_20141113_141949_inLine +BABEL_OP3_401_92557_20141113_141949_outLine +BABEL_OP3_401_92886_20141008_194243_inLine +BABEL_OP3_401_92886_20141008_194243_outLine +BABEL_OP3_401_92942_20141031_154005_inLine +BABEL_OP3_401_92942_20141031_154005_outLine +BABEL_OP3_401_93469_20141204_000050_inLine +BABEL_OP3_401_93469_20141204_000050_outLine +BABEL_OP3_401_93515_20141207_011722_inLine +BABEL_OP3_401_93515_20141207_011722_outLine +BABEL_OP3_401_93604_20141206_154822_inLine +BABEL_OP3_401_93604_20141206_154822_outLine +BABEL_OP3_401_93861_20141022_174829_inLine +BABEL_OP3_401_93861_20141022_174829_outLine +BABEL_OP3_401_94141_20141125_195408_inLine +BABEL_OP3_401_94141_20141125_195408_outLine +BABEL_OP3_401_94409_20141019_155250_inLine +BABEL_OP3_401_94409_20141019_155250_outLine +BABEL_OP3_401_95269_20141016_175058_inLine +BABEL_OP3_401_95269_20141016_175058_outLine +BABEL_OP3_401_95269_20141016_175950_inLine +BABEL_OP3_401_95269_20141016_175950_outLine +BABEL_OP3_401_95399_20141021_140337_inLine +BABEL_OP3_401_95399_20141021_140337_outLine +BABEL_OP3_401_96059_20141201_200308_inLine +BABEL_OP3_401_96059_20141201_200308_outLine +BABEL_OP3_401_96190_20141013_142533_inLine +BABEL_OP3_401_96190_20141013_142533_outLine +BABEL_OP3_401_96405_20141013_185112_inLine +BABEL_OP3_401_96405_20141013_185112_outLine +BABEL_OP3_401_96405_20141013_195512_inLine +BABEL_OP3_401_96405_20141013_195512_outLine +BABEL_OP3_401_96584_20141114_205949_inLine +BABEL_OP3_401_96584_20141114_205949_outLine +BABEL_OP3_401_96934_20141015_153021_inLine +BABEL_OP3_401_96934_20141015_153021_outLine +BABEL_OP3_401_97097_20141122_194201_inLine +BABEL_OP3_401_97097_20141122_194201_outLine +BABEL_OP3_401_97731_20141105_135405_inLine +BABEL_OP3_401_97731_20141105_135405_outLine 
+BABEL_OP3_401_97896_20141021_124204_inLine +BABEL_OP3_401_97896_20141021_124204_outLine +BABEL_OP3_401_98365_20141029_133629_inLine +BABEL_OP3_401_98365_20141029_133629_outLine +BABEL_OP3_401_98580_20141021_140835_inLine +BABEL_OP3_401_98580_20141021_140835_outLine +BABEL_OP3_401_98888_20141019_153225_inLine +BABEL_OP3_401_98888_20141019_153225_outLine +BABEL_OP3_401_98888_20141019_160421_inLine +BABEL_OP3_401_98888_20141019_160421_outLine +BABEL_OP3_401_99264_20141104_195940_inLine +BABEL_OP3_401_99264_20141104_195940_outLine +BABEL_OP3_401_99264_20141104_200707_inLine +BABEL_OP3_401_99264_20141104_200707_outLine +BABEL_OP3_401_99289_20141122_150548_inLine +BABEL_OP3_401_99289_20141122_150548_outLine +BABEL_OP3_401_99487_20141001_154915_inLine +BABEL_OP3_401_99487_20141001_154915_outLine +BABEL_OP3_401_99487_20141001_155922_inLine +BABEL_OP3_401_99487_20141001_155922_outLine +BABEL_OP3_401_99516_20140924_152057_inLine +BABEL_OP3_401_99516_20140924_152057_outLine +BABEL_OP3_401_99718_20141003_130643_inLine +BABEL_OP3_401_99718_20141003_130643_outLine +BABEL_OP3_401_99813_20141027_183714_inLine +BABEL_OP3_401_99813_20141027_183714_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list new file mode 100644 index 00000000000..46233026964 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list @@ -0,0 +1,122 @@ +BABEL_OP3_402_10184_20141119_194233_inLine +BABEL_OP3_402_10184_20141119_194233_outLine +BABEL_OP3_402_11581_20141124_181058_inLine +BABEL_OP3_402_11581_20141124_181058_outLine +BABEL_OP3_402_15535_20150104_232347_inLine +BABEL_OP3_402_15535_20150104_232347_outLine +BABEL_OP3_402_20133_20140911_170812_inLine +BABEL_OP3_402_20133_20140911_170812_outLine +BABEL_OP3_402_21393_20150304_163256_inLine +BABEL_OP3_402_21393_20150304_163256_outLine +BABEL_OP3_402_21581_20141107_151147_inLine +BABEL_OP3_402_21581_20141107_151147_outLine +BABEL_OP3_402_21807_20141125_194924_inLine +BABEL_OP3_402_21807_20141125_194924_outLine +BABEL_OP3_402_23046_20141103_212247_inLine +BABEL_OP3_402_23046_20141103_212247_outLine +BABEL_OP3_402_23505_20141029_003347_inLine +BABEL_OP3_402_23505_20141029_003347_outLine +BABEL_OP3_402_24982_20141027_223126_inLine +BABEL_OP3_402_24982_20141027_223126_outLine +BABEL_OP3_402_27590_20141227_191710_inLine +BABEL_OP3_402_27590_20141227_191710_outLine +BABEL_OP3_402_27841_20150112_180404_inLine +BABEL_OP3_402_27841_20150112_180404_outLine +BABEL_OP3_402_28012_20150105_215005_inLine +BABEL_OP3_402_28012_20150105_215005_outLine +BABEL_OP3_402_36293_20141001_145552_inLine +BABEL_OP3_402_36293_20141001_145552_outLine +BABEL_OP3_402_36505_20150106_201700_inLine +BABEL_OP3_402_36505_20150106_201700_outLine +BABEL_OP3_402_36894_20140919_222930_inLine +BABEL_OP3_402_36894_20140919_222930_outLine +BABEL_OP3_402_41592_20141118_011026_inLine +BABEL_OP3_402_41592_20141118_011026_outLine +BABEL_OP3_402_41598_20150201_142509_inLine +BABEL_OP3_402_41598_20150201_142509_outLine +BABEL_OP3_402_41745_20141108_162338_inLine +BABEL_OP3_402_41745_20141108_162338_outLine +BABEL_OP3_402_46261_20141112_161528_inLine +BABEL_OP3_402_46261_20141112_161528_outLine +BABEL_OP3_402_49118_20150201_023112_inLine +BABEL_OP3_402_49118_20150201_023112_outLine +BABEL_OP3_402_49437_20150112_204645_inLine +BABEL_OP3_402_49437_20150112_204645_outLine +BABEL_OP3_402_50427_20141119_174123_inLine +BABEL_OP3_402_50427_20141119_174123_outLine +BABEL_OP3_402_50549_20150113_123204_inLine +BABEL_OP3_402_50549_20150113_123204_outLine 
+BABEL_OP3_402_52490_20140916_192446_inLine +BABEL_OP3_402_52490_20140916_192446_outLine +BABEL_OP3_402_52717_20140923_130849_inLine +BABEL_OP3_402_52717_20140923_130849_outLine +BABEL_OP3_402_54162_20141116_183833_inLine +BABEL_OP3_402_54162_20141116_183833_outLine +BABEL_OP3_402_55968_20140912_204820_inLine +BABEL_OP3_402_55968_20140912_204820_outLine +BABEL_OP3_402_56306_20150103_203751_inLine +BABEL_OP3_402_56306_20150103_203751_outLine +BABEL_OP3_402_61731_20141008_152133_inLine +BABEL_OP3_402_61731_20141008_152133_outLine +BABEL_OP3_402_64494_20141012_193548_inLine +BABEL_OP3_402_64494_20141012_193548_outLine +BABEL_OP3_402_65882_20141102_005627_inLine +BABEL_OP3_402_65882_20141102_005627_outLine +BABEL_OP3_402_66519_20141107_200757_inLine +BABEL_OP3_402_66519_20141107_200757_outLine +BABEL_OP3_402_68068_20150119_135822_inLine +BABEL_OP3_402_68068_20150119_135822_outLine +BABEL_OP3_402_68182_20150111_002528_inLine +BABEL_OP3_402_68182_20150111_002528_outLine +BABEL_OP3_402_68289_20150216_010725_inLine +BABEL_OP3_402_68289_20150216_010725_outLine +BABEL_OP3_402_68385_20140911_143047_inLine +BABEL_OP3_402_68385_20140911_143047_outLine +BABEL_OP3_402_69746_20150110_165836_inLine +BABEL_OP3_402_69746_20150110_165836_outLine +BABEL_OP3_402_70343_20150212_004248_inLine +BABEL_OP3_402_70343_20150212_004248_outLine +BABEL_OP3_402_70386_20141116_170547_inLine +BABEL_OP3_402_70386_20141116_170547_outLine +BABEL_OP3_402_72324_20141201_191618_inLine +BABEL_OP3_402_72324_20141201_191618_outLine +BABEL_OP3_402_73511_20141226_133330_inLine +BABEL_OP3_402_73511_20141226_133330_outLine +BABEL_OP3_402_73837_20141101_183259_inLine +BABEL_OP3_402_73837_20141101_183259_outLine +BABEL_OP3_402_78398_20141107_225319_inLine +BABEL_OP3_402_78398_20141107_225319_outLine +BABEL_OP3_402_78454_20141128_203259_inLine +BABEL_OP3_402_78454_20141128_203259_outLine +BABEL_OP3_402_78604_20141031_181612_inLine +BABEL_OP3_402_78604_20141031_181612_outLine +BABEL_OP3_402_81433_20141121_014829_inLine +BABEL_OP3_402_81433_20141121_014829_outLine +BABEL_OP3_402_81553_20150124_004852_inLine +BABEL_OP3_402_81553_20150124_004852_outLine +BABEL_OP3_402_82935_20150104_005835_inLine +BABEL_OP3_402_82935_20150104_005835_outLine +BABEL_OP3_402_86467_20140920_125939_inLine +BABEL_OP3_402_86467_20140920_125939_outLine +BABEL_OP3_402_86748_20150131_001317_inLine +BABEL_OP3_402_86748_20150131_001317_outLine +BABEL_OP3_402_87921_20141225_203350_inLine +BABEL_OP3_402_87921_20141225_203350_outLine +BABEL_OP3_402_88445_20141205_204305_inLine +BABEL_OP3_402_88445_20141205_204305_outLine +BABEL_OP3_402_89203_20150131_215344_inLine +BABEL_OP3_402_89203_20150131_215344_outLine +BABEL_OP3_402_89457_20141117_212710_inLine +BABEL_OP3_402_89457_20141117_212710_outLine +BABEL_OP3_402_92176_20141216_022926_inLine +BABEL_OP3_402_92176_20141216_022926_outLine +BABEL_OP3_402_92176_20141222_021733_inLine +BABEL_OP3_402_92176_20141222_021733_outLine +BABEL_OP3_402_93632_20150119_150118_inLine +BABEL_OP3_402_93632_20150119_150118_outLine +BABEL_OP3_402_95399_20141111_162356_inLine +BABEL_OP3_402_95399_20141111_162356_outLine +BABEL_OP3_402_96584_20150107_184515_inLine +BABEL_OP3_402_96584_20150107_184515_outLine +BABEL_OP3_402_99401_20141024_202205_inLine +BABEL_OP3_402_99401_20141024_202205_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.list b/egs/babel/s5d/conf/lists/402-javanese/dev.list new file mode 100644 index 00000000000..46233026964 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/dev.list @@ -0,0 +1,122 @@ 
+BABEL_OP3_402_10184_20141119_194233_inLine +BABEL_OP3_402_10184_20141119_194233_outLine +BABEL_OP3_402_11581_20141124_181058_inLine +BABEL_OP3_402_11581_20141124_181058_outLine +BABEL_OP3_402_15535_20150104_232347_inLine +BABEL_OP3_402_15535_20150104_232347_outLine +BABEL_OP3_402_20133_20140911_170812_inLine +BABEL_OP3_402_20133_20140911_170812_outLine +BABEL_OP3_402_21393_20150304_163256_inLine +BABEL_OP3_402_21393_20150304_163256_outLine +BABEL_OP3_402_21581_20141107_151147_inLine +BABEL_OP3_402_21581_20141107_151147_outLine +BABEL_OP3_402_21807_20141125_194924_inLine +BABEL_OP3_402_21807_20141125_194924_outLine +BABEL_OP3_402_23046_20141103_212247_inLine +BABEL_OP3_402_23046_20141103_212247_outLine +BABEL_OP3_402_23505_20141029_003347_inLine +BABEL_OP3_402_23505_20141029_003347_outLine +BABEL_OP3_402_24982_20141027_223126_inLine +BABEL_OP3_402_24982_20141027_223126_outLine +BABEL_OP3_402_27590_20141227_191710_inLine +BABEL_OP3_402_27590_20141227_191710_outLine +BABEL_OP3_402_27841_20150112_180404_inLine +BABEL_OP3_402_27841_20150112_180404_outLine +BABEL_OP3_402_28012_20150105_215005_inLine +BABEL_OP3_402_28012_20150105_215005_outLine +BABEL_OP3_402_36293_20141001_145552_inLine +BABEL_OP3_402_36293_20141001_145552_outLine +BABEL_OP3_402_36505_20150106_201700_inLine +BABEL_OP3_402_36505_20150106_201700_outLine +BABEL_OP3_402_36894_20140919_222930_inLine +BABEL_OP3_402_36894_20140919_222930_outLine +BABEL_OP3_402_41592_20141118_011026_inLine +BABEL_OP3_402_41592_20141118_011026_outLine +BABEL_OP3_402_41598_20150201_142509_inLine +BABEL_OP3_402_41598_20150201_142509_outLine +BABEL_OP3_402_41745_20141108_162338_inLine +BABEL_OP3_402_41745_20141108_162338_outLine +BABEL_OP3_402_46261_20141112_161528_inLine +BABEL_OP3_402_46261_20141112_161528_outLine +BABEL_OP3_402_49118_20150201_023112_inLine +BABEL_OP3_402_49118_20150201_023112_outLine +BABEL_OP3_402_49437_20150112_204645_inLine +BABEL_OP3_402_49437_20150112_204645_outLine +BABEL_OP3_402_50427_20141119_174123_inLine +BABEL_OP3_402_50427_20141119_174123_outLine +BABEL_OP3_402_50549_20150113_123204_inLine +BABEL_OP3_402_50549_20150113_123204_outLine +BABEL_OP3_402_52490_20140916_192446_inLine +BABEL_OP3_402_52490_20140916_192446_outLine +BABEL_OP3_402_52717_20140923_130849_inLine +BABEL_OP3_402_52717_20140923_130849_outLine +BABEL_OP3_402_54162_20141116_183833_inLine +BABEL_OP3_402_54162_20141116_183833_outLine +BABEL_OP3_402_55968_20140912_204820_inLine +BABEL_OP3_402_55968_20140912_204820_outLine +BABEL_OP3_402_56306_20150103_203751_inLine +BABEL_OP3_402_56306_20150103_203751_outLine +BABEL_OP3_402_61731_20141008_152133_inLine +BABEL_OP3_402_61731_20141008_152133_outLine +BABEL_OP3_402_64494_20141012_193548_inLine +BABEL_OP3_402_64494_20141012_193548_outLine +BABEL_OP3_402_65882_20141102_005627_inLine +BABEL_OP3_402_65882_20141102_005627_outLine +BABEL_OP3_402_66519_20141107_200757_inLine +BABEL_OP3_402_66519_20141107_200757_outLine +BABEL_OP3_402_68068_20150119_135822_inLine +BABEL_OP3_402_68068_20150119_135822_outLine +BABEL_OP3_402_68182_20150111_002528_inLine +BABEL_OP3_402_68182_20150111_002528_outLine +BABEL_OP3_402_68289_20150216_010725_inLine +BABEL_OP3_402_68289_20150216_010725_outLine +BABEL_OP3_402_68385_20140911_143047_inLine +BABEL_OP3_402_68385_20140911_143047_outLine +BABEL_OP3_402_69746_20150110_165836_inLine +BABEL_OP3_402_69746_20150110_165836_outLine +BABEL_OP3_402_70343_20150212_004248_inLine +BABEL_OP3_402_70343_20150212_004248_outLine +BABEL_OP3_402_70386_20141116_170547_inLine 
+BABEL_OP3_402_70386_20141116_170547_outLine +BABEL_OP3_402_72324_20141201_191618_inLine +BABEL_OP3_402_72324_20141201_191618_outLine +BABEL_OP3_402_73511_20141226_133330_inLine +BABEL_OP3_402_73511_20141226_133330_outLine +BABEL_OP3_402_73837_20141101_183259_inLine +BABEL_OP3_402_73837_20141101_183259_outLine +BABEL_OP3_402_78398_20141107_225319_inLine +BABEL_OP3_402_78398_20141107_225319_outLine +BABEL_OP3_402_78454_20141128_203259_inLine +BABEL_OP3_402_78454_20141128_203259_outLine +BABEL_OP3_402_78604_20141031_181612_inLine +BABEL_OP3_402_78604_20141031_181612_outLine +BABEL_OP3_402_81433_20141121_014829_inLine +BABEL_OP3_402_81433_20141121_014829_outLine +BABEL_OP3_402_81553_20150124_004852_inLine +BABEL_OP3_402_81553_20150124_004852_outLine +BABEL_OP3_402_82935_20150104_005835_inLine +BABEL_OP3_402_82935_20150104_005835_outLine +BABEL_OP3_402_86467_20140920_125939_inLine +BABEL_OP3_402_86467_20140920_125939_outLine +BABEL_OP3_402_86748_20150131_001317_inLine +BABEL_OP3_402_86748_20150131_001317_outLine +BABEL_OP3_402_87921_20141225_203350_inLine +BABEL_OP3_402_87921_20141225_203350_outLine +BABEL_OP3_402_88445_20141205_204305_inLine +BABEL_OP3_402_88445_20141205_204305_outLine +BABEL_OP3_402_89203_20150131_215344_inLine +BABEL_OP3_402_89203_20150131_215344_outLine +BABEL_OP3_402_89457_20141117_212710_inLine +BABEL_OP3_402_89457_20141117_212710_outLine +BABEL_OP3_402_92176_20141216_022926_inLine +BABEL_OP3_402_92176_20141216_022926_outLine +BABEL_OP3_402_92176_20141222_021733_inLine +BABEL_OP3_402_92176_20141222_021733_outLine +BABEL_OP3_402_93632_20150119_150118_inLine +BABEL_OP3_402_93632_20150119_150118_outLine +BABEL_OP3_402_95399_20141111_162356_inLine +BABEL_OP3_402_95399_20141111_162356_outLine +BABEL_OP3_402_96584_20150107_184515_inLine +BABEL_OP3_402_96584_20150107_184515_outLine +BABEL_OP3_402_99401_20141024_202205_inLine +BABEL_OP3_402_99401_20141024_202205_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/eval.list b/egs/babel/s5d/conf/lists/402-javanese/eval.list new file mode 100644 index 00000000000..e0b81487a54 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/eval.list @@ -0,0 +1,188 @@ +BABEL_OP3_402_10036_20141124_025321_inLine +BABEL_OP3_402_10036_20141124_025321_outLine +BABEL_OP3_402_10974_20141130_234329_inLine +BABEL_OP3_402_10974_20141130_234329_outLine +BABEL_OP3_402_12036_20141009_181351_inLine +BABEL_OP3_402_12036_20141009_181351_outLine +BABEL_OP3_402_12321_20150111_184045_inLine +BABEL_OP3_402_12321_20150111_184045_outLine +BABEL_OP3_402_12321_20150111_185205_inLine +BABEL_OP3_402_12321_20150111_185205_outLine +BABEL_OP3_402_13040_20141030_172740_inLine +BABEL_OP3_402_13040_20141030_172740_outLine +BABEL_OP3_402_13490_20141201_021241_inLine +BABEL_OP3_402_13490_20141201_021241_outLine +BABEL_OP3_402_13490_20141204_021416_inLine +BABEL_OP3_402_13490_20141204_021416_outLine +BABEL_OP3_402_14719_20150114_153747_inLine +BABEL_OP3_402_14719_20150114_153747_outLine +BABEL_OP3_402_15024_20141112_173834_inLine +BABEL_OP3_402_15024_20141112_173834_outLine +BABEL_OP3_402_15730_20141001_154550_inLine +BABEL_OP3_402_15730_20141001_154550_outLine +BABEL_OP3_402_16938_20141118_194456_inLine +BABEL_OP3_402_16938_20141118_194456_outLine +BABEL_OP3_402_17165_20141115_171729_inLine +BABEL_OP3_402_17165_20141115_171729_outLine +BABEL_OP3_402_19749_20150130_162450_inLine +BABEL_OP3_402_19749_20150130_162450_outLine +BABEL_OP3_402_19818_20141213_194147_inLine +BABEL_OP3_402_19818_20141213_194147_outLine +BABEL_OP3_402_21206_20141019_210210_inLine 
+BABEL_OP3_402_21206_20141019_210210_outLine +BABEL_OP3_402_23395_20141120_192928_inLine +BABEL_OP3_402_23395_20141120_192928_outLine +BABEL_OP3_402_23628_20141123_183457_inLine +BABEL_OP3_402_23628_20141123_183457_outLine +BABEL_OP3_402_26388_20141031_184504_inLine +BABEL_OP3_402_26388_20141031_184504_outLine +BABEL_OP3_402_28419_20141107_004309_inLine +BABEL_OP3_402_28419_20141107_004309_outLine +BABEL_OP3_402_28814_20150108_133247_inLine +BABEL_OP3_402_28814_20150108_133247_outLine +BABEL_OP3_402_29023_20141016_155119_inLine +BABEL_OP3_402_29023_20141016_155119_outLine +BABEL_OP3_402_30395_20141126_165430_inLine +BABEL_OP3_402_30395_20141126_165430_outLine +BABEL_OP3_402_30653_20150301_200332_inLine +BABEL_OP3_402_30653_20150301_200332_outLine +BABEL_OP3_402_33497_20141228_021512_inLine +BABEL_OP3_402_33497_20141228_021512_outLine +BABEL_OP3_402_33497_20141228_022418_inLine +BABEL_OP3_402_33497_20141228_022418_outLine +BABEL_OP3_402_34197_20140926_180505_inLine +BABEL_OP3_402_34197_20140926_180505_outLine +BABEL_OP3_402_35202_20150201_211802_inLine +BABEL_OP3_402_35202_20150201_211802_outLine +BABEL_OP3_402_36669_20141112_195148_inLine +BABEL_OP3_402_36669_20141112_195148_outLine +BABEL_OP3_402_36990_20141114_221330_inLine +BABEL_OP3_402_36990_20141114_221330_outLine +BABEL_OP3_402_38664_20141123_163506_inLine +BABEL_OP3_402_38664_20141123_163506_outLine +BABEL_OP3_402_38741_20141020_160936_inLine +BABEL_OP3_402_38741_20141020_160936_outLine +BABEL_OP3_402_40713_20141019_145210_inLine +BABEL_OP3_402_40713_20141019_145210_outLine +BABEL_OP3_402_44347_20150111_142153_inLine +BABEL_OP3_402_44347_20150111_142153_outLine +BABEL_OP3_402_44420_20141031_175058_inLine +BABEL_OP3_402_44420_20141031_175058_outLine +BABEL_OP3_402_44531_20150302_195023_inLine +BABEL_OP3_402_44531_20150302_195023_outLine +BABEL_OP3_402_45642_20140923_154729_inLine +BABEL_OP3_402_45642_20140923_154729_outLine +BABEL_OP3_402_46681_20141013_161421_inLine +BABEL_OP3_402_46681_20141013_161421_outLine +BABEL_OP3_402_46976_20141119_183300_inLine +BABEL_OP3_402_46976_20141119_183300_outLine +BABEL_OP3_402_49775_20140915_151515_inLine +BABEL_OP3_402_49775_20140915_151515_outLine +BABEL_OP3_402_51407_20141228_213554_inLine +BABEL_OP3_402_51407_20141228_213554_outLine +BABEL_OP3_402_51955_20141103_200423_inLine +BABEL_OP3_402_51955_20141103_200423_outLine +BABEL_OP3_402_52694_20141123_140609_inLine +BABEL_OP3_402_52694_20141123_140609_outLine +BABEL_OP3_402_53419_20141226_140523_inLine +BABEL_OP3_402_53419_20141226_140523_outLine +BABEL_OP3_402_53917_20150201_201004_inLine +BABEL_OP3_402_53917_20150201_201004_outLine +BABEL_OP3_402_54841_20150108_004608_inLine +BABEL_OP3_402_54841_20150108_004608_outLine +BABEL_OP3_402_56743_20141108_140926_inLine +BABEL_OP3_402_56743_20141108_140926_outLine +BABEL_OP3_402_56826_20141224_134149_inLine +BABEL_OP3_402_56826_20141224_134149_outLine +BABEL_OP3_402_58103_20141104_192009_inLine +BABEL_OP3_402_58103_20141104_192009_outLine +BABEL_OP3_402_58926_20141014_174318_inLine +BABEL_OP3_402_58926_20141014_174318_outLine +BABEL_OP3_402_59091_20150104_000026_inLine +BABEL_OP3_402_59091_20150104_000026_outLine +BABEL_OP3_402_59928_20140929_174836_inLine +BABEL_OP3_402_59928_20140929_174836_outLine +BABEL_OP3_402_59993_20141103_183340_inLine +BABEL_OP3_402_59993_20141103_183340_outLine +BABEL_OP3_402_60626_20141019_135020_inLine +BABEL_OP3_402_60626_20141019_135020_outLine +BABEL_OP3_402_61011_20141003_131410_inLine +BABEL_OP3_402_61011_20141003_131410_outLine 
+BABEL_OP3_402_61190_20141102_132003_inLine +BABEL_OP3_402_61190_20141102_132003_outLine +BABEL_OP3_402_61225_20140912_171906_inLine +BABEL_OP3_402_61225_20140912_171906_outLine +BABEL_OP3_402_63604_20141101_235656_inLine +BABEL_OP3_402_63604_20141101_235656_outLine +BABEL_OP3_402_64638_20141214_234141_inLine +BABEL_OP3_402_64638_20141214_234141_outLine +BABEL_OP3_402_66967_20140917_153139_inLine +BABEL_OP3_402_66967_20140917_153139_outLine +BABEL_OP3_402_69474_20150111_235831_inLine +BABEL_OP3_402_69474_20150111_235831_outLine +BABEL_OP3_402_71047_20150107_194822_inLine +BABEL_OP3_402_71047_20150107_194822_outLine +BABEL_OP3_402_72007_20141219_183621_inLine +BABEL_OP3_402_72007_20141219_183621_outLine +BABEL_OP3_402_73042_20141013_175542_inLine +BABEL_OP3_402_73042_20141013_175542_outLine +BABEL_OP3_402_73072_20140923_135906_inLine +BABEL_OP3_402_73072_20140923_135906_outLine +BABEL_OP3_402_74226_20141220_000133_inLine +BABEL_OP3_402_74226_20141220_000133_outLine +BABEL_OP3_402_74280_20140915_174124_inLine +BABEL_OP3_402_74280_20140915_174124_outLine +BABEL_OP3_402_76126_20141224_141342_inLine +BABEL_OP3_402_76126_20141224_141342_outLine +BABEL_OP3_402_77033_20150108_180731_inLine +BABEL_OP3_402_77033_20150108_180731_outLine +BABEL_OP3_402_77112_20140929_201352_inLine +BABEL_OP3_402_77112_20140929_201352_outLine +BABEL_OP3_402_77391_20141102_204007_inLine +BABEL_OP3_402_77391_20141102_204007_outLine +BABEL_OP3_402_77567_20140920_134449_inLine +BABEL_OP3_402_77567_20140920_134449_outLine +BABEL_OP3_402_77730_20141021_174646_inLine +BABEL_OP3_402_77730_20141021_174646_outLine +BABEL_OP3_402_78544_20141215_000405_inLine +BABEL_OP3_402_78544_20141215_000405_outLine +BABEL_OP3_402_79505_20150227_172147_inLine +BABEL_OP3_402_79505_20150227_172147_outLine +BABEL_OP3_402_81622_20141115_215444_inLine +BABEL_OP3_402_81622_20141115_215444_outLine +BABEL_OP3_402_82145_20150108_195326_inLine +BABEL_OP3_402_82145_20150108_195326_outLine +BABEL_OP3_402_82863_20141114_212757_inLine +BABEL_OP3_402_82863_20141114_212757_outLine +BABEL_OP3_402_84583_20141123_201337_inLine +BABEL_OP3_402_84583_20141123_201337_outLine +BABEL_OP3_402_87074_20141030_183257_inLine +BABEL_OP3_402_87074_20141030_183257_outLine +BABEL_OP3_402_87298_20141103_203537_inLine +BABEL_OP3_402_87298_20141103_203537_outLine +BABEL_OP3_402_88372_20150201_000904_inLine +BABEL_OP3_402_88372_20150201_000904_outLine +BABEL_OP3_402_88982_20141130_182335_inLine +BABEL_OP3_402_88982_20141130_182335_outLine +BABEL_OP3_402_91336_20141122_023555_inLine +BABEL_OP3_402_91336_20141122_023555_outLine +BABEL_OP3_402_92792_20150227_162129_inLine +BABEL_OP3_402_92792_20150227_162129_outLine +BABEL_OP3_402_93411_20141120_155834_inLine +BABEL_OP3_402_93411_20141120_155834_outLine +BABEL_OP3_402_94978_20150107_204930_inLine +BABEL_OP3_402_94978_20150107_204930_outLine +BABEL_OP3_402_95663_20141103_142815_inLine +BABEL_OP3_402_95663_20141103_142815_outLine +BABEL_OP3_402_96405_20141006_202624_inLine +BABEL_OP3_402_96405_20141006_202624_outLine +BABEL_OP3_402_96730_20150110_161027_inLine +BABEL_OP3_402_96730_20150110_161027_outLine +BABEL_OP3_402_96934_20141101_192258_inLine +BABEL_OP3_402_96934_20141101_192258_outLine +BABEL_OP3_402_97376_20141221_191608_inLine +BABEL_OP3_402_97376_20141221_191608_outLine +BABEL_OP3_402_97604_20150121_010739_inLine +BABEL_OP3_402_97604_20150121_010739_outLine +BABEL_OP3_402_98489_20141028_122528_inLine +BABEL_OP3_402_98489_20141028_122528_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.list 
b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list new file mode 100644 index 00000000000..58306104f42 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_402_16184_20141120_143943_inLine +BABEL_OP3_402_16184_20141120_143943_outLine +BABEL_OP3_402_16749_20150110_182247_inLine +BABEL_OP3_402_16749_20150110_182247_outLine +BABEL_OP3_402_17914_20150107_192833_inLine +BABEL_OP3_402_17914_20150107_192833_outLine +BABEL_OP3_402_20738_20150201_004014_inLine +BABEL_OP3_402_20738_20150201_004014_outLine +BABEL_OP3_402_20768_20150110_125415_inLine +BABEL_OP3_402_20768_20150110_125415_outLine +BABEL_OP3_402_20985_20141209_223858_inLine +BABEL_OP3_402_20985_20141209_223858_outLine +BABEL_OP3_402_21794_20141110_000434_inLine +BABEL_OP3_402_21794_20141110_000434_outLine +BABEL_OP3_402_22494_20150127_212514_inLine +BABEL_OP3_402_22494_20150127_212514_outLine +BABEL_OP3_402_24270_20141127_181536_inLine +BABEL_OP3_402_24270_20141127_181536_outLine +BABEL_OP3_402_31346_20150106_163812_inLine +BABEL_OP3_402_31346_20150106_163812_outLine +BABEL_OP3_402_31346_20150107_000948_inLine +BABEL_OP3_402_31346_20150107_000948_outLine +BABEL_OP3_402_31992_20141104_154739_inLine +BABEL_OP3_402_31992_20141104_154739_outLine +BABEL_OP3_402_34336_20141101_214014_inLine +BABEL_OP3_402_34336_20141101_214014_outLine +BABEL_OP3_402_34477_20141103_012729_inLine +BABEL_OP3_402_34477_20141103_012729_outLine +BABEL_OP3_402_34564_20150110_174105_inLine +BABEL_OP3_402_34564_20150110_174105_outLine +BABEL_OP3_402_38431_20150104_193523_inLine +BABEL_OP3_402_38431_20150104_193523_outLine +BABEL_OP3_402_39059_20150201_151819_inLine +BABEL_OP3_402_39059_20150201_151819_outLine +BABEL_OP3_402_41680_20140911_133458_inLine +BABEL_OP3_402_41680_20140911_133458_outLine +BABEL_OP3_402_43784_20141027_205748_inLine +BABEL_OP3_402_43784_20141027_205748_outLine +BABEL_OP3_402_45536_20150131_234119_inLine +BABEL_OP3_402_45536_20150131_234119_outLine +BABEL_OP3_402_46688_20140927_210143_inLine +BABEL_OP3_402_46688_20140927_210143_outLine +BABEL_OP3_402_48243_20141031_160102_inLine +BABEL_OP3_402_48243_20141031_160102_outLine +BABEL_OP3_402_49197_20141123_183541_inLine +BABEL_OP3_402_49197_20141123_183541_outLine +BABEL_OP3_402_49502_20150201_200343_inLine +BABEL_OP3_402_49502_20150201_200343_outLine +BABEL_OP3_402_50779_20141124_211935_inLine +BABEL_OP3_402_50779_20141124_211935_outLine +BABEL_OP3_402_50962_20141004_143222_inLine +BABEL_OP3_402_50962_20141004_143222_outLine +BABEL_OP3_402_51015_20141209_214156_inLine +BABEL_OP3_402_51015_20141209_214156_outLine +BABEL_OP3_402_52246_20141115_174547_inLine +BABEL_OP3_402_52246_20141115_174547_outLine +BABEL_OP3_402_54074_20141110_001507_inLine +BABEL_OP3_402_54074_20141110_001507_outLine +BABEL_OP3_402_56198_20141103_152946_inLine +BABEL_OP3_402_56198_20141103_152946_outLine +BABEL_OP3_402_57065_20141213_175712_inLine +BABEL_OP3_402_57065_20141213_175712_outLine +BABEL_OP3_402_58313_20141121_191107_inLine +BABEL_OP3_402_58313_20141121_191107_outLine +BABEL_OP3_402_58489_20150110_155118_inLine +BABEL_OP3_402_58489_20150110_155118_outLine +BABEL_OP3_402_59078_20141127_201549_inLine +BABEL_OP3_402_59078_20141127_201549_outLine +BABEL_OP3_402_64768_20141116_180927_inLine +BABEL_OP3_402_64768_20141116_180927_outLine +BABEL_OP3_402_64796_20141122_163640_inLine +BABEL_OP3_402_64796_20141122_163640_outLine +BABEL_OP3_402_65367_20150103_224736_inLine +BABEL_OP3_402_65367_20150103_224736_outLine +BABEL_OP3_402_65692_20141228_202914_inLine 
+BABEL_OP3_402_65692_20141228_202914_outLine +BABEL_OP3_402_66177_20150131_201057_inLine +BABEL_OP3_402_66177_20150131_201057_outLine +BABEL_OP3_402_70221_20141222_002645_inLine +BABEL_OP3_402_70221_20141222_002645_outLine +BABEL_OP3_402_73119_20141031_182314_inLine +BABEL_OP3_402_73119_20141031_182314_outLine +BABEL_OP3_402_73301_20141117_004450_inLine +BABEL_OP3_402_73301_20141117_004450_outLine +BABEL_OP3_402_76444_20141227_143452_inLine +BABEL_OP3_402_76444_20141227_143452_outLine +BABEL_OP3_402_76683_20141128_201732_inLine +BABEL_OP3_402_76683_20141128_201732_outLine +BABEL_OP3_402_78116_20141229_210212_inLine +BABEL_OP3_402_78116_20141229_210212_outLine +BABEL_OP3_402_78254_20141101_235022_inLine +BABEL_OP3_402_78254_20141101_235022_outLine +BABEL_OP3_402_79139_20141115_153558_inLine +BABEL_OP3_402_79139_20141115_153558_outLine +BABEL_OP3_402_81229_20141116_224932_inLine +BABEL_OP3_402_81229_20141116_224932_outLine +BABEL_OP3_402_81427_20141110_165047_inLine +BABEL_OP3_402_81427_20141110_165047_outLine +BABEL_OP3_402_82089_20141113_162038_inLine +BABEL_OP3_402_82089_20141113_162038_outLine +BABEL_OP3_402_83651_20141009_145412_inLine +BABEL_OP3_402_83651_20141009_145412_outLine +BABEL_OP3_402_85048_20141204_194855_inLine +BABEL_OP3_402_85048_20141204_194855_outLine +BABEL_OP3_402_85340_20141021_182050_inLine +BABEL_OP3_402_85340_20141021_182050_outLine +BABEL_OP3_402_86713_20150101_014831_inLine +BABEL_OP3_402_86713_20150101_014831_outLine +BABEL_OP3_402_87073_20140915_154336_inLine +BABEL_OP3_402_87073_20140915_154336_outLine +BABEL_OP3_402_87871_20141224_130949_inLine +BABEL_OP3_402_87871_20141224_130949_outLine +BABEL_OP3_402_88601_20141209_160621_inLine +BABEL_OP3_402_88601_20141209_160621_outLine +BABEL_OP3_402_93604_20150304_152208_inLine +BABEL_OP3_402_93604_20150304_152208_outLine +BABEL_OP3_402_93964_20141216_021155_inLine +BABEL_OP3_402_93964_20141216_021155_outLine +BABEL_OP3_402_94869_20140912_195117_inLine +BABEL_OP3_402_94869_20140912_195117_outLine +BABEL_OP3_402_95446_20150110_150658_inLine +BABEL_OP3_402_95446_20150110_150658_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list new file mode 100644 index 00000000000..4f81d9daca4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list @@ -0,0 +1,370 @@ +BABEL_OP3_402_10416_20141126_133029_inLine +BABEL_OP3_402_10416_20141126_133029_outLine +BABEL_OP3_402_10901_20141116_141701_inLine +BABEL_OP3_402_10901_20141116_141701_outLine +BABEL_OP3_402_12220_20141106_021950_inLine +BABEL_OP3_402_12220_20141106_021950_outLine +BABEL_OP3_402_12767_20140924_184905_inLine +BABEL_OP3_402_12767_20140924_184905_outLine +BABEL_OP3_402_13030_20141107_173701_inLine +BABEL_OP3_402_13030_20141107_173701_outLine +BABEL_OP3_402_13664_20140911_160207_inLine +BABEL_OP3_402_13664_20140911_160207_outLine +BABEL_OP3_402_13709_20150131_161040_inLine +BABEL_OP3_402_13709_20150131_161040_outLine +BABEL_OP3_402_14141_20150215_162503_inLine +BABEL_OP3_402_14141_20150215_162503_outLine +BABEL_OP3_402_14229_20141108_200257_inLine +BABEL_OP3_402_14229_20141108_200257_outLine +BABEL_OP3_402_14350_20141104_165111_inLine +BABEL_OP3_402_14350_20141104_165111_outLine +BABEL_OP3_402_14807_20141126_174048_inLine +BABEL_OP3_402_14807_20141126_174048_outLine +BABEL_OP3_402_14875_20140929_193054_inLine +BABEL_OP3_402_14875_20140929_193054_outLine +BABEL_OP3_402_14899_20140925_165651_inLine +BABEL_OP3_402_14899_20140925_165651_outLine 
+BABEL_OP3_402_14929_20141110_005633_inLine +BABEL_OP3_402_14929_20141110_005633_outLine +BABEL_OP3_402_14972_20141123_182012_inLine +BABEL_OP3_402_14972_20141123_182012_outLine +BABEL_OP3_402_15163_20141123_152731_inLine +BABEL_OP3_402_15163_20141123_152731_outLine +BABEL_OP3_402_15262_20140922_152302_inLine +BABEL_OP3_402_15262_20140922_152302_outLine +BABEL_OP3_402_15749_20150105_125933_inLine +BABEL_OP3_402_15749_20150105_125933_outLine +BABEL_OP3_402_16787_20141107_025835_inLine +BABEL_OP3_402_16787_20141107_025835_outLine +BABEL_OP3_402_17520_20141123_170854_inLine +BABEL_OP3_402_17520_20141123_170854_outLine +BABEL_OP3_402_17890_20150108_163627_inLine +BABEL_OP3_402_17890_20150108_163627_outLine +BABEL_OP3_402_18380_20141113_173424_inLine +BABEL_OP3_402_18380_20141113_173424_outLine +BABEL_OP3_402_19134_20141130_163804_inLine +BABEL_OP3_402_19134_20141130_163804_outLine +BABEL_OP3_402_19621_20141123_181810_inLine +BABEL_OP3_402_19621_20141123_181810_outLine +BABEL_OP3_402_19672_20141208_162907_inLine +BABEL_OP3_402_19672_20141208_162907_outLine +BABEL_OP3_402_19703_20141102_190851_inLine +BABEL_OP3_402_19703_20141102_190851_outLine +BABEL_OP3_402_20330_20150131_162055_inLine +BABEL_OP3_402_20330_20150131_162055_outLine +BABEL_OP3_402_20800_20141013_185736_inLine +BABEL_OP3_402_20800_20141013_185736_outLine +BABEL_OP3_402_20922_20150131_235414_inLine +BABEL_OP3_402_20922_20150131_235414_outLine +BABEL_OP3_402_21004_20150108_210410_inLine +BABEL_OP3_402_21004_20150108_210410_outLine +BABEL_OP3_402_22170_20150108_185847_inLine +BABEL_OP3_402_22170_20150108_185847_outLine +BABEL_OP3_402_23151_20150110_202409_inLine +BABEL_OP3_402_23151_20150110_202409_outLine +BABEL_OP3_402_23731_20141120_162409_inLine +BABEL_OP3_402_23731_20141120_162409_outLine +BABEL_OP3_402_23731_20141120_163618_inLine +BABEL_OP3_402_23731_20141120_163618_outLine +BABEL_OP3_402_24323_20141111_182649_inLine +BABEL_OP3_402_24323_20141111_182649_outLine +BABEL_OP3_402_24470_20141205_154028_inLine +BABEL_OP3_402_24470_20141205_154028_outLine +BABEL_OP3_402_24589_20141106_144156_inLine +BABEL_OP3_402_24589_20141106_144156_outLine +BABEL_OP3_402_25412_20141128_212603_inLine +BABEL_OP3_402_25412_20141128_212603_outLine +BABEL_OP3_402_26072_20150131_110154_inLine +BABEL_OP3_402_26072_20150131_110154_outLine +BABEL_OP3_402_26398_20150304_162600_inLine +BABEL_OP3_402_26398_20150304_162600_outLine +BABEL_OP3_402_28303_20141122_153440_inLine +BABEL_OP3_402_28303_20141122_153440_outLine +BABEL_OP3_402_29021_20150131_010036_inLine +BABEL_OP3_402_29021_20150131_010036_outLine +BABEL_OP3_402_29076_20141121_164742_inLine +BABEL_OP3_402_29076_20141121_164742_outLine +BABEL_OP3_402_29168_20140926_164602_inLine +BABEL_OP3_402_29168_20140926_164602_outLine +BABEL_OP3_402_29323_20150108_000937_inLine +BABEL_OP3_402_29323_20150108_000937_outLine +BABEL_OP3_402_30250_20140929_162020_inLine +BABEL_OP3_402_30250_20140929_162020_outLine +BABEL_OP3_402_31184_20141112_204308_inLine +BABEL_OP3_402_31184_20141112_204308_outLine +BABEL_OP3_402_31624_20141017_204521_inLine +BABEL_OP3_402_31624_20141017_204521_outLine +BABEL_OP3_402_32708_20141127_210435_inLine +BABEL_OP3_402_32708_20141127_210435_outLine +BABEL_OP3_402_32832_20150214_160609_inLine +BABEL_OP3_402_32832_20150214_160609_outLine +BABEL_OP3_402_32837_20150114_173357_inLine +BABEL_OP3_402_32837_20150114_173357_outLine +BABEL_OP3_402_33175_20141011_151643_inLine +BABEL_OP3_402_33175_20141011_151643_outLine +BABEL_OP3_402_33355_20141222_030242_inLine 
+BABEL_OP3_402_33355_20141222_030242_outLine +BABEL_OP3_402_33704_20150108_121853_inLine +BABEL_OP3_402_33704_20150108_121853_outLine +BABEL_OP3_402_33951_20141115_015656_inLine +BABEL_OP3_402_33951_20141115_015656_outLine +BABEL_OP3_402_34679_20141012_230850_inLine +BABEL_OP3_402_34679_20141012_230850_outLine +BABEL_OP3_402_34688_20141027_170150_inLine +BABEL_OP3_402_34688_20141027_170150_outLine +BABEL_OP3_402_35069_20150216_023523_inLine +BABEL_OP3_402_35069_20150216_023523_outLine +BABEL_OP3_402_35583_20150121_013548_inLine +BABEL_OP3_402_35583_20150121_013548_outLine +BABEL_OP3_402_37228_20150120_211131_inLine +BABEL_OP3_402_37228_20150120_211131_outLine +BABEL_OP3_402_37281_20141110_214558_inLine +BABEL_OP3_402_37281_20141110_214558_outLine +BABEL_OP3_402_37682_20141103_210556_inLine +BABEL_OP3_402_37682_20141103_210556_outLine +BABEL_OP3_402_37853_20150107_154609_inLine +BABEL_OP3_402_37853_20150107_154609_outLine +BABEL_OP3_402_38340_20141020_170141_inLine +BABEL_OP3_402_38340_20141020_170141_outLine +BABEL_OP3_402_39159_20140930_201318_inLine +BABEL_OP3_402_39159_20140930_201318_outLine +BABEL_OP3_402_39426_20150202_103633_inLine +BABEL_OP3_402_39426_20150202_103633_outLine +BABEL_OP3_402_39680_20150131_151358_inLine +BABEL_OP3_402_39680_20150131_151358_outLine +BABEL_OP3_402_39920_20150216_014707_inLine +BABEL_OP3_402_39920_20150216_014707_outLine +BABEL_OP3_402_41109_20150101_021923_inLine +BABEL_OP3_402_41109_20150101_021923_outLine +BABEL_OP3_402_43239_20150205_011521_inLine +BABEL_OP3_402_43239_20150205_011521_outLine +BABEL_OP3_402_43368_20141107_210043_inLine +BABEL_OP3_402_43368_20141107_210043_outLine +BABEL_OP3_402_43920_20141228_001637_inLine +BABEL_OP3_402_43920_20141228_001637_outLine +BABEL_OP3_402_44255_20150131_183155_inLine +BABEL_OP3_402_44255_20150131_183155_outLine +BABEL_OP3_402_44961_20140921_154533_inLine +BABEL_OP3_402_44961_20140921_154533_outLine +BABEL_OP3_402_46702_20140929_141902_inLine +BABEL_OP3_402_46702_20140929_141902_outLine +BABEL_OP3_402_46770_20150124_001351_inLine +BABEL_OP3_402_46770_20150124_001351_outLine +BABEL_OP3_402_46881_20141028_192343_inLine +BABEL_OP3_402_46881_20141028_192343_outLine +BABEL_OP3_402_47270_20150128_163211_inLine +BABEL_OP3_402_47270_20150128_163211_outLine +BABEL_OP3_402_48422_20150101_193320_inLine +BABEL_OP3_402_48422_20150101_193320_outLine +BABEL_OP3_402_48422_20150101_194803_inLine +BABEL_OP3_402_48422_20150101_194803_outLine +BABEL_OP3_402_48610_20140920_172026_inLine +BABEL_OP3_402_48610_20140920_172026_outLine +BABEL_OP3_402_48789_20141113_181720_inLine +BABEL_OP3_402_48789_20141113_181720_outLine +BABEL_OP3_402_49001_20141010_142908_inLine +BABEL_OP3_402_49001_20141010_142908_outLine +BABEL_OP3_402_49001_20141010_152312_inLine +BABEL_OP3_402_49001_20141010_152312_outLine +BABEL_OP3_402_49907_20141006_162735_inLine +BABEL_OP3_402_49907_20141006_162735_outLine +BABEL_OP3_402_50601_20141121_182643_inLine +BABEL_OP3_402_50601_20141121_182643_outLine +BABEL_OP3_402_50810_20140912_181008_inLine +BABEL_OP3_402_50810_20140912_181008_outLine +BABEL_OP3_402_51540_20150131_203108_inLine +BABEL_OP3_402_51540_20150131_203108_outLine +BABEL_OP3_402_51611_20141010_163542_inLine +BABEL_OP3_402_51611_20141010_163542_outLine +BABEL_OP3_402_51968_20141109_154701_inLine +BABEL_OP3_402_51968_20141109_154701_outLine +BABEL_OP3_402_52422_20150128_142229_inLine +BABEL_OP3_402_52422_20150128_142229_outLine +BABEL_OP3_402_52854_20140910_200850_inLine +BABEL_OP3_402_52854_20140910_200850_outLine 
+BABEL_OP3_402_52932_20141007_182635_inLine +BABEL_OP3_402_52932_20141007_182635_outLine +BABEL_OP3_402_54104_20141104_173741_inLine +BABEL_OP3_402_54104_20141104_173741_outLine +BABEL_OP3_402_54405_20141123_173044_inLine +BABEL_OP3_402_54405_20141123_173044_outLine +BABEL_OP3_402_55267_20141221_184118_inLine +BABEL_OP3_402_55267_20141221_184118_outLine +BABEL_OP3_402_56720_20141228_190653_inLine +BABEL_OP3_402_56720_20141228_190653_outLine +BABEL_OP3_402_57650_20150107_171335_inLine +BABEL_OP3_402_57650_20150107_171335_outLine +BABEL_OP3_402_57654_20141031_172711_inLine +BABEL_OP3_402_57654_20141031_172711_outLine +BABEL_OP3_402_57922_20141130_172609_inLine +BABEL_OP3_402_57922_20141130_172609_outLine +BABEL_OP3_402_58850_20141115_223848_inLine +BABEL_OP3_402_58850_20141115_223848_outLine +BABEL_OP3_402_59402_20150103_181612_inLine +BABEL_OP3_402_59402_20150103_181612_outLine +BABEL_OP3_402_60418_20141219_231820_inLine +BABEL_OP3_402_60418_20141219_231820_outLine +BABEL_OP3_402_60474_20141101_195523_inLine +BABEL_OP3_402_60474_20141101_195523_outLine +BABEL_OP3_402_61167_20141106_195710_inLine +BABEL_OP3_402_61167_20141106_195710_outLine +BABEL_OP3_402_61219_20141101_192955_inLine +BABEL_OP3_402_61219_20141101_192955_outLine +BABEL_OP3_402_61888_20150108_210230_inLine +BABEL_OP3_402_61888_20150108_210230_outLine +BABEL_OP3_402_62456_20141203_005134_inLine +BABEL_OP3_402_62456_20141203_005134_outLine +BABEL_OP3_402_62800_20141028_170241_inLine +BABEL_OP3_402_62800_20141028_170241_outLine +BABEL_OP3_402_62810_20140917_184635_inLine +BABEL_OP3_402_62810_20140917_184635_outLine +BABEL_OP3_402_63081_20141003_151638_inLine +BABEL_OP3_402_63081_20141003_151638_outLine +BABEL_OP3_402_64014_20150108_162849_inLine +BABEL_OP3_402_64014_20150108_162849_outLine +BABEL_OP3_402_64065_20141020_152452_inLine +BABEL_OP3_402_64065_20141020_152452_outLine +BABEL_OP3_402_64870_20141228_184201_inLine +BABEL_OP3_402_64870_20141228_184201_outLine +BABEL_OP3_402_65064_20141125_162638_inLine +BABEL_OP3_402_65064_20141125_162638_outLine +BABEL_OP3_402_65298_20150130_232120_inLine +BABEL_OP3_402_65298_20150130_232120_outLine +BABEL_OP3_402_65723_20141022_231832_inLine +BABEL_OP3_402_65723_20141022_231832_outLine +BABEL_OP3_402_66001_20140921_123931_inLine +BABEL_OP3_402_66001_20140921_123931_outLine +BABEL_OP3_402_66045_20141115_162944_inLine +BABEL_OP3_402_66045_20141115_162944_outLine +BABEL_OP3_402_67152_20150107_163104_inLine +BABEL_OP3_402_67152_20150107_163104_outLine +BABEL_OP3_402_67373_20141014_152719_inLine +BABEL_OP3_402_67373_20141014_152719_outLine +BABEL_OP3_402_68627_20141107_033600_inLine +BABEL_OP3_402_68627_20141107_033600_outLine +BABEL_OP3_402_69107_20141123_145802_inLine +BABEL_OP3_402_69107_20141123_145802_outLine +BABEL_OP3_402_69574_20140915_170204_inLine +BABEL_OP3_402_69574_20140915_170204_outLine +BABEL_OP3_402_70282_20141128_162640_inLine +BABEL_OP3_402_70282_20141128_162640_outLine +BABEL_OP3_402_70601_20141104_190522_inLine +BABEL_OP3_402_70601_20141104_190522_outLine +BABEL_OP3_402_70794_20141122_201302_inLine +BABEL_OP3_402_70794_20141122_201302_outLine +BABEL_OP3_402_71566_20150109_002519_inLine +BABEL_OP3_402_71566_20150109_002519_outLine +BABEL_OP3_402_71704_20141030_192615_inLine +BABEL_OP3_402_71704_20141030_192615_outLine +BABEL_OP3_402_72844_20150216_194719_inLine +BABEL_OP3_402_72844_20150216_194719_outLine +BABEL_OP3_402_73022_20150103_135209_inLine +BABEL_OP3_402_73022_20150103_135209_outLine +BABEL_OP3_402_73757_20141115_190524_inLine 
+BABEL_OP3_402_73757_20141115_190524_outLine +BABEL_OP3_402_74111_20150102_112305_inLine +BABEL_OP3_402_74111_20150102_112305_outLine +BABEL_OP3_402_74455_20150201_180158_inLine +BABEL_OP3_402_74455_20150201_180158_outLine +BABEL_OP3_402_74799_20141129_202734_inLine +BABEL_OP3_402_74799_20141129_202734_outLine +BABEL_OP3_402_75764_20150202_000719_inLine +BABEL_OP3_402_75764_20150202_000719_outLine +BABEL_OP3_402_75993_20141021_183118_inLine +BABEL_OP3_402_75993_20141021_183118_outLine +BABEL_OP3_402_78360_20150131_163647_inLine +BABEL_OP3_402_78360_20150131_163647_outLine +BABEL_OP3_402_78630_20140930_135924_inLine +BABEL_OP3_402_78630_20140930_135924_outLine +BABEL_OP3_402_79751_20141104_200346_inLine +BABEL_OP3_402_79751_20141104_200346_outLine +BABEL_OP3_402_79751_20141104_201600_inLine +BABEL_OP3_402_79751_20141104_201600_outLine +BABEL_OP3_402_80439_20141104_195124_inLine +BABEL_OP3_402_80439_20141104_195124_outLine +BABEL_OP3_402_82224_20150101_162311_inLine +BABEL_OP3_402_82224_20150101_162311_outLine +BABEL_OP3_402_82637_20141006_173314_inLine +BABEL_OP3_402_82637_20141006_173314_outLine +BABEL_OP3_402_83238_20141122_140740_inLine +BABEL_OP3_402_83238_20141122_140740_outLine +BABEL_OP3_402_83436_20141017_162042_inLine +BABEL_OP3_402_83436_20141017_162042_outLine +BABEL_OP3_402_84061_20141107_162356_inLine +BABEL_OP3_402_84061_20141107_162356_outLine +BABEL_OP3_402_84611_20141023_205020_inLine +BABEL_OP3_402_84611_20141023_205020_outLine +BABEL_OP3_402_84737_20150129_233418_inLine +BABEL_OP3_402_84737_20150129_233418_outLine +BABEL_OP3_402_84815_20141225_185456_inLine +BABEL_OP3_402_84815_20141225_185456_outLine +BABEL_OP3_402_85248_20150109_001722_inLine +BABEL_OP3_402_85248_20150109_001722_outLine +BABEL_OP3_402_86191_20141105_130254_inLine +BABEL_OP3_402_86191_20141105_130254_outLine +BABEL_OP3_402_86722_20141101_204411_inLine +BABEL_OP3_402_86722_20141101_204411_outLine +BABEL_OP3_402_86952_20141105_144737_inLine +BABEL_OP3_402_86952_20141105_144737_outLine +BABEL_OP3_402_87179_20150203_020351_inLine +BABEL_OP3_402_87179_20150203_020351_outLine +BABEL_OP3_402_88776_20140921_133554_inLine +BABEL_OP3_402_88776_20140921_133554_outLine +BABEL_OP3_402_88873_20140930_131622_inLine +BABEL_OP3_402_88873_20140930_131622_outLine +BABEL_OP3_402_89794_20141213_211839_inLine +BABEL_OP3_402_89794_20141213_211839_outLine +BABEL_OP3_402_89877_20150107_013739_inLine +BABEL_OP3_402_89877_20150107_013739_outLine +BABEL_OP3_402_89877_20150107_014426_inLine +BABEL_OP3_402_89877_20150107_014426_outLine +BABEL_OP3_402_90777_20141106_234557_inLine +BABEL_OP3_402_90777_20141106_234557_outLine +BABEL_OP3_402_91884_20150302_183207_inLine +BABEL_OP3_402_91884_20150302_183207_outLine +BABEL_OP3_402_91891_20150108_203636_inLine +BABEL_OP3_402_91891_20150108_203636_outLine +BABEL_OP3_402_91977_20141225_143539_inLine +BABEL_OP3_402_91977_20141225_143539_outLine +BABEL_OP3_402_92356_20150109_005846_inLine +BABEL_OP3_402_92356_20150109_005846_outLine +BABEL_OP3_402_92459_20141102_124516_inLine +BABEL_OP3_402_92459_20141102_124516_outLine +BABEL_OP3_402_92557_20150201_205110_inLine +BABEL_OP3_402_92557_20150201_205110_outLine +BABEL_OP3_402_92698_20141115_182138_inLine +BABEL_OP3_402_92698_20141115_182138_outLine +BABEL_OP3_402_93475_20141119_140615_inLine +BABEL_OP3_402_93475_20141119_140615_outLine +BABEL_OP3_402_93490_20150106_174211_inLine +BABEL_OP3_402_93490_20150106_174211_outLine +BABEL_OP3_402_94002_20141216_015659_inLine +BABEL_OP3_402_94002_20141216_015659_outLine 
+BABEL_OP3_402_94166_20150128_151103_inLine +BABEL_OP3_402_94166_20150128_151103_outLine +BABEL_OP3_402_94409_20141214_185032_inLine +BABEL_OP3_402_94409_20141214_185032_outLine +BABEL_OP3_402_94923_20141201_154601_inLine +BABEL_OP3_402_94923_20141201_154601_outLine +BABEL_OP3_402_96190_20141103_161533_inLine +BABEL_OP3_402_96190_20141103_161533_outLine +BABEL_OP3_402_96205_20141126_152921_inLine +BABEL_OP3_402_96205_20141126_152921_outLine +BABEL_OP3_402_97264_20150131_205411_inLine +BABEL_OP3_402_97264_20150131_205411_outLine +BABEL_OP3_402_97772_20140915_200919_inLine +BABEL_OP3_402_97772_20140915_200919_outLine +BABEL_OP3_402_97896_20141122_161128_inLine +BABEL_OP3_402_97896_20141122_161128_outLine +BABEL_OP3_402_98165_20141106_191239_inLine +BABEL_OP3_402_98165_20141106_191239_outLine +BABEL_OP3_402_98888_20141108_211953_inLine +BABEL_OP3_402_98888_20141108_211953_outLine +BABEL_OP3_402_99202_20141123_162817_inLine +BABEL_OP3_402_99202_20141123_162817_outLine +BABEL_OP3_402_99516_20140917_174712_inLine +BABEL_OP3_402_99516_20140917_174712_outLine +BABEL_OP3_402_99594_20141111_170413_inLine +BABEL_OP3_402_99594_20141111_170413_outLine +BABEL_OP3_402_99887_20150104_230431_inLine +BABEL_OP3_402_99887_20150104_230431_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/training.list b/egs/babel/s5d/conf/lists/402-javanese/training.list new file mode 100644 index 00000000000..ce7313fceeb --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/training.list @@ -0,0 +1,492 @@ +BABEL_OP3_402_10416_20141126_133029_inLine +BABEL_OP3_402_10416_20141126_133029_outLine +BABEL_OP3_402_10901_20141116_141701_inLine +BABEL_OP3_402_10901_20141116_141701_outLine +BABEL_OP3_402_12220_20141106_021950_inLine +BABEL_OP3_402_12220_20141106_021950_outLine +BABEL_OP3_402_12767_20140924_184905_inLine +BABEL_OP3_402_12767_20140924_184905_outLine +BABEL_OP3_402_13030_20141107_173701_inLine +BABEL_OP3_402_13030_20141107_173701_outLine +BABEL_OP3_402_13664_20140911_160207_inLine +BABEL_OP3_402_13664_20140911_160207_outLine +BABEL_OP3_402_13709_20150131_161040_inLine +BABEL_OP3_402_13709_20150131_161040_outLine +BABEL_OP3_402_14141_20150215_162503_inLine +BABEL_OP3_402_14141_20150215_162503_outLine +BABEL_OP3_402_14229_20141108_200257_inLine +BABEL_OP3_402_14229_20141108_200257_outLine +BABEL_OP3_402_14350_20141104_165111_inLine +BABEL_OP3_402_14350_20141104_165111_outLine +BABEL_OP3_402_14807_20141126_174048_inLine +BABEL_OP3_402_14807_20141126_174048_outLine +BABEL_OP3_402_14875_20140929_193054_inLine +BABEL_OP3_402_14875_20140929_193054_outLine +BABEL_OP3_402_14899_20140925_165651_inLine +BABEL_OP3_402_14899_20140925_165651_outLine +BABEL_OP3_402_14929_20141110_005633_inLine +BABEL_OP3_402_14929_20141110_005633_outLine +BABEL_OP3_402_14972_20141123_182012_inLine +BABEL_OP3_402_14972_20141123_182012_outLine +BABEL_OP3_402_15163_20141123_152731_inLine +BABEL_OP3_402_15163_20141123_152731_outLine +BABEL_OP3_402_15262_20140922_152302_inLine +BABEL_OP3_402_15262_20140922_152302_outLine +BABEL_OP3_402_15749_20150105_125933_inLine +BABEL_OP3_402_15749_20150105_125933_outLine +BABEL_OP3_402_16184_20141120_143943_inLine +BABEL_OP3_402_16184_20141120_143943_outLine +BABEL_OP3_402_16749_20150110_182247_inLine +BABEL_OP3_402_16749_20150110_182247_outLine +BABEL_OP3_402_16787_20141107_025835_inLine +BABEL_OP3_402_16787_20141107_025835_outLine +BABEL_OP3_402_17520_20141123_170854_inLine +BABEL_OP3_402_17520_20141123_170854_outLine +BABEL_OP3_402_17890_20150108_163627_inLine +BABEL_OP3_402_17890_20150108_163627_outLine 
+BABEL_OP3_402_17914_20150107_192833_inLine
+BABEL_OP3_402_17914_20150107_192833_outLine
+BABEL_OP3_402_18380_20141113_173424_inLine
+BABEL_OP3_402_18380_20141113_173424_outLine
+BABEL_OP3_402_19134_20141130_163804_inLine
+BABEL_OP3_402_19134_20141130_163804_outLine
+BABEL_OP3_402_19621_20141123_181810_inLine
+BABEL_OP3_402_19621_20141123_181810_outLine
+BABEL_OP3_402_19672_20141208_162907_inLine
+BABEL_OP3_402_19672_20141208_162907_outLine
+BABEL_OP3_402_19703_20141102_190851_inLine
+BABEL_OP3_402_19703_20141102_190851_outLine
+BABEL_OP3_402_20330_20150131_162055_inLine
+BABEL_OP3_402_20330_20150131_162055_outLine
+BABEL_OP3_402_20738_20150201_004014_inLine
+BABEL_OP3_402_20738_20150201_004014_outLine
+BABEL_OP3_402_20768_20150110_125415_inLine
+BABEL_OP3_402_20768_20150110_125415_outLine
+BABEL_OP3_402_20800_20141013_185736_inLine
+BABEL_OP3_402_20800_20141013_185736_outLine
+BABEL_OP3_402_20922_20150131_235414_inLine
+BABEL_OP3_402_20922_20150131_235414_outLine
+BABEL_OP3_402_20985_20141209_223858_inLine
+BABEL_OP3_402_20985_20141209_223858_outLine
+BABEL_OP3_402_21004_20150108_210410_inLine
+BABEL_OP3_402_21004_20150108_210410_outLine
+BABEL_OP3_402_21794_20141110_000434_inLine
+BABEL_OP3_402_21794_20141110_000434_outLine
+BABEL_OP3_402_22170_20150108_185847_inLine
+BABEL_OP3_402_22170_20150108_185847_outLine
+BABEL_OP3_402_22494_20150127_212514_inLine
+BABEL_OP3_402_22494_20150127_212514_outLine
+BABEL_OP3_402_23151_20150110_202409_inLine
+BABEL_OP3_402_23151_20150110_202409_outLine
+BABEL_OP3_402_23731_20141120_162409_inLine
+BABEL_OP3_402_23731_20141120_162409_outLine
+BABEL_OP3_402_23731_20141120_163618_inLine
+BABEL_OP3_402_23731_20141120_163618_outLine
+BABEL_OP3_402_24270_20141127_181536_inLine
+BABEL_OP3_402_24270_20141127_181536_outLine
+BABEL_OP3_402_24323_20141111_182649_inLine
+BABEL_OP3_402_24323_20141111_182649_outLine
+BABEL_OP3_402_24470_20141205_154028_inLine
+BABEL_OP3_402_24470_20141205_154028_outLine
+BABEL_OP3_402_24589_20141106_144156_inLine
+BABEL_OP3_402_24589_20141106_144156_outLine
+BABEL_OP3_402_25412_20141128_212603_inLine
+BABEL_OP3_402_25412_20141128_212603_outLine
+BABEL_OP3_402_26072_20150131_110154_inLine
+BABEL_OP3_402_26072_20150131_110154_outLine
+BABEL_OP3_402_26398_20150304_162600_inLine
+BABEL_OP3_402_26398_20150304_162600_outLine
+BABEL_OP3_402_28303_20141122_153440_inLine
+BABEL_OP3_402_28303_20141122_153440_outLine
+BABEL_OP3_402_29021_20150131_010036_inLine
+BABEL_OP3_402_29021_20150131_010036_outLine
+BABEL_OP3_402_29076_20141121_164742_inLine
+BABEL_OP3_402_29076_20141121_164742_outLine
+BABEL_OP3_402_29168_20140926_164602_inLine
+BABEL_OP3_402_29168_20140926_164602_outLine
+BABEL_OP3_402_29323_20150108_000937_inLine
+BABEL_OP3_402_29323_20150108_000937_outLine
+BABEL_OP3_402_30250_20140929_162020_inLine
+BABEL_OP3_402_30250_20140929_162020_outLine
+BABEL_OP3_402_31184_20141112_204308_inLine
+BABEL_OP3_402_31184_20141112_204308_outLine
+BABEL_OP3_402_31346_20150106_163812_inLine
+BABEL_OP3_402_31346_20150106_163812_outLine
+BABEL_OP3_402_31346_20150107_000948_inLine
+BABEL_OP3_402_31346_20150107_000948_outLine
+BABEL_OP3_402_31624_20141017_204521_inLine
+BABEL_OP3_402_31624_20141017_204521_outLine
+BABEL_OP3_402_31992_20141104_154739_inLine
+BABEL_OP3_402_31992_20141104_154739_outLine
+BABEL_OP3_402_32708_20141127_210435_inLine
+BABEL_OP3_402_32708_20141127_210435_outLine
+BABEL_OP3_402_32832_20150214_160609_inLine
+BABEL_OP3_402_32832_20150214_160609_outLine
+BABEL_OP3_402_32837_20150114_173357_inLine
+BABEL_OP3_402_32837_20150114_173357_outLine
+BABEL_OP3_402_33175_20141011_151643_inLine
+BABEL_OP3_402_33175_20141011_151643_outLine
+BABEL_OP3_402_33355_20141222_030242_inLine
+BABEL_OP3_402_33355_20141222_030242_outLine
+BABEL_OP3_402_33704_20150108_121853_inLine
+BABEL_OP3_402_33704_20150108_121853_outLine
+BABEL_OP3_402_33951_20141115_015656_inLine
+BABEL_OP3_402_33951_20141115_015656_outLine
+BABEL_OP3_402_34336_20141101_214014_inLine
+BABEL_OP3_402_34336_20141101_214014_outLine
+BABEL_OP3_402_34477_20141103_012729_inLine
+BABEL_OP3_402_34477_20141103_012729_outLine
+BABEL_OP3_402_34564_20150110_174105_inLine
+BABEL_OP3_402_34564_20150110_174105_outLine
+BABEL_OP3_402_34679_20141012_230850_inLine
+BABEL_OP3_402_34679_20141012_230850_outLine
+BABEL_OP3_402_34688_20141027_170150_inLine
+BABEL_OP3_402_34688_20141027_170150_outLine
+BABEL_OP3_402_35069_20150216_023523_inLine
+BABEL_OP3_402_35069_20150216_023523_outLine
+BABEL_OP3_402_35583_20150121_013548_inLine
+BABEL_OP3_402_35583_20150121_013548_outLine
+BABEL_OP3_402_37228_20150120_211131_inLine
+BABEL_OP3_402_37228_20150120_211131_outLine
+BABEL_OP3_402_37281_20141110_214558_inLine
+BABEL_OP3_402_37281_20141110_214558_outLine
+BABEL_OP3_402_37682_20141103_210556_inLine
+BABEL_OP3_402_37682_20141103_210556_outLine
+BABEL_OP3_402_37853_20150107_154609_inLine
+BABEL_OP3_402_37853_20150107_154609_outLine
+BABEL_OP3_402_38340_20141020_170141_inLine
+BABEL_OP3_402_38340_20141020_170141_outLine
+BABEL_OP3_402_38431_20150104_193523_inLine
+BABEL_OP3_402_38431_20150104_193523_outLine
+BABEL_OP3_402_39059_20150201_151819_inLine
+BABEL_OP3_402_39059_20150201_151819_outLine
+BABEL_OP3_402_39159_20140930_201318_inLine
+BABEL_OP3_402_39159_20140930_201318_outLine
+BABEL_OP3_402_39426_20150202_103633_inLine
+BABEL_OP3_402_39426_20150202_103633_outLine
+BABEL_OP3_402_39680_20150131_151358_inLine
+BABEL_OP3_402_39680_20150131_151358_outLine
+BABEL_OP3_402_39920_20150216_014707_inLine
+BABEL_OP3_402_39920_20150216_014707_outLine
+BABEL_OP3_402_41109_20150101_021923_inLine
+BABEL_OP3_402_41109_20150101_021923_outLine
+BABEL_OP3_402_41680_20140911_133458_inLine
+BABEL_OP3_402_41680_20140911_133458_outLine
+BABEL_OP3_402_43239_20150205_011521_inLine
+BABEL_OP3_402_43239_20150205_011521_outLine
+BABEL_OP3_402_43368_20141107_210043_inLine
+BABEL_OP3_402_43368_20141107_210043_outLine
+BABEL_OP3_402_43784_20141027_205748_inLine
+BABEL_OP3_402_43784_20141027_205748_outLine
+BABEL_OP3_402_43920_20141228_001637_inLine
+BABEL_OP3_402_43920_20141228_001637_outLine
+BABEL_OP3_402_44255_20150131_183155_inLine
+BABEL_OP3_402_44255_20150131_183155_outLine
+BABEL_OP3_402_44961_20140921_154533_inLine
+BABEL_OP3_402_44961_20140921_154533_outLine
+BABEL_OP3_402_45536_20150131_234119_inLine
+BABEL_OP3_402_45536_20150131_234119_outLine
+BABEL_OP3_402_46688_20140927_210143_inLine
+BABEL_OP3_402_46688_20140927_210143_outLine
+BABEL_OP3_402_46702_20140929_141902_inLine
+BABEL_OP3_402_46702_20140929_141902_outLine
+BABEL_OP3_402_46770_20150124_001351_inLine
+BABEL_OP3_402_46770_20150124_001351_outLine
+BABEL_OP3_402_46881_20141028_192343_inLine
+BABEL_OP3_402_46881_20141028_192343_outLine
+BABEL_OP3_402_47270_20150128_163211_inLine
+BABEL_OP3_402_47270_20150128_163211_outLine
+BABEL_OP3_402_48243_20141031_160102_inLine
+BABEL_OP3_402_48243_20141031_160102_outLine
+BABEL_OP3_402_48422_20150101_193320_inLine
+BABEL_OP3_402_48422_20150101_193320_outLine
+BABEL_OP3_402_48422_20150101_194803_inLine
+BABEL_OP3_402_48422_20150101_194803_outLine
+BABEL_OP3_402_48610_20140920_172026_inLine
+BABEL_OP3_402_48610_20140920_172026_outLine
+BABEL_OP3_402_48789_20141113_181720_inLine
+BABEL_OP3_402_48789_20141113_181720_outLine
+BABEL_OP3_402_49001_20141010_142908_inLine
+BABEL_OP3_402_49001_20141010_142908_outLine
+BABEL_OP3_402_49001_20141010_152312_inLine
+BABEL_OP3_402_49001_20141010_152312_outLine
+BABEL_OP3_402_49197_20141123_183541_inLine
+BABEL_OP3_402_49197_20141123_183541_outLine
+BABEL_OP3_402_49502_20150201_200343_inLine
+BABEL_OP3_402_49502_20150201_200343_outLine
+BABEL_OP3_402_49907_20141006_162735_inLine
+BABEL_OP3_402_49907_20141006_162735_outLine
+BABEL_OP3_402_50601_20141121_182643_inLine
+BABEL_OP3_402_50601_20141121_182643_outLine
+BABEL_OP3_402_50779_20141124_211935_inLine
+BABEL_OP3_402_50779_20141124_211935_outLine
+BABEL_OP3_402_50810_20140912_181008_inLine
+BABEL_OP3_402_50810_20140912_181008_outLine
+BABEL_OP3_402_50962_20141004_143222_inLine
+BABEL_OP3_402_50962_20141004_143222_outLine
+BABEL_OP3_402_51015_20141209_214156_inLine
+BABEL_OP3_402_51015_20141209_214156_outLine
+BABEL_OP3_402_51540_20150131_203108_inLine
+BABEL_OP3_402_51540_20150131_203108_outLine
+BABEL_OP3_402_51611_20141010_163542_inLine
+BABEL_OP3_402_51611_20141010_163542_outLine
+BABEL_OP3_402_51968_20141109_154701_inLine
+BABEL_OP3_402_51968_20141109_154701_outLine
+BABEL_OP3_402_52246_20141115_174547_inLine
+BABEL_OP3_402_52246_20141115_174547_outLine
+BABEL_OP3_402_52422_20150128_142229_inLine
+BABEL_OP3_402_52422_20150128_142229_outLine
+BABEL_OP3_402_52854_20140910_200850_inLine
+BABEL_OP3_402_52854_20140910_200850_outLine
+BABEL_OP3_402_52932_20141007_182635_inLine
+BABEL_OP3_402_52932_20141007_182635_outLine
+BABEL_OP3_402_54074_20141110_001507_inLine
+BABEL_OP3_402_54074_20141110_001507_outLine
+BABEL_OP3_402_54104_20141104_173741_inLine
+BABEL_OP3_402_54104_20141104_173741_outLine
+BABEL_OP3_402_54405_20141123_173044_inLine
+BABEL_OP3_402_54405_20141123_173044_outLine
+BABEL_OP3_402_55267_20141221_184118_inLine
+BABEL_OP3_402_55267_20141221_184118_outLine
+BABEL_OP3_402_56198_20141103_152946_inLine
+BABEL_OP3_402_56198_20141103_152946_outLine
+BABEL_OP3_402_56720_20141228_190653_inLine
+BABEL_OP3_402_56720_20141228_190653_outLine
+BABEL_OP3_402_57065_20141213_175712_inLine
+BABEL_OP3_402_57065_20141213_175712_outLine
+BABEL_OP3_402_57650_20150107_171335_inLine
+BABEL_OP3_402_57650_20150107_171335_outLine
+BABEL_OP3_402_57654_20141031_172711_inLine
+BABEL_OP3_402_57654_20141031_172711_outLine
+BABEL_OP3_402_57922_20141130_172609_inLine
+BABEL_OP3_402_57922_20141130_172609_outLine
+BABEL_OP3_402_58313_20141121_191107_inLine
+BABEL_OP3_402_58313_20141121_191107_outLine
+BABEL_OP3_402_58489_20150110_155118_inLine
+BABEL_OP3_402_58489_20150110_155118_outLine
+BABEL_OP3_402_58850_20141115_223848_inLine
+BABEL_OP3_402_58850_20141115_223848_outLine
+BABEL_OP3_402_59078_20141127_201549_inLine
+BABEL_OP3_402_59078_20141127_201549_outLine
+BABEL_OP3_402_59402_20150103_181612_inLine
+BABEL_OP3_402_59402_20150103_181612_outLine
+BABEL_OP3_402_60418_20141219_231820_inLine
+BABEL_OP3_402_60418_20141219_231820_outLine
+BABEL_OP3_402_60474_20141101_195523_inLine
+BABEL_OP3_402_60474_20141101_195523_outLine
+BABEL_OP3_402_61167_20141106_195710_inLine
+BABEL_OP3_402_61167_20141106_195710_outLine
+BABEL_OP3_402_61219_20141101_192955_inLine
+BABEL_OP3_402_61219_20141101_192955_outLine
+BABEL_OP3_402_61888_20150108_210230_inLine
+BABEL_OP3_402_61888_20150108_210230_outLine
+BABEL_OP3_402_62456_20141203_005134_inLine
+BABEL_OP3_402_62456_20141203_005134_outLine
+BABEL_OP3_402_62800_20141028_170241_inLine
+BABEL_OP3_402_62800_20141028_170241_outLine
+BABEL_OP3_402_62810_20140917_184635_inLine
+BABEL_OP3_402_62810_20140917_184635_outLine
+BABEL_OP3_402_63081_20141003_151638_inLine
+BABEL_OP3_402_63081_20141003_151638_outLine
+BABEL_OP3_402_64014_20150108_162849_inLine
+BABEL_OP3_402_64014_20150108_162849_outLine
+BABEL_OP3_402_64065_20141020_152452_inLine
+BABEL_OP3_402_64065_20141020_152452_outLine
+BABEL_OP3_402_64768_20141116_180927_inLine
+BABEL_OP3_402_64768_20141116_180927_outLine
+BABEL_OP3_402_64796_20141122_163640_inLine
+BABEL_OP3_402_64796_20141122_163640_outLine
+BABEL_OP3_402_64870_20141228_184201_inLine
+BABEL_OP3_402_64870_20141228_184201_outLine
+BABEL_OP3_402_65064_20141125_162638_inLine
+BABEL_OP3_402_65064_20141125_162638_outLine
+BABEL_OP3_402_65298_20150130_232120_inLine
+BABEL_OP3_402_65298_20150130_232120_outLine
+BABEL_OP3_402_65367_20150103_224736_inLine
+BABEL_OP3_402_65367_20150103_224736_outLine
+BABEL_OP3_402_65692_20141228_202914_inLine
+BABEL_OP3_402_65692_20141228_202914_outLine
+BABEL_OP3_402_65723_20141022_231832_inLine
+BABEL_OP3_402_65723_20141022_231832_outLine
+BABEL_OP3_402_66001_20140921_123931_inLine
+BABEL_OP3_402_66001_20140921_123931_outLine
+BABEL_OP3_402_66045_20141115_162944_inLine
+BABEL_OP3_402_66045_20141115_162944_outLine
+BABEL_OP3_402_66177_20150131_201057_inLine
+BABEL_OP3_402_66177_20150131_201057_outLine
+BABEL_OP3_402_67152_20150107_163104_inLine
+BABEL_OP3_402_67152_20150107_163104_outLine
+BABEL_OP3_402_67373_20141014_152719_inLine
+BABEL_OP3_402_67373_20141014_152719_outLine
+BABEL_OP3_402_68627_20141107_033600_inLine
+BABEL_OP3_402_68627_20141107_033600_outLine
+BABEL_OP3_402_69107_20141123_145802_inLine
+BABEL_OP3_402_69107_20141123_145802_outLine
+BABEL_OP3_402_69574_20140915_170204_inLine
+BABEL_OP3_402_69574_20140915_170204_outLine
+BABEL_OP3_402_70221_20141222_002645_inLine
+BABEL_OP3_402_70221_20141222_002645_outLine
+BABEL_OP3_402_70282_20141128_162640_inLine
+BABEL_OP3_402_70282_20141128_162640_outLine
+BABEL_OP3_402_70601_20141104_190522_inLine
+BABEL_OP3_402_70601_20141104_190522_outLine
+BABEL_OP3_402_70794_20141122_201302_inLine
+BABEL_OP3_402_70794_20141122_201302_outLine
+BABEL_OP3_402_71566_20150109_002519_inLine
+BABEL_OP3_402_71566_20150109_002519_outLine
+BABEL_OP3_402_71704_20141030_192615_inLine
+BABEL_OP3_402_71704_20141030_192615_outLine
+BABEL_OP3_402_72844_20150216_194719_inLine
+BABEL_OP3_402_72844_20150216_194719_outLine
+BABEL_OP3_402_73022_20150103_135209_inLine
+BABEL_OP3_402_73022_20150103_135209_outLine
+BABEL_OP3_402_73119_20141031_182314_inLine
+BABEL_OP3_402_73119_20141031_182314_outLine
+BABEL_OP3_402_73301_20141117_004450_inLine
+BABEL_OP3_402_73301_20141117_004450_outLine
+BABEL_OP3_402_73757_20141115_190524_inLine
+BABEL_OP3_402_73757_20141115_190524_outLine
+BABEL_OP3_402_74111_20150102_112305_inLine
+BABEL_OP3_402_74111_20150102_112305_outLine
+BABEL_OP3_402_74455_20150201_180158_inLine
+BABEL_OP3_402_74455_20150201_180158_outLine
+BABEL_OP3_402_74799_20141129_202734_inLine
+BABEL_OP3_402_74799_20141129_202734_outLine
+BABEL_OP3_402_75764_20150202_000719_inLine
+BABEL_OP3_402_75764_20150202_000719_outLine
+BABEL_OP3_402_75993_20141021_183118_inLine
+BABEL_OP3_402_75993_20141021_183118_outLine
+BABEL_OP3_402_76444_20141227_143452_inLine
+BABEL_OP3_402_76444_20141227_143452_outLine
+BABEL_OP3_402_76683_20141128_201732_inLine
+BABEL_OP3_402_76683_20141128_201732_outLine
+BABEL_OP3_402_78116_20141229_210212_inLine
+BABEL_OP3_402_78116_20141229_210212_outLine
+BABEL_OP3_402_78254_20141101_235022_inLine
+BABEL_OP3_402_78254_20141101_235022_outLine
+BABEL_OP3_402_78360_20150131_163647_inLine
+BABEL_OP3_402_78360_20150131_163647_outLine
+BABEL_OP3_402_78630_20140930_135924_inLine
+BABEL_OP3_402_78630_20140930_135924_outLine
+BABEL_OP3_402_79139_20141115_153558_inLine
+BABEL_OP3_402_79139_20141115_153558_outLine
+BABEL_OP3_402_79751_20141104_200346_inLine
+BABEL_OP3_402_79751_20141104_200346_outLine
+BABEL_OP3_402_79751_20141104_201600_inLine
+BABEL_OP3_402_79751_20141104_201600_outLine
+BABEL_OP3_402_80439_20141104_195124_inLine
+BABEL_OP3_402_80439_20141104_195124_outLine
+BABEL_OP3_402_81229_20141116_224932_inLine
+BABEL_OP3_402_81229_20141116_224932_outLine
+BABEL_OP3_402_81427_20141110_165047_inLine
+BABEL_OP3_402_81427_20141110_165047_outLine
+BABEL_OP3_402_82089_20141113_162038_inLine
+BABEL_OP3_402_82089_20141113_162038_outLine
+BABEL_OP3_402_82224_20150101_162311_inLine
+BABEL_OP3_402_82224_20150101_162311_outLine
+BABEL_OP3_402_82637_20141006_173314_inLine
+BABEL_OP3_402_82637_20141006_173314_outLine
+BABEL_OP3_402_83238_20141122_140740_inLine
+BABEL_OP3_402_83238_20141122_140740_outLine
+BABEL_OP3_402_83436_20141017_162042_inLine
+BABEL_OP3_402_83436_20141017_162042_outLine
+BABEL_OP3_402_83651_20141009_145412_inLine
+BABEL_OP3_402_83651_20141009_145412_outLine
+BABEL_OP3_402_84061_20141107_162356_inLine
+BABEL_OP3_402_84061_20141107_162356_outLine
+BABEL_OP3_402_84611_20141023_205020_inLine
+BABEL_OP3_402_84611_20141023_205020_outLine
+BABEL_OP3_402_84737_20150129_233418_inLine
+BABEL_OP3_402_84737_20150129_233418_outLine
+BABEL_OP3_402_84815_20141225_185456_inLine
+BABEL_OP3_402_84815_20141225_185456_outLine
+BABEL_OP3_402_85048_20141204_194855_inLine
+BABEL_OP3_402_85048_20141204_194855_outLine
+BABEL_OP3_402_85248_20150109_001722_inLine
+BABEL_OP3_402_85248_20150109_001722_outLine
+BABEL_OP3_402_85340_20141021_182050_inLine
+BABEL_OP3_402_85340_20141021_182050_outLine
+BABEL_OP3_402_86191_20141105_130254_inLine
+BABEL_OP3_402_86191_20141105_130254_outLine
+BABEL_OP3_402_86713_20150101_014831_inLine
+BABEL_OP3_402_86713_20150101_014831_outLine
+BABEL_OP3_402_86722_20141101_204411_inLine
+BABEL_OP3_402_86722_20141101_204411_outLine
+BABEL_OP3_402_86952_20141105_144737_inLine
+BABEL_OP3_402_86952_20141105_144737_outLine
+BABEL_OP3_402_87073_20140915_154336_inLine
+BABEL_OP3_402_87073_20140915_154336_outLine
+BABEL_OP3_402_87179_20150203_020351_inLine
+BABEL_OP3_402_87179_20150203_020351_outLine
+BABEL_OP3_402_87871_20141224_130949_inLine
+BABEL_OP3_402_87871_20141224_130949_outLine
+BABEL_OP3_402_88601_20141209_160621_inLine
+BABEL_OP3_402_88601_20141209_160621_outLine
+BABEL_OP3_402_88776_20140921_133554_inLine
+BABEL_OP3_402_88776_20140921_133554_outLine
+BABEL_OP3_402_88873_20140930_131622_inLine
+BABEL_OP3_402_88873_20140930_131622_outLine
+BABEL_OP3_402_89794_20141213_211839_inLine
+BABEL_OP3_402_89794_20141213_211839_outLine
+BABEL_OP3_402_89877_20150107_013739_inLine
+BABEL_OP3_402_89877_20150107_013739_outLine
+BABEL_OP3_402_89877_20150107_014426_inLine
+BABEL_OP3_402_89877_20150107_014426_outLine
+BABEL_OP3_402_90777_20141106_234557_inLine
+BABEL_OP3_402_90777_20141106_234557_outLine
+BABEL_OP3_402_91884_20150302_183207_inLine
+BABEL_OP3_402_91884_20150302_183207_outLine
+BABEL_OP3_402_91891_20150108_203636_inLine
+BABEL_OP3_402_91891_20150108_203636_outLine
+BABEL_OP3_402_91977_20141225_143539_inLine
+BABEL_OP3_402_91977_20141225_143539_outLine
+BABEL_OP3_402_92356_20150109_005846_inLine
+BABEL_OP3_402_92356_20150109_005846_outLine
+BABEL_OP3_402_92459_20141102_124516_inLine
+BABEL_OP3_402_92459_20141102_124516_outLine
+BABEL_OP3_402_92557_20150201_205110_inLine
+BABEL_OP3_402_92557_20150201_205110_outLine
+BABEL_OP3_402_92698_20141115_182138_inLine
+BABEL_OP3_402_92698_20141115_182138_outLine
+BABEL_OP3_402_93475_20141119_140615_inLine
+BABEL_OP3_402_93475_20141119_140615_outLine
+BABEL_OP3_402_93490_20150106_174211_inLine
+BABEL_OP3_402_93490_20150106_174211_outLine
+BABEL_OP3_402_93604_20150304_152208_inLine
+BABEL_OP3_402_93604_20150304_152208_outLine
+BABEL_OP3_402_93964_20141216_021155_inLine
+BABEL_OP3_402_93964_20141216_021155_outLine
+BABEL_OP3_402_94002_20141216_015659_inLine
+BABEL_OP3_402_94002_20141216_015659_outLine
+BABEL_OP3_402_94166_20150128_151103_inLine
+BABEL_OP3_402_94166_20150128_151103_outLine
+BABEL_OP3_402_94409_20141214_185032_inLine
+BABEL_OP3_402_94409_20141214_185032_outLine
+BABEL_OP3_402_94869_20140912_195117_inLine
+BABEL_OP3_402_94869_20140912_195117_outLine
+BABEL_OP3_402_94923_20141201_154601_inLine
+BABEL_OP3_402_94923_20141201_154601_outLine
+BABEL_OP3_402_95446_20150110_150658_inLine
+BABEL_OP3_402_95446_20150110_150658_outLine
+BABEL_OP3_402_96190_20141103_161533_inLine
+BABEL_OP3_402_96190_20141103_161533_outLine
+BABEL_OP3_402_96205_20141126_152921_inLine
+BABEL_OP3_402_96205_20141126_152921_outLine
+BABEL_OP3_402_97264_20150131_205411_inLine
+BABEL_OP3_402_97264_20150131_205411_outLine
+BABEL_OP3_402_97772_20140915_200919_inLine
+BABEL_OP3_402_97772_20140915_200919_outLine
+BABEL_OP3_402_97896_20141122_161128_inLine
+BABEL_OP3_402_97896_20141122_161128_outLine
+BABEL_OP3_402_98165_20141106_191239_inLine
+BABEL_OP3_402_98165_20141106_191239_outLine
+BABEL_OP3_402_98888_20141108_211953_inLine
+BABEL_OP3_402_98888_20141108_211953_outLine
+BABEL_OP3_402_99202_20141123_162817_inLine
+BABEL_OP3_402_99202_20141123_162817_outLine
+BABEL_OP3_402_99516_20140917_174712_inLine
+BABEL_OP3_402_99516_20140917_174712_outLine
+BABEL_OP3_402_99594_20141111_170413_inLine
+BABEL_OP3_402_99594_20141111_170413_outLine
+BABEL_OP3_402_99887_20150104_230431_inLine
+BABEL_OP3_402_99887_20150104_230431_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list
new file mode 100644
index 00000000000..f37a27dda8a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list
@@ -0,0 +1,519 @@
+BABEL_OP3_402_10188_20140910_192244_inLine
+BABEL_OP3_402_10411_20150414_130427_inLine
+BABEL_OP3_402_10411_20150414_130427_outLine
+BABEL_OP3_402_11352_20150313_163143_inLine
+BABEL_OP3_402_11352_20150313_163143_outLine
+BABEL_OP3_402_11797_20141023_002654_inLine
+BABEL_OP3_402_11859_20150414_112255_inLine
+BABEL_OP3_402_11859_20150414_112255_outLine
+BABEL_OP3_402_12846_20150402_131845_inLine
+BABEL_OP3_402_12846_20150402_131845_outLine
+BABEL_OP3_402_13189_20150131_234926_inLine
+BABEL_OP3_402_13189_20150131_234926_outLine
+BABEL_OP3_402_13427_20141119_154114_outLine
+BABEL_OP3_402_13483_20150212_013903_inLine
+BABEL_OP3_402_13561_20141117_221410_inLine
+BABEL_OP3_402_13561_20141117_221410_outLine
+BABEL_OP3_402_13776_20150415_193538_inLine
+BABEL_OP3_402_13776_20150415_193538_outLine
+BABEL_OP3_402_13909_20150330_234000_inLine
+BABEL_OP3_402_13909_20150330_234000_outLine
+BABEL_OP3_402_14537_20150403_143039_inLine
+BABEL_OP3_402_14537_20150403_143039_outLine
+BABEL_OP3_402_14539_20150313_172051_inLine
+BABEL_OP3_402_14539_20150313_172051_outLine
+BABEL_OP3_402_15042_20150315_155219_inLine
+BABEL_OP3_402_15042_20150315_155219_outLine
+BABEL_OP3_402_15926_20141206_172139_inLine
+BABEL_OP3_402_15926_20141206_172139_outLine
+BABEL_OP3_402_17032_20141227_164236_inLine
+BABEL_OP3_402_17032_20141227_164236_outLine
+BABEL_OP3_402_17280_20141104_204141_inLine
+BABEL_OP3_402_17440_20141226_140345_inLine
+BABEL_OP3_402_17440_20141226_140345_outLine
+BABEL_OP3_402_17615_20141214_223314_inLine
+BABEL_OP3_402_17615_20141214_223314_outLine
+BABEL_OP3_402_17615_20141214_231451_inLine
+BABEL_OP3_402_17615_20141214_231451_outLine
+BABEL_OP3_402_17923_20141107_150744_inLine
+BABEL_OP3_402_18291_20150414_140952_inLine
+BABEL_OP3_402_18291_20150414_140952_outLine
+BABEL_OP3_402_18759_20150310_160837_inLine
+BABEL_OP3_402_18759_20150310_160837_outLine
+BABEL_OP3_402_19120_20150310_162359_inLine
+BABEL_OP3_402_19120_20150310_162359_outLine
+BABEL_OP3_402_19589_20150305_151023_inLine
+BABEL_OP3_402_19589_20150305_151023_outLine
+BABEL_OP3_402_19722_20141017_143042_inLine
+BABEL_OP3_402_19722_20141017_143042_outLine
+BABEL_OP3_402_19767_20150308_132354_inLine
+BABEL_OP3_402_19767_20150308_132354_outLine
+BABEL_OP3_402_19767_20150308_133241_inLine
+BABEL_OP3_402_19767_20150308_133241_outLine
+BABEL_OP3_402_19877_20150316_132035_inLine
+BABEL_OP3_402_19877_20150316_132035_outLine
+BABEL_OP3_402_19877_20150317_111220_inLine
+BABEL_OP3_402_19877_20150317_111220_outLine
+BABEL_OP3_402_21029_20141015_150255_inLine
+BABEL_OP3_402_21029_20141015_150255_outLine
+BABEL_OP3_402_21244_20150415_115154_inLine
+BABEL_OP3_402_21244_20150415_115154_outLine
+BABEL_OP3_402_22216_20141027_200224_inLine
+BABEL_OP3_402_22612_20150106_171601_inLine
+BABEL_OP3_402_22612_20150106_171601_outLine
+BABEL_OP3_402_22641_20140919_145256_inLine
+BABEL_OP3_402_22641_20140919_145256_outLine
+BABEL_OP3_402_22826_20150316_130545_inLine
+BABEL_OP3_402_22826_20150316_130545_outLine
+BABEL_OP3_402_22965_20141011_161936_inLine
+BABEL_OP3_402_23006_20141031_183901_inLine
+BABEL_OP3_402_23006_20141031_185939_inLine
+BABEL_OP3_402_23190_20141124_020320_inLine
+BABEL_OP3_402_23980_20141115_204429_inLine
+BABEL_OP3_402_23980_20141115_204429_outLine
+BABEL_OP3_402_24231_20150302_210042_inLine
+BABEL_OP3_402_24231_20150302_210042_outLine
+BABEL_OP3_402_24569_20141226_184242_inLine
+BABEL_OP3_402_24569_20141226_184242_outLine
+BABEL_OP3_402_24586_20150315_000448_inLine
+BABEL_OP3_402_24586_20150315_000448_outLine
+BABEL_OP3_402_25961_20140916_191139_inLine
+BABEL_OP3_402_25961_20140916_191139_outLine
+BABEL_OP3_402_26574_20150105_134517_inLine
+BABEL_OP3_402_26836_20141007_171841_inLine
+BABEL_OP3_402_26836_20141007_171841_outLine
+BABEL_OP3_402_27082_20141116_214244_inLine
+BABEL_OP3_402_27082_20141116_214244_outLine
+BABEL_OP3_402_27203_20150131_212241_inLine
+BABEL_OP3_402_27478_20150310_153450_inLine
+BABEL_OP3_402_27478_20150310_153450_outLine
+BABEL_OP3_402_27478_20150310_154447_inLine
+BABEL_OP3_402_27478_20150310_154447_outLine
+BABEL_OP3_402_27490_20150312_182925_inLine
+BABEL_OP3_402_27490_20150312_182925_outLine
+BABEL_OP3_402_28422_20141206_152718_inLine
+BABEL_OP3_402_28422_20141206_152718_outLine
+BABEL_OP3_402_28477_20150108_171338_inLine
+BABEL_OP3_402_28477_20150108_171338_outLine
+BABEL_OP3_402_28522_20141208_162113_inLine
+BABEL_OP3_402_28522_20141208_162113_outLine
+BABEL_OP3_402_28585_20150103_145903_inLine
+BABEL_OP3_402_28585_20150103_145903_outLine
+BABEL_OP3_402_28775_20141028_172233_inLine
+BABEL_OP3_402_28775_20141028_172233_outLine
+BABEL_OP3_402_29039_20141226_192341_inLine
+BABEL_OP3_402_29039_20141226_192341_outLine
+BABEL_OP3_402_29352_20150416_231130_inLine
+BABEL_OP3_402_29352_20150416_231130_outLine
+BABEL_OP3_402_29416_20150111_173045_inLine
+BABEL_OP3_402_29416_20150111_173045_outLine
+BABEL_OP3_402_29439_20150309_224912_inLine
+BABEL_OP3_402_29439_20150309_224912_outLine
+BABEL_OP3_402_30058_20150214_030757_inLine
+BABEL_OP3_402_30058_20150214_030757_outLine
+BABEL_OP3_402_30084_20150330_183021_inLine
+BABEL_OP3_402_30084_20150330_183021_outLine
+BABEL_OP3_402_30180_20141122_150213_inLine
+BABEL_OP3_402_30497_20150315_020936_inLine
+BABEL_OP3_402_30497_20150315_020936_outLine
+BABEL_OP3_402_31484_20150108_183810_inLine
+BABEL_OP3_402_31484_20150108_183810_outLine
+BABEL_OP3_402_32122_20141113_151807_inLine
+BABEL_OP3_402_32122_20141113_151807_outLine
+BABEL_OP3_402_32861_20150131_115104_inLine
+BABEL_OP3_402_32861_20150131_115104_outLine
+BABEL_OP3_402_33216_20150417_123910_inLine
+BABEL_OP3_402_33216_20150417_123910_outLine
+BABEL_OP3_402_33424_20150330_235558_inLine
+BABEL_OP3_402_33424_20150330_235558_outLine
+BABEL_OP3_402_33424_20150331_001041_inLine
+BABEL_OP3_402_33424_20150331_001041_outLine
+BABEL_OP3_402_33476_20141115_210108_inLine
+BABEL_OP3_402_33476_20141115_210108_outLine
+BABEL_OP3_402_34486_20150412_192213_inLine
+BABEL_OP3_402_34486_20150412_192213_outLine
+BABEL_OP3_402_34826_20150112_001042_inLine
+BABEL_OP3_402_34826_20150112_001042_outLine
+BABEL_OP3_402_35467_20141028_184845_inLine
+BABEL_OP3_402_35467_20141028_184845_outLine
+BABEL_OP3_402_35885_20150323_112854_inLine
+BABEL_OP3_402_35885_20150323_112854_outLine
+BABEL_OP3_402_35885_20150323_114745_inLine
+BABEL_OP3_402_35885_20150323_114745_outLine
+BABEL_OP3_402_36017_20150314_194053_inLine
+BABEL_OP3_402_36017_20150314_194053_outLine
+BABEL_OP3_402_36039_20150216_122319_inLine
+BABEL_OP3_402_36039_20150216_122319_outLine
+BABEL_OP3_402_36059_20150314_212955_inLine
+BABEL_OP3_402_36059_20150314_212955_outLine
+BABEL_OP3_402_36594_20150215_003738_inLine
+BABEL_OP3_402_36594_20150215_003738_outLine
+BABEL_OP3_402_36642_20150319_114619_inLine
+BABEL_OP3_402_36642_20150319_114619_outLine
+BABEL_OP3_402_38689_20141217_212559_inLine
+BABEL_OP3_402_38689_20141217_212559_outLine
+BABEL_OP3_402_38979_20150215_215359_inLine
+BABEL_OP3_402_38979_20150215_215359_outLine
+BABEL_OP3_402_41174_20141115_214537_inLine
+BABEL_OP3_402_41174_20141115_214537_outLine
+BABEL_OP3_402_41272_20150314_043848_inLine
+BABEL_OP3_402_41272_20150314_043848_outLine
+BABEL_OP3_402_41469_20140919_165056_inLine
+BABEL_OP3_402_41469_20140919_165056_outLine
+BABEL_OP3_402_41542_20150131_173858_inLine
+BABEL_OP3_402_41890_20150304_001538_inLine
+BABEL_OP3_402_41890_20150304_001538_outLine
+BABEL_OP3_402_42155_20141204_014246_inLine
+BABEL_OP3_402_42155_20141204_014246_outLine
+BABEL_OP3_402_42231_20150108_013906_inLine
+BABEL_OP3_402_42231_20150108_013906_outLine
+BABEL_OP3_402_42299_20150415_172418_inLine
+BABEL_OP3_402_42299_20150415_172418_outLine
+BABEL_OP3_402_42497_20141011_142043_inLine
+BABEL_OP3_402_42771_20141125_154524_inLine
+BABEL_OP3_402_42771_20141125_154524_outLine
+BABEL_OP3_402_43285_20141208_172008_inLine
+BABEL_OP3_402_43285_20141208_172008_outLine
+BABEL_OP3_402_43388_20141124_193332_inLine
+BABEL_OP3_402_43388_20141124_193332_outLine
+BABEL_OP3_402_44114_20150416_181819_inLine
+BABEL_OP3_402_44114_20150416_181819_outLine
+BABEL_OP3_402_44114_20150416_183630_inLine
+BABEL_OP3_402_44114_20150416_183630_outLine
+BABEL_OP3_402_45140_20150314_190952_inLine
+BABEL_OP3_402_45140_20150314_190952_outLine
+BABEL_OP3_402_45235_20150201_004752_inLine
+BABEL_OP3_402_45235_20150201_004752_outLine
+BABEL_OP3_402_45771_20150412_195546_inLine
+BABEL_OP3_402_45771_20150412_195546_outLine
+BABEL_OP3_402_45777_20141106_211401_inLine
+BABEL_OP3_402_45777_20141106_211401_outLine
+BABEL_OP3_402_45851_20150315_161428_inLine
+BABEL_OP3_402_45851_20150315_161428_outLine
+BABEL_OP3_402_45851_20150315_162642_inLine
+BABEL_OP3_402_45851_20150315_162642_outLine
+BABEL_OP3_402_46066_20150103_140632_inLine
+BABEL_OP3_402_46066_20150103_140632_outLine
+BABEL_OP3_402_46169_20141229_163719_inLine
+BABEL_OP3_402_46169_20141229_163719_outLine
+BABEL_OP3_402_46315_20141229_191221_inLine
+BABEL_OP3_402_46315_20141229_191221_outLine
+BABEL_OP3_402_46330_20150112_002124_inLine
+BABEL_OP3_402_46330_20150112_002124_outLine
+BABEL_OP3_402_46589_20141217_181108_inLine
+BABEL_OP3_402_46589_20141217_181108_outLine
+BABEL_OP3_402_47215_20141013_000842_inLine
+BABEL_OP3_402_47487_20141110_190705_inLine
+BABEL_OP3_402_47487_20141110_190705_outLine
+BABEL_OP3_402_47802_20141201_001110_inLine
+BABEL_OP3_402_47802_20141201_001110_outLine
+BABEL_OP3_402_47878_20141124_200607_inLine
+BABEL_OP3_402_47878_20141124_200607_outLine
+BABEL_OP3_402_48016_20150417_192509_inLine
+BABEL_OP3_402_48016_20150417_192509_outLine
+BABEL_OP3_402_48758_20150313_180048_inLine
+BABEL_OP3_402_48758_20150313_180048_outLine
+BABEL_OP3_402_48907_20150308_125109_inLine
+BABEL_OP3_402_48907_20150308_125109_outLine
+BABEL_OP3_402_49216_20141102_152914_inLine
+BABEL_OP3_402_49216_20141102_152914_outLine
+BABEL_OP3_402_49767_20150416_181833_inLine
+BABEL_OP3_402_49767_20150416_181833_outLine
+BABEL_OP3_402_49945_20150326_155920_inLine
+BABEL_OP3_402_49945_20150326_155920_outLine
+BABEL_OP3_402_50745_20150314_184529_inLine
+BABEL_OP3_402_50745_20150314_184529_outLine
+BABEL_OP3_402_51417_20150106_233844_outLine
+BABEL_OP3_402_51819_20150108_181637_inLine
+BABEL_OP3_402_51819_20150108_181637_outLine
+BABEL_OP3_402_52381_20150117_151106_inLine
+BABEL_OP3_402_52447_20150331_155658_inLine
+BABEL_OP3_402_52447_20150331_155658_outLine
+BABEL_OP3_402_52614_20150305_202702_inLine
+BABEL_OP3_402_52614_20150305_202702_outLine
+BABEL_OP3_402_53665_20150305_203655_inLine
+BABEL_OP3_402_53665_20150305_203655_outLine
+BABEL_OP3_402_54390_20141022_220633_inLine
+BABEL_OP3_402_54477_20141224_161244_inLine
+BABEL_OP3_402_54567_20141116_185111_inLine
+BABEL_OP3_402_54567_20141116_185111_outLine
+BABEL_OP3_402_54827_20150316_003319_inLine
+BABEL_OP3_402_54827_20150316_003319_outLine
+BABEL_OP3_402_54827_20150316_134423_inLine
+BABEL_OP3_402_54827_20150316_134423_outLine
+BABEL_OP3_402_55013_20150305_194735_inLine
+BABEL_OP3_402_55013_20150305_194735_outLine
+BABEL_OP3_402_55106_20150203_162853_inLine
+BABEL_OP3_402_55106_20150203_162853_outLine
+BABEL_OP3_402_55259_20141105_180934_inLine
+BABEL_OP3_402_55259_20141105_180934_outLine
+BABEL_OP3_402_55349_20150310_153012_inLine
+BABEL_OP3_402_55349_20150310_153012_outLine
+BABEL_OP3_402_55381_20150204_201519_inLine
+BABEL_OP3_402_55381_20150204_203110_inLine
+BABEL_OP3_402_56076_20150306_190854_inLine
+BABEL_OP3_402_56076_20150306_190854_outLine
+BABEL_OP3_402_56307_20141206_163118_inLine
+BABEL_OP3_402_56307_20141206_163118_outLine
+BABEL_OP3_402_56523_20141119_203619_inLine
+BABEL_OP3_402_56523_20141119_203619_outLine
+BABEL_OP3_402_57067_20150103_190024_inLine
+BABEL_OP3_402_57067_20150103_190024_outLine
+BABEL_OP3_402_57464_20150314_153140_inLine
+BABEL_OP3_402_57464_20150314_153140_outLine
+BABEL_OP3_402_57548_20141121_144924_inLine
+BABEL_OP3_402_57548_20141121_144924_outLine
+BABEL_OP3_402_57566_20150215_212628_inLine
+BABEL_OP3_402_57566_20150215_212628_outLine
+BABEL_OP3_402_58047_20141120_201112_inLine
+BABEL_OP3_402_58047_20141120_201112_outLine
+BABEL_OP3_402_58107_20141228_011533_inLine
+BABEL_OP3_402_58107_20141228_014953_inLine
+BABEL_OP3_402_58145_20141207_150852_inLine
+BABEL_OP3_402_58145_20141207_150852_outLine
+BABEL_OP3_402_58585_20150131_223219_inLine
+BABEL_OP3_402_58585_20150131_223219_outLine
+BABEL_OP3_402_58717_20150204_003429_inLine
+BABEL_OP3_402_58717_20150204_003429_outLine
+BABEL_OP3_402_58815_20150106_203552_inLine
+BABEL_OP3_402_58815_20150106_203552_outLine
+BABEL_OP3_402_58821_20150112_000647_inLine
+BABEL_OP3_402_58821_20150112_000647_outLine
+BABEL_OP3_402_59028_20150331_004006_inLine
+BABEL_OP3_402_59028_20150331_004006_outLine
+BABEL_OP3_402_59291_20150114_175706_inLine
+BABEL_OP3_402_59291_20150114_175706_outLine
+BABEL_OP3_402_59635_20150101_154832_inLine
+BABEL_OP3_402_59635_20150101_154832_outLine
+BABEL_OP3_402_59898_20141103_202730_outLine
+BABEL_OP3_402_60115_20141206_190510_inLine
+BABEL_OP3_402_60115_20141206_190510_outLine
+BABEL_OP3_402_60299_20150413_191144_inLine
+BABEL_OP3_402_60299_20150413_191144_outLine
+BABEL_OP3_402_60310_20141224_122329_inLine
+BABEL_OP3_402_60310_20141224_122329_outLine
+BABEL_OP3_402_60436_20150305_163917_inLine
+BABEL_OP3_402_60436_20150305_163917_outLine
+BABEL_OP3_402_60498_20150402_122035_inLine
+BABEL_OP3_402_60498_20150402_122035_outLine
+BABEL_OP3_402_61348_20141112_165406_outLine
+BABEL_OP3_402_61348_20141116_174305_outLine
+BABEL_OP3_402_61348_20141116_175022_outLine
+BABEL_OP3_402_61678_20141126_211128_inLine
+BABEL_OP3_402_61963_20150201_203302_inLine
+BABEL_OP3_402_61963_20150201_203302_outLine
+BABEL_OP3_402_61971_20150311_025217_inLine
+BABEL_OP3_402_61971_20150311_025217_outLine
+BABEL_OP3_402_61971_20150311_032439_inLine
+BABEL_OP3_402_61971_20150311_032439_outLine
+BABEL_OP3_402_62362_20150415_200843_inLine
+BABEL_OP3_402_62362_20150415_200843_outLine
+BABEL_OP3_402_62724_20141225_182011_inLine
+BABEL_OP3_402_62724_20141225_182011_outLine
+BABEL_OP3_402_62734_20141110_172820_inLine
+BABEL_OP3_402_62734_20141110_172820_outLine
+BABEL_OP3_402_63307_20141122_114633_outLine
+BABEL_OP3_402_63445_20140925_160334_inLine
+BABEL_OP3_402_63445_20140925_160334_outLine
+BABEL_OP3_402_63523_20150409_150241_inLine
+BABEL_OP3_402_63523_20150409_150241_outLine
+BABEL_OP3_402_63648_20150317_142220_inLine
+BABEL_OP3_402_63648_20150317_142220_outLine
+BABEL_OP3_402_63648_20150317_143418_inLine
+BABEL_OP3_402_63648_20150317_143418_outLine
+BABEL_OP3_402_63757_20141127_190053_inLine
+BABEL_OP3_402_63938_20150216_054808_inLine
+BABEL_OP3_402_63938_20150216_054808_outLine
+BABEL_OP3_402_63999_20150329_140522_inLine
+BABEL_OP3_402_63999_20150329_140522_outLine
+BABEL_OP3_402_63999_20150329_144023_inLine
+BABEL_OP3_402_63999_20150329_144023_outLine
+BABEL_OP3_402_64722_20150409_172232_inLine
+BABEL_OP3_402_64722_20150409_172232_outLine
+BABEL_OP3_402_65077_20140915_211109_inLine
+BABEL_OP3_402_65561_20150108_014921_inLine
+BABEL_OP3_402_65561_20150108_014921_outLine
+BABEL_OP3_402_65640_20150313_234015_inLine
+BABEL_OP3_402_65640_20150313_234015_outLine
+BABEL_OP3_402_66305_20150314_195357_inLine
+BABEL_OP3_402_66305_20150314_195357_outLine
+BABEL_OP3_402_66971_20150327_134302_inLine
+BABEL_OP3_402_66971_20150327_134302_outLine
+BABEL_OP3_402_67085_20150307_155234_inLine
+BABEL_OP3_402_67085_20150307_155234_outLine
+BABEL_OP3_402_67622_20141001_173720_inLine
+BABEL_OP3_402_67622_20141001_173720_outLine
+BABEL_OP3_402_67659_20141102_162850_inLine
+BABEL_OP3_402_67659_20141102_162850_outLine
+BABEL_OP3_402_67964_20150313_144207_inLine
+BABEL_OP3_402_67964_20150313_144207_outLine
+BABEL_OP3_402_67999_20150103_202040_outLine
+BABEL_OP3_402_68748_20150131_213425_inLine
+BABEL_OP3_402_68748_20150131_213425_outLine
+BABEL_OP3_402_68924_20141213_175705_inLine
+BABEL_OP3_402_68924_20141213_175705_outLine
+BABEL_OP3_402_69633_20150103_195020_inLine
+BABEL_OP3_402_69633_20150103_195020_outLine
+BABEL_OP3_402_70110_20140927_010427_inLine
+BABEL_OP3_402_70110_20140927_010427_outLine
+BABEL_OP3_402_71038_20150203_000908_inLine
+BABEL_OP3_402_71038_20150203_000908_outLine
+BABEL_OP3_402_71189_20150411_001925_inLine
+BABEL_OP3_402_71189_20150411_001925_outLine
+BABEL_OP3_402_71333_20141103_014203_inLine
+BABEL_OP3_402_71333_20141103_014203_outLine
+BABEL_OP3_402_71780_20141019_173617_inLine
+BABEL_OP3_402_71780_20141019_173617_outLine
+BABEL_OP3_402_71850_20150415_111357_inLine
+BABEL_OP3_402_71850_20150415_111357_outLine
+BABEL_OP3_402_73005_20150307_164753_inLine
+BABEL_OP3_402_73005_20150307_164753_outLine
+BABEL_OP3_402_73430_20150208_181645_inLine
+BABEL_OP3_402_73430_20150208_181645_outLine
+BABEL_OP3_402_73446_20150412_180706_inLine
+BABEL_OP3_402_73446_20150412_180706_outLine
+BABEL_OP3_402_73518_20150103_211617_inLine
+BABEL_OP3_402_73518_20150103_211617_outLine
+BABEL_OP3_402_74641_20141124_185314_inLine
+BABEL_OP3_402_74641_20141124_185314_outLine
+BABEL_OP3_402_75981_20150327_130110_inLine
+BABEL_OP3_402_75981_20150327_130110_outLine
+BABEL_OP3_402_76218_20141110_195047_inLine
+BABEL_OP3_402_76218_20141110_195047_outLine
+BABEL_OP3_402_77744_20141105_195905_inLine
+BABEL_OP3_402_77744_20141105_195905_outLine
+BABEL_OP3_402_77974_20150305_190614_inLine
+BABEL_OP3_402_78016_20141104_182140_outLine
+BABEL_OP3_402_78016_20141104_194136_outLine
+BABEL_OP3_402_78511_20141228_004040_inLine
+BABEL_OP3_402_78511_20141228_010153_inLine
+BABEL_OP3_402_78976_20141030_185556_inLine
+BABEL_OP3_402_78976_20141030_185556_outLine
+BABEL_OP3_402_79045_20150114_155912_inLine
+BABEL_OP3_402_79045_20150114_155912_outLine
+BABEL_OP3_402_79080_20150110_190015_inLine
+BABEL_OP3_402_79080_20150110_190015_outLine
+BABEL_OP3_402_79129_20141130_180012_outLine
+BABEL_OP3_402_79660_20150407_192210_inLine
+BABEL_OP3_402_79660_20150407_192210_outLine
+BABEL_OP3_402_80655_20150314_181243_inLine
+BABEL_OP3_402_80655_20150314_181243_outLine
+BABEL_OP3_402_81149_20150314_060925_inLine
+BABEL_OP3_402_81149_20150314_060925_outLine
+BABEL_OP3_402_81287_20141217_002122_inLine
+BABEL_OP3_402_81287_20141217_002122_outLine
+BABEL_OP3_402_81671_20141230_233802_inLine
+BABEL_OP3_402_81671_20141230_233802_outLine
+BABEL_OP3_402_82035_20141221_140850_inLine
+BABEL_OP3_402_82035_20141221_140850_outLine
+BABEL_OP3_402_82391_20150105_161651_inLine
+BABEL_OP3_402_82391_20150105_161651_outLine
+BABEL_OP3_402_82473_20141013_175410_inLine
+BABEL_OP3_402_82473_20141013_175410_outLine
+BABEL_OP3_402_82622_20141104_150303_inLine
+BABEL_OP3_402_82742_20141224_153706_inLine
+BABEL_OP3_402_82742_20141224_153706_outLine
+BABEL_OP3_402_82904_20150314_144007_inLine
+BABEL_OP3_402_82904_20150314_144007_outLine
+BABEL_OP3_402_82979_20141007_015257_inLine
+BABEL_OP3_402_82979_20141007_015257_outLine
+BABEL_OP3_402_83394_20150413_180513_inLine
+BABEL_OP3_402_83394_20150413_180513_outLine
+BABEL_OP3_402_83455_20141222_014307_inLine
+BABEL_OP3_402_83455_20141222_014307_outLine
+BABEL_OP3_402_83545_20150306_214611_inLine
+BABEL_OP3_402_83545_20150306_214611_outLine
+BABEL_OP3_402_83851_20141101_203855_outLine
+BABEL_OP3_402_84339_20150309_193354_inLine
+BABEL_OP3_402_84339_20150309_193354_outLine
+BABEL_OP3_402_84466_20150311_162506_inLine
+BABEL_OP3_402_84466_20150311_162506_outLine
+BABEL_OP3_402_84466_20150311_164841_inLine
+BABEL_OP3_402_84466_20150311_164841_outLine
+BABEL_OP3_402_84547_20141031_011002_inLine
+BABEL_OP3_402_84547_20141031_011002_outLine
+BABEL_OP3_402_85028_20150203_222949_inLine
+BABEL_OP3_402_85028_20150203_222949_outLine
+BABEL_OP3_402_85519_20150205_150346_inLine
+BABEL_OP3_402_85519_20150205_150346_outLine
+BABEL_OP3_402_85647_20141202_010613_inLine
+BABEL_OP3_402_85647_20141202_010613_outLine
+BABEL_OP3_402_85651_20150111_195212_inLine
+BABEL_OP3_402_85651_20150111_195212_outLine
+BABEL_OP3_402_86557_20140919_134613_inLine
+BABEL_OP3_402_86557_20140919_134613_outLine
+BABEL_OP3_402_86597_20150415_233059_inLine
+BABEL_OP3_402_86597_20150415_233059_outLine
+BABEL_OP3_402_86628_20150406_142110_inLine
+BABEL_OP3_402_86628_20150406_142110_outLine
+BABEL_OP3_402_86676_20141208_175123_inLine
+BABEL_OP3_402_86676_20141208_175123_outLine
+BABEL_OP3_402_86826_20150414_193901_inLine
+BABEL_OP3_402_86826_20150414_193901_outLine
+BABEL_OP3_402_86830_20150131_191140_inLine
+BABEL_OP3_402_87305_20150415_152636_outLine
+BABEL_OP3_402_87545_20150308_173713_inLine
+BABEL_OP3_402_87545_20150308_173713_outLine
+BABEL_OP3_402_87693_20141105_154104_inLine
+BABEL_OP3_402_87693_20141105_154104_outLine
+BABEL_OP3_402_88661_20141206_194640_inLine
+BABEL_OP3_402_88661_20141206_194640_outLine
+BABEL_OP3_402_88661_20141206_195854_inLine
+BABEL_OP3_402_88661_20141206_195854_outLine
+BABEL_OP3_402_88661_20141206_200827_inLine
+BABEL_OP3_402_88661_20141206_200827_outLine
+BABEL_OP3_402_88783_20141228_150438_inLine
+BABEL_OP3_402_88783_20141228_150438_outLine
+BABEL_OP3_402_88865_20150316_142749_inLine
+BABEL_OP3_402_88865_20150316_142749_outLine
+BABEL_OP3_402_88938_20150104_171742_inLine
+BABEL_OP3_402_88938_20150104_171742_outLine
+BABEL_OP3_402_89560_20150106_231355_inLine
+BABEL_OP3_402_89560_20150106_231355_outLine
+BABEL_OP3_402_89695_20141115_012527_outLine
+BABEL_OP3_402_91372_20150306_193038_inLine
+BABEL_OP3_402_91372_20150306_193038_outLine
+BABEL_OP3_402_92077_20150313_145153_inLine
+BABEL_OP3_402_92077_20150313_145153_outLine
+BABEL_OP3_402_92736_20150106_183108_inLine
+BABEL_OP3_402_92736_20150106_183108_outLine
+BABEL_OP3_402_92809_20140924_164438_inLine
+BABEL_OP3_402_93469_20150308_223956_inLine
+BABEL_OP3_402_93469_20150308_223956_outLine
+BABEL_OP3_402_93515_20150318_184223_inLine
+BABEL_OP3_402_93515_20150318_184223_outLine
+BABEL_OP3_402_93861_20141126_021459_outLine
+BABEL_OP3_402_93861_20141202_013129_outLine
+BABEL_OP3_402_94141_20150312_184456_inLine
+BABEL_OP3_402_94141_20150312_184456_outLine
+BABEL_OP3_402_94237_20150319_141146_inLine
+BABEL_OP3_402_94237_20150319_141146_outLine
+BABEL_OP3_402_94262_20150308_140603_inLine
+BABEL_OP3_402_94262_20150308_140603_outLine
+BABEL_OP3_402_94442_20150326_164734_inLine
+BABEL_OP3_402_94442_20150326_164734_outLine
+BABEL_OP3_402_94449_20150315_122812_inLine
+BABEL_OP3_402_94449_20150315_122812_outLine
+BABEL_OP3_402_94465_20141227_155756_inLine
+BABEL_OP3_402_94465_20141227_155756_outLine
+BABEL_OP3_402_94487_20150312_163837_inLine
+BABEL_OP3_402_94487_20150312_163837_outLine
+BABEL_OP3_402_94587_20150128_234118_inLine
+BABEL_OP3_402_94587_20150128_234118_outLine
+BABEL_OP3_402_94745_20141214_225333_inLine
+BABEL_OP3_402_94745_20141214_225333_outLine
+BABEL_OP3_402_95935_20150106_123341_inLine
+BABEL_OP3_402_95935_20150106_123341_outLine
+BABEL_OP3_402_95966_20141110_203915_inLine
+BABEL_OP3_402_95966_20141110_203915_outLine
+BABEL_OP3_402_96446_20141106_013329_inLine
+BABEL_OP3_402_96446_20141106_013329_outLine
+BABEL_OP3_402_96525_20150102_120919_inLine
+BABEL_OP3_402_96842_20150327_193159_inLine
+BABEL_OP3_402_96842_20150327_193159_outLine
+BABEL_OP3_402_97363_20140929_125711_outLine
+BABEL_OP3_402_97731_20150102_215016_outLine
+BABEL_OP3_402_98365_20141120_164222_inLine
+BABEL_OP3_402_98506_20150314_191311_inLine
+BABEL_OP3_402_98506_20150314_191311_outLine
+BABEL_OP3_402_99732_20141224_145056_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list
new file mode 100644
index 00000000000..195f3e16bf3
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list
@@ -0,0 +1,122 @@
+BABEL_OP3_403_10019_20141027_010545_inLine
+BABEL_OP3_403_10019_20141027_010545_outLine
+BABEL_OP3_403_12220_20141026_204025_inLine
+BABEL_OP3_403_12220_20141026_204025_outLine
+BABEL_OP3_403_13178_20141128_223039_inLine
+BABEL_OP3_403_13178_20141128_223039_outLine
+BABEL_OP3_403_14440_20141129_004855_inLine
+BABEL_OP3_403_14440_20141129_004855_outLine
+BABEL_OP3_403_15042_20150313_165638_inLine
+BABEL_OP3_403_15042_20150313_165638_outLine
+BABEL_OP3_403_17440_20141210_204026_inLine
+BABEL_OP3_403_17440_20141210_204026_outLine
+BABEL_OP3_403_17440_20141210_204535_inLine
+BABEL_OP3_403_17440_20141210_204535_outLine
+BABEL_OP3_403_19663_20141029_190739_inLine
+BABEL_OP3_403_19663_20141029_190739_outLine
+BABEL_OP3_403_19782_20141216_211916_inLine
+BABEL_OP3_403_19782_20141216_211916_outLine
+BABEL_OP3_403_22216_20141014_202442_inLine
+BABEL_OP3_403_22216_20141014_202442_outLine
+BABEL_OP3_403_23151_20150108_032700_inLine
+BABEL_OP3_403_23151_20150108_032700_outLine
+BABEL_OP3_403_25012_20150201_000040_inLine
+BABEL_OP3_403_25012_20150201_000040_outLine
+BABEL_OP3_403_28606_20141205_184257_inLine
+BABEL_OP3_403_28606_20141205_184257_outLine
+BABEL_OP3_403_32727_20141210_200505_inLine
+BABEL_OP3_403_32727_20141210_200505_outLine
+BABEL_OP3_403_33175_20141014_202944_inLine
+BABEL_OP3_403_33175_20141014_202944_outLine
+BABEL_OP3_403_33251_20141118_224420_inLine
+BABEL_OP3_403_33251_20141118_224420_outLine
+BABEL_OP3_403_34564_20141212_001647_inLine
+BABEL_OP3_403_34564_20141212_001647_outLine
+BABEL_OP3_403_36341_20141013_224204_inLine
+BABEL_OP3_403_36341_20141013_224204_outLine
+BABEL_OP3_403_41100_20141006_230147_inLine
+BABEL_OP3_403_41100_20141006_230147_outLine
+BABEL_OP3_403_42243_20141016_231219_inLine
+BABEL_OP3_403_42243_20141016_231219_outLine
+BABEL_OP3_403_42497_20141004_235231_inLine
+BABEL_OP3_403_42497_20141004_235231_outLine
+BABEL_OP3_403_43388_20141028_212938_inLine
+BABEL_OP3_403_43388_20141028_212938_outLine
+BABEL_OP3_403_44847_20141127_190752_inLine
+BABEL_OP3_403_44847_20141127_190752_outLine
+BABEL_OP3_403_45560_20141012_204242_inLine
+BABEL_OP3_403_45560_20141012_204242_outLine
+BABEL_OP3_403_45697_20150211_181356_inLine
+BABEL_OP3_403_45697_20150211_181356_outLine
+BABEL_OP3_403_46881_20141014_210231_inLine
+BABEL_OP3_403_46881_20141014_210231_outLine
+BABEL_OP3_403_47877_20150105_200005_inLine
+BABEL_OP3_403_47877_20150105_200005_outLine
+BABEL_OP3_403_47882_20150131_215134_inLine
+BABEL_OP3_403_47882_20150131_215134_outLine
+BABEL_OP3_403_48789_20141031_205407_inLine
+BABEL_OP3_403_48789_20141031_205407_outLine
+BABEL_OP3_403_49502_20141013_230428_inLine
+BABEL_OP3_403_49502_20141013_230428_outLine
+BABEL_OP3_403_49902_20141025_214609_inLine
+BABEL_OP3_403_49902_20141025_214609_outLine
+BABEL_OP3_403_50726_20141015_222945_inLine
+BABEL_OP3_403_50726_20141015_222945_outLine
+BABEL_OP3_403_52438_20141005_211825_inLine
+BABEL_OP3_403_52438_20141005_211825_outLine
+BABEL_OP3_403_54160_20141012_225050_inLine
+BABEL_OP3_403_54160_20141012_225050_outLine
+BABEL_OP3_403_56090_20141001_220534_inLine
+BABEL_OP3_403_56090_20141001_220534_outLine
+BABEL_OP3_403_58850_20141030_190407_inLine
+BABEL_OP3_403_58850_20141030_190407_outLine
+BABEL_OP3_403_60538_20141007_015704_inLine
+BABEL_OP3_403_60538_20141007_015704_outLine
+BABEL_OP3_403_60706_20141014_225721_inLine
+BABEL_OP3_403_60706_20141014_225721_outLine
+BABEL_OP3_403_61225_20141014_225524_inLine
+BABEL_OP3_403_61225_20141014_225524_outLine
+BABEL_OP3_403_62456_20141107_224816_inLine
+BABEL_OP3_403_62456_20141107_224816_outLine
+BABEL_OP3_403_62545_20150203_205015_inLine
+BABEL_OP3_403_62545_20150203_205015_outLine
+BABEL_OP3_403_63081_20141013_184721_inLine
+BABEL_OP3_403_63081_20141013_184721_outLine
+BABEL_OP3_403_63938_20150304_184136_inLine
+BABEL_OP3_403_63938_20150304_184136_outLine
+BABEL_OP3_403_65723_20141004_231950_inLine
+BABEL_OP3_403_65723_20141004_231950_outLine
+BABEL_OP3_403_65882_20141005_214649_inLine
+BABEL_OP3_403_65882_20141005_214649_outLine
+BABEL_OP3_403_66026_20141207_212517_inLine
+BABEL_OP3_403_66026_20141207_212517_outLine
+BABEL_OP3_403_68306_20141206_183801_inLine
+BABEL_OP3_403_68306_20141206_183801_outLine
+BABEL_OP3_403_70110_20141016_195210_inLine
+BABEL_OP3_403_70110_20141016_195210_outLine
+BABEL_OP3_403_71780_20141006_005652_inLine
+BABEL_OP3_403_71780_20141006_005652_outLine
+BABEL_OP3_403_72349_20150313_194307_inLine
+BABEL_OP3_403_72349_20150313_194307_outLine
+BABEL_OP3_403_78877_20150203_012549_inLine
+BABEL_OP3_403_78877_20150203_012549_outLine
+BABEL_OP3_403_79820_20141005_212016_inLine
+BABEL_OP3_403_79820_20141005_212016_outLine
+BABEL_OP3_403_87280_20141217_230121_inLine
+BABEL_OP3_403_87280_20141217_230121_outLine
+BABEL_OP3_403_88938_20141219_211017_inLine
+BABEL_OP3_403_88938_20141219_211017_outLine
+BABEL_OP3_403_90777_20141028_012959_inLine
+BABEL_OP3_403_90777_20141028_012959_outLine
+BABEL_OP3_403_92356_20150305_033040_inLine
+BABEL_OP3_403_92356_20150305_033040_outLine
+BABEL_OP3_403_94035_20150201_183321_inLine
+BABEL_OP3_403_94035_20150201_183321_outLine
+BABEL_OP3_403_96446_20141013_215249_inLine
+BABEL_OP3_403_96446_20141013_215249_outLine
+BABEL_OP3_403_97264_20141220_220653_inLine
+BABEL_OP3_403_97264_20141220_220653_outLine
+BABEL_OP3_403_97849_20150313_175528_inLine
+BABEL_OP3_403_97849_20150313_175528_outLine
+BABEL_OP3_403_99813_20141106_211637_inLine
+BABEL_OP3_403_99813_20141106_211637_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.list
new file mode 100644
index 00000000000..195f3e16bf3
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.list
@@ -0,0 +1,122 @@
+BABEL_OP3_403_10019_20141027_010545_inLine
+BABEL_OP3_403_10019_20141027_010545_outLine
+BABEL_OP3_403_12220_20141026_204025_inLine
+BABEL_OP3_403_12220_20141026_204025_outLine
+BABEL_OP3_403_13178_20141128_223039_inLine
+BABEL_OP3_403_13178_20141128_223039_outLine
+BABEL_OP3_403_14440_20141129_004855_inLine
+BABEL_OP3_403_14440_20141129_004855_outLine
+BABEL_OP3_403_15042_20150313_165638_inLine
+BABEL_OP3_403_15042_20150313_165638_outLine
+BABEL_OP3_403_17440_20141210_204026_inLine
+BABEL_OP3_403_17440_20141210_204026_outLine
+BABEL_OP3_403_17440_20141210_204535_inLine
+BABEL_OP3_403_17440_20141210_204535_outLine
+BABEL_OP3_403_19663_20141029_190739_inLine
+BABEL_OP3_403_19663_20141029_190739_outLine
+BABEL_OP3_403_19782_20141216_211916_inLine
+BABEL_OP3_403_19782_20141216_211916_outLine
+BABEL_OP3_403_22216_20141014_202442_inLine
+BABEL_OP3_403_22216_20141014_202442_outLine
+BABEL_OP3_403_23151_20150108_032700_inLine
+BABEL_OP3_403_23151_20150108_032700_outLine
+BABEL_OP3_403_25012_20150201_000040_inLine
+BABEL_OP3_403_25012_20150201_000040_outLine
+BABEL_OP3_403_28606_20141205_184257_inLine
+BABEL_OP3_403_28606_20141205_184257_outLine
+BABEL_OP3_403_32727_20141210_200505_inLine
+BABEL_OP3_403_32727_20141210_200505_outLine
+BABEL_OP3_403_33175_20141014_202944_inLine
+BABEL_OP3_403_33175_20141014_202944_outLine
+BABEL_OP3_403_33251_20141118_224420_inLine
+BABEL_OP3_403_33251_20141118_224420_outLine
+BABEL_OP3_403_34564_20141212_001647_inLine
+BABEL_OP3_403_34564_20141212_001647_outLine
+BABEL_OP3_403_36341_20141013_224204_inLine
+BABEL_OP3_403_36341_20141013_224204_outLine
+BABEL_OP3_403_41100_20141006_230147_inLine
+BABEL_OP3_403_41100_20141006_230147_outLine
+BABEL_OP3_403_42243_20141016_231219_inLine
+BABEL_OP3_403_42243_20141016_231219_outLine
+BABEL_OP3_403_42497_20141004_235231_inLine
+BABEL_OP3_403_42497_20141004_235231_outLine
+BABEL_OP3_403_43388_20141028_212938_inLine
+BABEL_OP3_403_43388_20141028_212938_outLine
+BABEL_OP3_403_44847_20141127_190752_inLine
+BABEL_OP3_403_44847_20141127_190752_outLine
+BABEL_OP3_403_45560_20141012_204242_inLine
+BABEL_OP3_403_45560_20141012_204242_outLine
+BABEL_OP3_403_45697_20150211_181356_inLine
+BABEL_OP3_403_45697_20150211_181356_outLine
+BABEL_OP3_403_46881_20141014_210231_inLine
+BABEL_OP3_403_46881_20141014_210231_outLine
+BABEL_OP3_403_47877_20150105_200005_inLine
+BABEL_OP3_403_47877_20150105_200005_outLine
+BABEL_OP3_403_47882_20150131_215134_inLine
+BABEL_OP3_403_47882_20150131_215134_outLine
+BABEL_OP3_403_48789_20141031_205407_inLine
+BABEL_OP3_403_48789_20141031_205407_outLine
+BABEL_OP3_403_49502_20141013_230428_inLine
+BABEL_OP3_403_49502_20141013_230428_outLine
+BABEL_OP3_403_49902_20141025_214609_inLine
+BABEL_OP3_403_49902_20141025_214609_outLine
+BABEL_OP3_403_50726_20141015_222945_inLine
+BABEL_OP3_403_50726_20141015_222945_outLine
+BABEL_OP3_403_52438_20141005_211825_inLine
+BABEL_OP3_403_52438_20141005_211825_outLine
+BABEL_OP3_403_54160_20141012_225050_inLine
+BABEL_OP3_403_54160_20141012_225050_outLine
+BABEL_OP3_403_56090_20141001_220534_inLine
+BABEL_OP3_403_56090_20141001_220534_outLine
+BABEL_OP3_403_58850_20141030_190407_inLine
+BABEL_OP3_403_58850_20141030_190407_outLine
+BABEL_OP3_403_60538_20141007_015704_inLine
+BABEL_OP3_403_60538_20141007_015704_outLine
+BABEL_OP3_403_60706_20141014_225721_inLine
+BABEL_OP3_403_60706_20141014_225721_outLine
+BABEL_OP3_403_61225_20141014_225524_inLine
+BABEL_OP3_403_61225_20141014_225524_outLine
+BABEL_OP3_403_62456_20141107_224816_inLine
+BABEL_OP3_403_62456_20141107_224816_outLine
+BABEL_OP3_403_62545_20150203_205015_inLine
+BABEL_OP3_403_62545_20150203_205015_outLine
+BABEL_OP3_403_63081_20141013_184721_inLine
+BABEL_OP3_403_63081_20141013_184721_outLine
+BABEL_OP3_403_63938_20150304_184136_inLine
+BABEL_OP3_403_63938_20150304_184136_outLine
+BABEL_OP3_403_65723_20141004_231950_inLine
+BABEL_OP3_403_65723_20141004_231950_outLine
+BABEL_OP3_403_65882_20141005_214649_inLine
+BABEL_OP3_403_65882_20141005_214649_outLine
+BABEL_OP3_403_66026_20141207_212517_inLine
+BABEL_OP3_403_66026_20141207_212517_outLine
+BABEL_OP3_403_68306_20141206_183801_inLine
+BABEL_OP3_403_68306_20141206_183801_outLine
+BABEL_OP3_403_70110_20141016_195210_inLine
+BABEL_OP3_403_70110_20141016_195210_outLine
+BABEL_OP3_403_71780_20141006_005652_inLine
+BABEL_OP3_403_71780_20141006_005652_outLine
+BABEL_OP3_403_72349_20150313_194307_inLine
+BABEL_OP3_403_72349_20150313_194307_outLine
+BABEL_OP3_403_78877_20150203_012549_inLine
+BABEL_OP3_403_78877_20150203_012549_outLine
+BABEL_OP3_403_79820_20141005_212016_inLine
+BABEL_OP3_403_79820_20141005_212016_outLine
+BABEL_OP3_403_87280_20141217_230121_inLine
+BABEL_OP3_403_87280_20141217_230121_outLine
+BABEL_OP3_403_88938_20141219_211017_inLine
+BABEL_OP3_403_88938_20141219_211017_outLine
+BABEL_OP3_403_90777_20141028_012959_inLine
+BABEL_OP3_403_90777_20141028_012959_outLine
+BABEL_OP3_403_92356_20150305_033040_inLine
+BABEL_OP3_403_92356_20150305_033040_outLine
+BABEL_OP3_403_94035_20150201_183321_inLine
+BABEL_OP3_403_94035_20150201_183321_outLine
+BABEL_OP3_403_96446_20141013_215249_inLine
+BABEL_OP3_403_96446_20141013_215249_outLine
+BABEL_OP3_403_97264_20141220_220653_inLine
+BABEL_OP3_403_97264_20141220_220653_outLine
+BABEL_OP3_403_97849_20150313_175528_inLine
+BABEL_OP3_403_97849_20150313_175528_outLine
+BABEL_OP3_403_99813_20141106_211637_inLine
+BABEL_OP3_403_99813_20141106_211637_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/eval.list b/egs/babel/s5d/conf/lists/403-dholuo/eval.list
new file mode 100644
index 00000000000..4fc564e5b78
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/eval.list
@@ -0,0 +1,182 @@
+BABEL_OP3_403_13040_20141004_235933_inLine
+BABEL_OP3_403_13040_20141004_235933_outLine
+BABEL_OP3_403_13929_20150204_022153_inLine
+BABEL_OP3_403_13929_20150204_022153_outLine
+BABEL_OP3_403_14350_20141007_001036_inLine
+BABEL_OP3_403_14350_20141007_001036_outLine
+BABEL_OP3_403_14575_20150205_194428_inLine
+BABEL_OP3_403_14575_20150205_194428_outLine
+BABEL_OP3_403_15262_20141008_011520_inLine
+BABEL_OP3_403_15262_20141008_011520_outLine
+BABEL_OP3_403_15848_20141001_223454_inLine
+BABEL_OP3_403_15848_20141001_223454_outLine
+BABEL_OP3_403_15902_20141006_235206_inLine
+BABEL_OP3_403_15902_20141006_235206_outLine
+BABEL_OP3_403_16056_20141007_015057_inLine
+BABEL_OP3_403_16056_20141007_015057_outLine
+BABEL_OP3_403_16184_20141003_220544_inLine
+BABEL_OP3_403_16184_20141003_220544_outLine
+BABEL_OP3_403_17165_20141103_175355_inLine
+BABEL_OP3_403_17165_20141103_175355_outLine
+BABEL_OP3_403_19120_20150320_014910_inLine
+BABEL_OP3_403_19120_20150320_014910_outLine
+BABEL_OP3_403_19545_20141107_193534_inLine
+BABEL_OP3_403_19545_20141107_193534_outLine
+BABEL_OP3_403_21029_20141010_220724_inLine
+BABEL_OP3_403_21029_20141010_220724_outLine
+BABEL_OP3_403_21581_20141026_010129_inLine
+BABEL_OP3_403_21581_20141026_010129_outLine
+BABEL_OP3_403_23260_20150313_211958_inLine
+BABEL_OP3_403_23260_20150313_211958_outLine
+BABEL_OP3_403_29777_20141218_221709_inLine
+BABEL_OP3_403_29777_20141218_221709_outLine
+BABEL_OP3_403_30497_20150314_160011_inLine
+BABEL_OP3_403_30497_20150314_160011_outLine
+BABEL_OP3_403_31583_20141216_190359_inLine
+BABEL_OP3_403_31583_20141216_190359_outLine
+BABEL_OP3_403_32048_20141219_213429_inLine
+BABEL_OP3_403_32048_20141219_213429_outLine
+BABEL_OP3_403_32959_20141208_210738_inLine
+BABEL_OP3_403_32959_20141208_210738_outLine
+BABEL_OP3_403_33635_20141029_220701_inLine
+BABEL_OP3_403_33635_20141029_220701_outLine
+BABEL_OP3_403_35069_20141219_230111_inLine
+BABEL_OP3_403_35069_20141219_230111_outLine
+BABEL_OP3_403_35885_20150319_180147_inLine
+BABEL_OP3_403_35885_20150319_180147_outLine
+BABEL_OP3_403_36219_20141024_182040_inLine
+BABEL_OP3_403_36219_20141024_182040_outLine
+BABEL_OP3_403_37281_20141028_212708_inLine
+BABEL_OP3_403_37281_20141028_212708_outLine
+BABEL_OP3_403_39277_20150204_013404_inLine
+BABEL_OP3_403_39277_20150204_013404_outLine
+BABEL_OP3_403_44290_20150313_161518_inLine
+BABEL_OP3_403_44290_20150313_161518_outLine
+BABEL_OP3_403_44681_20150202_013205_inLine
+BABEL_OP3_403_44681_20150202_013205_outLine
+BABEL_OP3_403_45140_20150314_202244_inLine
+BABEL_OP3_403_45140_20150314_202244_outLine
+BABEL_OP3_403_47270_20150305_004557_inLine
+BABEL_OP3_403_47270_20150305_004557_outLine
+BABEL_OP3_403_47309_20150131_232140_inLine
+BABEL_OP3_403_47309_20150131_232140_outLine
+BABEL_OP3_403_50090_20141128_005549_inLine
+BABEL_OP3_403_50090_20141128_005549_outLine
+BABEL_OP3_403_52025_20141016_194738_inLine
+BABEL_OP3_403_52025_20141016_194738_outLine
+BABEL_OP3_403_52381_20150106_000156_inLine
+BABEL_OP3_403_52381_20150106_000156_outLine
+BABEL_OP3_403_53419_20141216_202007_inLine
+BABEL_OP3_403_53419_20141216_202007_outLine
+BABEL_OP3_403_54046_20141221_013345_inLine
+BABEL_OP3_403_54046_20141221_013345_outLine
+BABEL_OP3_403_54405_20141105_215311_inLine
+BABEL_OP3_403_54405_20141105_215311_outLine
+BABEL_OP3_403_56429_20141004_212928_inLine
+BABEL_OP3_403_56429_20141004_212928_outLine
+BABEL_OP3_403_56523_20141027_234249_inLine
+BABEL_OP3_403_56523_20141027_234249_outLine
+BABEL_OP3_403_56720_20141204_213606_inLine
+BABEL_OP3_403_56720_20141204_213606_outLine
+BABEL_OP3_403_56743_20141027_224527_inLine
+BABEL_OP3_403_56743_20141027_224527_outLine
+BABEL_OP3_403_57654_20141004_222740_inLine
+BABEL_OP3_403_57654_20141004_222740_outLine
+BABEL_OP3_403_57922_20141119_003457_inLine
+BABEL_OP3_403_57922_20141119_003457_outLine
+BABEL_OP3_403_60508_20141015_194223_inLine
+BABEL_OP3_403_60508_20141015_194223_outLine
+BABEL_OP3_403_60626_20141007_020141_inLine
+BABEL_OP3_403_60626_20141007_020141_outLine
+BABEL_OP3_403_61219_20141025_212855_inLine
+BABEL_OP3_403_61219_20141025_212855_outLine
+BABEL_OP3_403_62286_20141029_183256_inLine
+BABEL_OP3_403_62286_20141029_183256_outLine
+BABEL_OP3_403_62852_20141016_194911_inLine
+BABEL_OP3_403_62852_20141016_194911_outLine
+BABEL_OP3_403_63445_20141016_201418_inLine
+BABEL_OP3_403_63445_20141016_201418_outLine
+BABEL_OP3_403_63481_20141014_201444_inLine
+BABEL_OP3_403_63481_20141014_201444_outLine
+BABEL_OP3_403_64494_20141005_003938_inLine
+BABEL_OP3_403_64494_20141005_003938_outLine
+BABEL_OP3_403_64796_20141014_213212_inLine
+BABEL_OP3_403_64796_20141014_213212_outLine
+BABEL_OP3_403_64902_20150319_231944_inLine
+BABEL_OP3_403_64902_20150319_231944_outLine
+BABEL_OP3_403_65477_20141029_190115_inLine
+BABEL_OP3_403_65477_20141029_190115_outLine
+BABEL_OP3_403_66519_20141026_200412_inLine
+BABEL_OP3_403_66519_20141026_200412_outLine
+BABEL_OP3_403_67552_20141204_235240_inLine
+BABEL_OP3_403_67552_20141204_235240_outLine
+BABEL_OP3_403_67842_20141005_213633_inLine
+BABEL_OP3_403_67842_20141005_213633_outLine
+BABEL_OP3_403_70639_20150201_224933_inLine +BABEL_OP3_403_70639_20150201_224933_outLine +BABEL_OP3_403_71282_20150304_001933_inLine +BABEL_OP3_403_71282_20150304_001933_outLine +BABEL_OP3_403_71566_20141210_221853_inLine +BABEL_OP3_403_71566_20141210_221853_outLine +BABEL_OP3_403_71704_20141005_194010_inLine +BABEL_OP3_403_71704_20141005_194010_outLine +BABEL_OP3_403_73042_20141004_213024_inLine +BABEL_OP3_403_73042_20141004_213024_outLine +BABEL_OP3_403_73119_20141024_013927_inLine +BABEL_OP3_403_73119_20141024_013927_outLine +BABEL_OP3_403_74641_20141104_204017_inLine +BABEL_OP3_403_74641_20141104_204017_outLine +BABEL_OP3_403_75359_20150306_233416_inLine +BABEL_OP3_403_75359_20150306_233416_outLine +BABEL_OP3_403_77567_20141016_212214_inLine +BABEL_OP3_403_77567_20141016_212214_outLine +BABEL_OP3_403_80655_20150313_202935_inLine +BABEL_OP3_403_80655_20150313_202935_outLine +BABEL_OP3_403_81229_20141028_221835_inLine +BABEL_OP3_403_81229_20141028_221835_outLine +BABEL_OP3_403_81404_20141027_225835_inLine +BABEL_OP3_403_81404_20141027_225835_outLine +BABEL_OP3_403_81427_20141025_192229_inLine +BABEL_OP3_403_81427_20141025_192229_outLine +BABEL_OP3_403_81581_20150205_214253_inLine +BABEL_OP3_403_81581_20150205_214253_outLine +BABEL_OP3_403_82966_20141215_232026_inLine +BABEL_OP3_403_82966_20141215_232026_outLine +BABEL_OP3_403_83062_20150314_182244_inLine +BABEL_OP3_403_83062_20150314_182244_outLine +BABEL_OP3_403_84715_20150106_201437_inLine +BABEL_OP3_403_84715_20150106_201437_outLine +BABEL_OP3_403_86748_20150305_041204_inLine +BABEL_OP3_403_86748_20150305_041204_outLine +BABEL_OP3_403_87629_20141107_235904_inLine +BABEL_OP3_403_87629_20141107_235904_outLine +BABEL_OP3_403_88686_20141014_185730_inLine +BABEL_OP3_403_88686_20141014_185730_outLine +BABEL_OP3_403_88873_20141005_183048_inLine +BABEL_OP3_403_88873_20141005_183048_outLine +BABEL_OP3_403_90080_20150305_215921_inLine +BABEL_OP3_403_90080_20150305_215921_outLine +BABEL_OP3_403_91825_20141016_185730_inLine +BABEL_OP3_403_91825_20141016_185730_outLine +BABEL_OP3_403_94166_20150304_233340_inLine +BABEL_OP3_403_94166_20150304_233340_outLine +BABEL_OP3_403_94212_20150203_035128_inLine +BABEL_OP3_403_94212_20150203_035128_outLine +BABEL_OP3_403_94587_20141206_200001_inLine +BABEL_OP3_403_94587_20141206_200001_outLine +BABEL_OP3_403_95077_20141211_172737_inLine +BABEL_OP3_403_95077_20141211_172737_outLine +BABEL_OP3_403_95490_20141015_192814_inLine +BABEL_OP3_403_95490_20141015_192814_outLine +BABEL_OP3_403_96088_20150307_205122_inLine +BABEL_OP3_403_96088_20150307_205122_outLine +BABEL_OP3_403_96934_20141025_215407_inLine +BABEL_OP3_403_96934_20141025_215407_outLine +BABEL_OP3_403_98255_20150204_194911_inLine +BABEL_OP3_403_98255_20150204_194911_outLine +BABEL_OP3_403_98580_20141029_181611_inLine +BABEL_OP3_403_98580_20141029_181611_outLine +BABEL_OP3_403_98888_20141028_214127_inLine +BABEL_OP3_403_98888_20141028_214127_outLine +BABEL_OP3_403_99264_20141216_011902_inLine +BABEL_OP3_403_99264_20141216_011902_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list new file mode 100644 index 00000000000..138a27efd31 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_403_11681_20141010_203514_inLine +BABEL_OP3_403_11681_20141010_203514_outLine +BABEL_OP3_403_13324_20141004_204835_inLine +BABEL_OP3_403_13324_20141004_204835_outLine +BABEL_OP3_403_13490_20141103_182258_inLine +BABEL_OP3_403_13490_20141103_182258_outLine 
+BABEL_OP3_403_16475_20141027_225406_inLine +BABEL_OP3_403_16475_20141027_225406_outLine +BABEL_OP3_403_16938_20141117_002438_inLine +BABEL_OP3_403_16938_20141117_002438_outLine +BABEL_OP3_403_17280_20141027_223651_inLine +BABEL_OP3_403_17280_20141027_223651_outLine +BABEL_OP3_403_19722_20141013_214859_inLine +BABEL_OP3_403_19722_20141013_214859_outLine +BABEL_OP3_403_19749_20150211_210545_inLine +BABEL_OP3_403_19749_20150211_210545_outLine +BABEL_OP3_403_22321_20141012_234503_inLine +BABEL_OP3_403_22321_20141012_234503_outLine +BABEL_OP3_403_23893_20150311_211416_inLine +BABEL_OP3_403_23893_20150311_211416_outLine +BABEL_OP3_403_24589_20141023_173937_inLine +BABEL_OP3_403_24589_20141023_173937_outLine +BABEL_OP3_403_25961_20141016_201537_inLine +BABEL_OP3_403_25961_20141016_201537_outLine +BABEL_OP3_403_28190_20141218_232404_inLine +BABEL_OP3_403_28190_20141218_232404_outLine +BABEL_OP3_403_28775_20141005_002735_inLine +BABEL_OP3_403_28775_20141005_002735_outLine +BABEL_OP3_403_28945_20141006_202723_inLine +BABEL_OP3_403_28945_20141006_202723_outLine +BABEL_OP3_403_29168_20141013_195745_inLine +BABEL_OP3_403_29168_20141013_195745_outLine +BABEL_OP3_403_29323_20150303_223419_inLine +BABEL_OP3_403_29323_20150303_223419_outLine +BABEL_OP3_403_31109_20141107_213704_inLine +BABEL_OP3_403_31109_20141107_213704_outLine +BABEL_OP3_403_31490_20141006_210241_inLine +BABEL_OP3_403_31490_20141006_210241_outLine +BABEL_OP3_403_31624_20141014_211203_inLine +BABEL_OP3_403_31624_20141014_211203_outLine +BABEL_OP3_403_32861_20150107_160647_inLine +BABEL_OP3_403_32861_20150107_160647_outLine +BABEL_OP3_403_33840_20141219_200146_inLine +BABEL_OP3_403_33840_20141219_200146_outLine +BABEL_OP3_403_36632_20150206_041325_inLine +BABEL_OP3_403_36632_20150206_041325_outLine +BABEL_OP3_403_36990_20141030_231441_inLine +BABEL_OP3_403_36990_20141030_231441_outLine +BABEL_OP3_403_38878_20141121_184540_inLine +BABEL_OP3_403_38878_20141121_184540_outLine +BABEL_OP3_403_42718_20150306_202240_inLine +BABEL_OP3_403_42718_20150306_202240_outLine +BABEL_OP3_403_43368_20141026_000458_inLine +BABEL_OP3_403_43368_20141026_000458_outLine +BABEL_OP3_403_44868_20141129_220211_inLine +BABEL_OP3_403_44868_20141129_220211_outLine +BABEL_OP3_403_44961_20141006_233622_inLine +BABEL_OP3_403_44961_20141006_233622_outLine +BABEL_OP3_403_44961_20141006_235203_inLine +BABEL_OP3_403_44961_20141006_235203_outLine +BABEL_OP3_403_47215_20141007_230222_inLine +BABEL_OP3_403_47215_20141007_230222_outLine +BABEL_OP3_403_48663_20150306_181741_inLine +BABEL_OP3_403_48663_20150306_181741_outLine +BABEL_OP3_403_51955_20141004_212210_inLine +BABEL_OP3_403_51955_20141004_212210_outLine +BABEL_OP3_403_53842_20141031_193507_inLine +BABEL_OP3_403_53842_20141031_193507_outLine +BABEL_OP3_403_55950_20150312_174125_inLine +BABEL_OP3_403_55950_20150312_174125_outLine +BABEL_OP3_403_56198_20141005_222956_inLine +BABEL_OP3_403_56198_20141005_222956_outLine +BABEL_OP3_403_58047_20141118_184454_inLine +BABEL_OP3_403_58047_20141118_184454_outLine +BABEL_OP3_403_58585_20150106_172737_inLine +BABEL_OP3_403_58585_20150106_172737_outLine +BABEL_OP3_403_60310_20141217_205059_inLine +BABEL_OP3_403_60310_20141217_205059_outLine +BABEL_OP3_403_60418_20141129_235907_inLine +BABEL_OP3_403_60418_20141129_235907_outLine +BABEL_OP3_403_61348_20141103_230857_inLine +BABEL_OP3_403_61348_20141103_230857_outLine +BABEL_OP3_403_65640_20150314_163101_inLine +BABEL_OP3_403_65640_20150314_163101_outLine +BABEL_OP3_403_69107_20141106_000151_inLine 
+BABEL_OP3_403_69107_20141106_000151_outLine +BABEL_OP3_403_69746_20141220_191513_inLine +BABEL_OP3_403_69746_20141220_191513_outLine +BABEL_OP3_403_72007_20141205_002010_inLine +BABEL_OP3_403_72007_20141205_002010_outLine +BABEL_OP3_403_72110_20141210_212045_inLine +BABEL_OP3_403_72110_20141210_212045_outLine +BABEL_OP3_403_72844_20141004_005248_inLine +BABEL_OP3_403_72844_20141004_005248_outLine +BABEL_OP3_403_75223_20141016_194054_inLine +BABEL_OP3_403_75223_20141016_194054_outLine +BABEL_OP3_403_77974_20150312_200046_inLine +BABEL_OP3_403_77974_20150312_200046_outLine +BABEL_OP3_403_78360_20150107_231519_inLine +BABEL_OP3_403_78360_20150107_231519_outLine +BABEL_OP3_403_78544_20141201_192016_inLine +BABEL_OP3_403_78544_20141201_192016_outLine +BABEL_OP3_403_82391_20141206_001207_inLine +BABEL_OP3_403_82391_20141206_001207_outLine +BABEL_OP3_403_83436_20141012_221126_inLine +BABEL_OP3_403_83436_20141012_221126_outLine +BABEL_OP3_403_84469_20141211_002526_inLine +BABEL_OP3_403_84469_20141211_002526_outLine +BABEL_OP3_403_84605_20141005_214529_inLine +BABEL_OP3_403_84605_20141005_214529_outLine +BABEL_OP3_403_87921_20141210_233414_inLine +BABEL_OP3_403_87921_20141210_233414_outLine +BABEL_OP3_403_92509_20141014_232528_inLine +BABEL_OP3_403_92509_20141014_232528_outLine +BABEL_OP3_403_95269_20141026_235206_inLine +BABEL_OP3_403_95269_20141026_235206_outLine +BABEL_OP3_403_96324_20141014_194024_inLine +BABEL_OP3_403_96324_20141014_194024_outLine +BABEL_OP3_403_97588_20141015_193851_inLine +BABEL_OP3_403_97588_20141015_193851_outLine +BABEL_OP3_403_98506_20150319_151741_inLine +BABEL_OP3_403_98506_20150319_151741_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list new file mode 100644 index 00000000000..b22e404cf6c --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP3_403_10313_20150130_193605_inLine +BABEL_OP3_403_10313_20150130_193605_outLine +BABEL_OP3_403_10469_20150130_211522_inLine +BABEL_OP3_403_10469_20150130_211522_outLine +BABEL_OP3_403_10966_20141025_191612_inLine +BABEL_OP3_403_10966_20141025_191612_outLine +BABEL_OP3_403_11663_20141205_201130_inLine +BABEL_OP3_403_11663_20141205_201130_outLine +BABEL_OP3_403_11663_20141205_204332_inLine +BABEL_OP3_403_11663_20141205_204332_outLine +BABEL_OP3_403_11797_20141013_012556_inLine +BABEL_OP3_403_11797_20141013_012556_outLine +BABEL_OP3_403_12606_20150314_170830_inLine +BABEL_OP3_403_12606_20150314_170830_outLine +BABEL_OP3_403_12609_20150313_183914_inLine +BABEL_OP3_403_12609_20150313_183914_outLine +BABEL_OP3_403_13483_20141205_201059_inLine +BABEL_OP3_403_13483_20141205_201059_outLine +BABEL_OP3_403_14807_20141117_201842_inLine +BABEL_OP3_403_14807_20141117_201842_outLine +BABEL_OP3_403_14814_20141023_004823_inLine +BABEL_OP3_403_14814_20141023_004823_outLine +BABEL_OP3_403_14929_20141029_222741_inLine +BABEL_OP3_403_14929_20141029_222741_outLine +BABEL_OP3_403_15281_20150307_201454_inLine +BABEL_OP3_403_15281_20150307_201454_outLine +BABEL_OP3_403_15322_20150319_184826_inLine +BABEL_OP3_403_15322_20150319_184826_outLine +BABEL_OP3_403_15702_20141202_010728_inLine +BABEL_OP3_403_15702_20141202_010728_outLine +BABEL_OP3_403_16749_20141221_021003_inLine +BABEL_OP3_403_16749_20141221_021003_outLine +BABEL_OP3_403_16800_20141216_181241_inLine +BABEL_OP3_403_16800_20141216_181241_outLine +BABEL_OP3_403_16839_20141219_222837_inLine +BABEL_OP3_403_16839_20141219_222837_outLine 
+BABEL_OP3_403_16924_20141202_004432_inLine +BABEL_OP3_403_16924_20141202_004432_outLine +BABEL_OP3_403_17496_20141202_004936_inLine +BABEL_OP3_403_17496_20141202_004936_outLine +BABEL_OP3_403_18924_20141117_193058_inLine +BABEL_OP3_403_18924_20141117_193058_outLine +BABEL_OP3_403_18939_20141007_221546_inLine +BABEL_OP3_403_18939_20141007_221546_outLine +BABEL_OP3_403_19688_20150205_181600_inLine +BABEL_OP3_403_19688_20150205_181600_outLine +BABEL_OP3_403_19703_20141024_194336_inLine +BABEL_OP3_403_19703_20141024_194336_outLine +BABEL_OP3_403_20133_20141001_221247_inLine +BABEL_OP3_403_20133_20141001_221247_outLine +BABEL_OP3_403_20916_20141003_230543_inLine +BABEL_OP3_403_20916_20141003_230543_outLine +BABEL_OP3_403_21004_20141210_214422_inLine +BABEL_OP3_403_21004_20141210_214422_outLine +BABEL_OP3_403_21206_20141004_231905_inLine +BABEL_OP3_403_21206_20141004_231905_outLine +BABEL_OP3_403_21327_20141207_194922_inLine +BABEL_OP3_403_21327_20141207_194922_outLine +BABEL_OP3_403_21892_20141216_205644_inLine +BABEL_OP3_403_21892_20141216_205644_outLine +BABEL_OP3_403_22643_20150131_005325_inLine +BABEL_OP3_403_22643_20150131_005325_outLine +BABEL_OP3_403_22918_20150305_185811_inLine +BABEL_OP3_403_22918_20150305_185811_outLine +BABEL_OP3_403_22965_20141005_232023_inLine +BABEL_OP3_403_22965_20141005_232023_outLine +BABEL_OP3_403_23006_20141024_182721_inLine +BABEL_OP3_403_23006_20141024_182721_outLine +BABEL_OP3_403_23046_20141023_002436_inLine +BABEL_OP3_403_23046_20141023_002436_outLine +BABEL_OP3_403_24017_20141218_235010_inLine +BABEL_OP3_403_24017_20141218_235010_outLine +BABEL_OP3_403_24290_20150319_170027_inLine +BABEL_OP3_403_24290_20150319_170027_outLine +BABEL_OP3_403_25015_20150312_185754_inLine +BABEL_OP3_403_25015_20150312_185754_outLine +BABEL_OP3_403_25242_20150129_211027_inLine +BABEL_OP3_403_25242_20150129_211027_outLine +BABEL_OP3_403_25767_20141007_010749_inLine +BABEL_OP3_403_25767_20141007_010749_outLine +BABEL_OP3_403_26072_20150107_183553_inLine +BABEL_OP3_403_26072_20150107_183553_outLine +BABEL_OP3_403_27125_20141005_201825_inLine +BABEL_OP3_403_27125_20141005_201825_outLine +BABEL_OP3_403_27367_20150201_011720_inLine +BABEL_OP3_403_27367_20150201_011720_outLine +BABEL_OP3_403_28522_20141129_232715_inLine +BABEL_OP3_403_28522_20141129_232715_outLine +BABEL_OP3_403_28814_20141220_183346_inLine +BABEL_OP3_403_28814_20141220_183346_outLine +BABEL_OP3_403_31346_20141217_195511_inLine +BABEL_OP3_403_31346_20141217_195511_outLine +BABEL_OP3_403_31919_20150306_024725_inLine +BABEL_OP3_403_31919_20150306_024725_outLine +BABEL_OP3_403_32301_20141202_224639_inLine +BABEL_OP3_403_32301_20141202_224639_outLine +BABEL_OP3_403_32328_20141218_000510_inLine +BABEL_OP3_403_32328_20141218_000510_outLine +BABEL_OP3_403_33913_20141211_183339_inLine +BABEL_OP3_403_33913_20141211_183339_outLine +BABEL_OP3_403_34197_20141117_004710_inLine +BABEL_OP3_403_34197_20141117_004710_outLine +BABEL_OP3_403_34477_20141023_231734_inLine +BABEL_OP3_403_34477_20141023_231734_outLine +BABEL_OP3_403_35008_20141204_201241_inLine +BABEL_OP3_403_35008_20141204_201241_outLine +BABEL_OP3_403_35139_20141004_205621_inLine +BABEL_OP3_403_35139_20141004_205621_outLine +BABEL_OP3_403_35143_20141210_230830_inLine +BABEL_OP3_403_35143_20141210_230830_outLine +BABEL_OP3_403_35467_20141004_175833_inLine +BABEL_OP3_403_35467_20141004_175833_outLine +BABEL_OP3_403_35583_20150108_024439_inLine +BABEL_OP3_403_35583_20150108_024439_outLine +BABEL_OP3_403_36293_20141015_192540_inLine 
+BABEL_OP3_403_36293_20141015_192540_outLine +BABEL_OP3_403_37228_20150108_000217_inLine +BABEL_OP3_403_37228_20150108_000217_outLine +BABEL_OP3_403_37682_20141028_002656_inLine +BABEL_OP3_403_37682_20141028_002656_outLine +BABEL_OP3_403_37853_20150303_020840_inLine +BABEL_OP3_403_37853_20150303_020840_outLine +BABEL_OP3_403_38588_20141026_220103_inLine +BABEL_OP3_403_38588_20141026_220103_outLine +BABEL_OP3_403_38664_20141028_005703_inLine +BABEL_OP3_403_38664_20141028_005703_outLine +BABEL_OP3_403_38689_20141204_215950_inLine +BABEL_OP3_403_38689_20141204_215950_outLine +BABEL_OP3_403_38741_20141005_205401_inLine +BABEL_OP3_403_38741_20141005_205401_outLine +BABEL_OP3_403_39099_20150306_193032_inLine +BABEL_OP3_403_39099_20150306_193032_outLine +BABEL_OP3_403_39307_20141014_234344_inLine +BABEL_OP3_403_39307_20141014_234344_outLine +BABEL_OP3_403_39555_20141217_205213_inLine +BABEL_OP3_403_39555_20141217_205213_outLine +BABEL_OP3_403_41442_20141216_224519_inLine +BABEL_OP3_403_41442_20141216_224519_outLine +BABEL_OP3_403_41958_20141026_202739_inLine +BABEL_OP3_403_41958_20141026_202739_outLine +BABEL_OP3_403_42434_20141026_001223_inLine +BABEL_OP3_403_42434_20141026_001223_outLine +BABEL_OP3_403_42771_20141104_215437_inLine +BABEL_OP3_403_42771_20141104_215437_outLine +BABEL_OP3_403_43784_20141005_193431_inLine +BABEL_OP3_403_43784_20141005_193431_outLine +BABEL_OP3_403_43788_20141202_235051_inLine +BABEL_OP3_403_43788_20141202_235051_outLine +BABEL_OP3_403_44309_20150305_200025_inLine +BABEL_OP3_403_44309_20150305_200025_outLine +BABEL_OP3_403_44478_20150307_223313_inLine +BABEL_OP3_403_44478_20150307_223313_outLine +BABEL_OP3_403_45486_20150130_235157_inLine +BABEL_OP3_403_45486_20150130_235157_outLine +BABEL_OP3_403_45536_20141219_005329_inLine +BABEL_OP3_403_45536_20141219_005329_outLine +BABEL_OP3_403_46008_20150307_190844_inLine +BABEL_OP3_403_46008_20150307_190844_outLine +BABEL_OP3_403_46041_20141217_222544_inLine +BABEL_OP3_403_46041_20141217_222544_outLine +BABEL_OP3_403_46310_20141012_204940_inLine +BABEL_OP3_403_46310_20141012_204940_outLine +BABEL_OP3_403_46757_20141202_212733_inLine +BABEL_OP3_403_46757_20141202_212733_outLine +BABEL_OP3_403_47283_20141005_204650_inLine +BABEL_OP3_403_47283_20141005_204650_outLine +BABEL_OP3_403_47487_20141025_235747_inLine +BABEL_OP3_403_47487_20141025_235747_outLine +BABEL_OP3_403_47866_20150317_213617_inLine +BABEL_OP3_403_47866_20150317_213617_outLine +BABEL_OP3_403_47878_20141118_193135_inLine +BABEL_OP3_403_47878_20141118_193135_outLine +BABEL_OP3_403_48243_20141004_221542_inLine +BABEL_OP3_403_48243_20141004_221542_outLine +BABEL_OP3_403_48610_20141007_225901_inLine +BABEL_OP3_403_48610_20141007_225901_outLine +BABEL_OP3_403_48844_20141007_004947_inLine +BABEL_OP3_403_48844_20141007_004947_outLine +BABEL_OP3_403_48844_20141007_011027_inLine +BABEL_OP3_403_48844_20141007_011027_outLine +BABEL_OP3_403_49027_20150307_230828_inLine +BABEL_OP3_403_49027_20150307_230828_outLine +BABEL_OP3_403_49630_20141205_233804_inLine +BABEL_OP3_403_49630_20141205_233804_outLine +BABEL_OP3_403_49768_20141026_000059_inLine +BABEL_OP3_403_49768_20141026_000059_outLine +BABEL_OP3_403_49907_20141005_215057_inLine +BABEL_OP3_403_49907_20141005_215057_outLine +BABEL_OP3_403_50427_20141116_233807_inLine +BABEL_OP3_403_50427_20141116_233807_outLine +BABEL_OP3_403_50549_20150304_014353_inLine +BABEL_OP3_403_50549_20150304_014353_outLine +BABEL_OP3_403_50779_20141118_221929_inLine +BABEL_OP3_403_50779_20141118_221929_outLine 
+BABEL_OP3_403_50779_20141118_230132_inLine +BABEL_OP3_403_50779_20141118_230132_outLine +BABEL_OP3_403_52490_20141016_230923_inLine +BABEL_OP3_403_52490_20141016_230923_outLine +BABEL_OP3_403_52717_20141008_003843_inLine +BABEL_OP3_403_52717_20141008_003843_outLine +BABEL_OP3_403_53063_20141207_192558_inLine +BABEL_OP3_403_53063_20141207_192558_outLine +BABEL_OP3_403_53063_20141207_194007_inLine +BABEL_OP3_403_53063_20141207_194007_outLine +BABEL_OP3_403_54104_20141006_230139_inLine +BABEL_OP3_403_54104_20141006_230139_outLine +BABEL_OP3_403_54104_20141006_230643_inLine +BABEL_OP3_403_54104_20141006_230643_outLine +BABEL_OP3_403_54162_20141103_190601_inLine +BABEL_OP3_403_54162_20141103_190601_outLine +BABEL_OP3_403_54477_20141216_200349_inLine +BABEL_OP3_403_54477_20141216_200349_outLine +BABEL_OP3_403_54477_20141216_213534_inLine +BABEL_OP3_403_54477_20141216_213534_outLine +BABEL_OP3_403_54530_20141217_220934_inLine +BABEL_OP3_403_54530_20141217_220934_outLine +BABEL_OP3_403_54594_20150204_003149_inLine +BABEL_OP3_403_54594_20150204_003149_outLine +BABEL_OP3_403_55259_20141025_175845_inLine +BABEL_OP3_403_55259_20141025_175845_outLine +BABEL_OP3_403_55968_20141004_005950_inLine +BABEL_OP3_403_55968_20141004_005950_outLine +BABEL_OP3_403_56326_20150129_020103_inLine +BABEL_OP3_403_56326_20150129_020103_outLine +BABEL_OP3_403_57093_20141103_221842_inLine +BABEL_OP3_403_57093_20141103_221842_outLine +BABEL_OP3_403_57141_20141215_224302_inLine +BABEL_OP3_403_57141_20141215_224302_outLine +BABEL_OP3_403_57529_20141207_002135_inLine +BABEL_OP3_403_57529_20141207_002135_outLine +BABEL_OP3_403_59262_20141216_193024_inLine +BABEL_OP3_403_59262_20141216_193024_outLine +BABEL_OP3_403_60115_20141129_235248_inLine +BABEL_OP3_403_60115_20141129_235248_outLine +BABEL_OP3_403_60650_20150131_013236_inLine +BABEL_OP3_403_60650_20150131_013236_outLine +BABEL_OP3_403_61678_20141003_231023_inLine +BABEL_OP3_403_61678_20141003_231023_outLine +BABEL_OP3_403_61731_20141005_201612_inLine +BABEL_OP3_403_61731_20141005_201612_outLine +BABEL_OP3_403_61971_20150307_004145_inLine +BABEL_OP3_403_61971_20150307_004145_outLine +BABEL_OP3_403_62014_20141127_180004_inLine +BABEL_OP3_403_62014_20141127_180004_outLine +BABEL_OP3_403_62734_20141025_192117_inLine +BABEL_OP3_403_62734_20141025_192117_outLine +BABEL_OP3_403_62810_20141016_191619_inLine +BABEL_OP3_403_62810_20141016_191619_outLine +BABEL_OP3_403_63670_20141215_221926_inLine +BABEL_OP3_403_63670_20141215_221926_outLine +BABEL_OP3_403_63787_20141006_214400_inLine +BABEL_OP3_403_63787_20141006_214400_outLine +BABEL_OP3_403_63906_20150305_205105_inLine +BABEL_OP3_403_63906_20150305_205105_outLine +BABEL_OP3_403_65367_20150108_004325_inLine +BABEL_OP3_403_65367_20150108_004325_outLine +BABEL_OP3_403_66001_20141007_230508_inLine +BABEL_OP3_403_66001_20141007_230508_outLine +BABEL_OP3_403_66822_20141029_224921_inLine +BABEL_OP3_403_66822_20141029_224921_outLine +BABEL_OP3_403_66916_20141015_215414_inLine +BABEL_OP3_403_66916_20141015_215414_outLine +BABEL_OP3_403_67622_20141014_193846_inLine +BABEL_OP3_403_67622_20141014_193846_outLine +BABEL_OP3_403_67659_20141023_013756_inLine +BABEL_OP3_403_67659_20141023_013756_outLine +BABEL_OP3_403_68384_20141216_000507_inLine +BABEL_OP3_403_68384_20141216_000507_outLine +BABEL_OP3_403_68748_20141130_014650_inLine +BABEL_OP3_403_68748_20141130_014650_outLine +BABEL_OP3_403_68854_20150306_195508_inLine +BABEL_OP3_403_68854_20150306_195508_outLine +BABEL_OP3_403_69096_20150309_190140_inLine 
+BABEL_OP3_403_69096_20150309_190140_outLine +BABEL_OP3_403_70121_20141026_225432_inLine +BABEL_OP3_403_70121_20141026_225432_outLine +BABEL_OP3_403_70216_20150128_234110_inLine +BABEL_OP3_403_70216_20150128_234110_outLine +BABEL_OP3_403_70257_20150204_032020_inLine +BABEL_OP3_403_70257_20150204_032020_outLine +BABEL_OP3_403_70343_20141205_225856_inLine +BABEL_OP3_403_70343_20141205_225856_outLine +BABEL_OP3_403_71047_20150106_190413_inLine +BABEL_OP3_403_71047_20150106_190413_outLine +BABEL_OP3_403_72040_20141006_004959_inLine +BABEL_OP3_403_72040_20141006_004959_outLine +BABEL_OP3_403_73430_20141205_233006_inLine +BABEL_OP3_403_73430_20141205_233006_outLine +BABEL_OP3_403_73591_20140930_234521_inLine +BABEL_OP3_403_73591_20140930_234521_outLine +BABEL_OP3_403_74728_20150312_182026_inLine +BABEL_OP3_403_74728_20150312_182026_outLine +BABEL_OP3_403_75064_20141022_225629_inLine +BABEL_OP3_403_75064_20141022_225629_outLine +BABEL_OP3_403_76499_20141103_232220_inLine +BABEL_OP3_403_76499_20141103_232220_outLine +BABEL_OP3_403_77427_20141027_223134_inLine +BABEL_OP3_403_77427_20141027_223134_outLine +BABEL_OP3_403_77990_20141004_201020_inLine +BABEL_OP3_403_77990_20141004_201020_outLine +BABEL_OP3_403_78116_20141208_213333_inLine +BABEL_OP3_403_78116_20141208_213333_outLine +BABEL_OP3_403_78116_20141208_214155_inLine +BABEL_OP3_403_78116_20141208_214155_outLine +BABEL_OP3_403_78254_20141024_234037_inLine +BABEL_OP3_403_78254_20141024_234037_outLine +BABEL_OP3_403_78604_20141006_193457_inLine +BABEL_OP3_403_78604_20141006_193457_outLine +BABEL_OP3_403_78833_20150205_204459_inLine +BABEL_OP3_403_78833_20150205_204459_outLine +BABEL_OP3_403_80439_20141023_195331_inLine +BABEL_OP3_403_80439_20141023_195331_outLine +BABEL_OP3_403_80781_20141026_214157_inLine +BABEL_OP3_403_80781_20141026_214157_outLine +BABEL_OP3_403_81149_20150313_000213_inLine +BABEL_OP3_403_81149_20150313_000213_outLine +BABEL_OP3_403_81213_20141004_213211_inLine +BABEL_OP3_403_81213_20141004_213211_outLine +BABEL_OP3_403_82425_20141007_231028_inLine +BABEL_OP3_403_82425_20141007_231028_outLine +BABEL_OP3_403_83238_20141107_233257_inLine +BABEL_OP3_403_83238_20141107_233257_outLine +BABEL_OP3_403_83455_20141103_225146_inLine +BABEL_OP3_403_83455_20141103_225146_outLine +BABEL_OP3_403_83651_20141005_194737_inLine +BABEL_OP3_403_83651_20141005_194737_outLine +BABEL_OP3_403_84194_20150204_213858_inLine +BABEL_OP3_403_84194_20150204_213858_outLine +BABEL_OP3_403_84458_20141208_005012_inLine +BABEL_OP3_403_84458_20141208_005012_outLine +BABEL_OP3_403_84547_20141013_223556_inLine +BABEL_OP3_403_84547_20141013_223556_outLine +BABEL_OP3_403_84737_20150303_195506_inLine +BABEL_OP3_403_84737_20150303_195506_outLine +BABEL_OP3_403_85647_20141103_192225_inLine +BABEL_OP3_403_85647_20141103_192225_outLine +BABEL_OP3_403_86845_20150201_015753_inLine +BABEL_OP3_403_86845_20150201_015753_outLine +BABEL_OP3_403_87889_20150107_001827_inLine +BABEL_OP3_403_87889_20150107_001827_outLine +BABEL_OP3_403_88260_20141029_205951_inLine +BABEL_OP3_403_88260_20141029_205951_outLine +BABEL_OP3_403_88812_20150307_181013_inLine +BABEL_OP3_403_88812_20150307_181013_outLine +BABEL_OP3_403_89059_20141220_191342_inLine +BABEL_OP3_403_89059_20141220_191342_outLine +BABEL_OP3_403_89358_20141030_231758_inLine +BABEL_OP3_403_89358_20141030_231758_outLine +BABEL_OP3_403_90709_20141007_234900_inLine +BABEL_OP3_403_90709_20141007_234900_outLine +BABEL_OP3_403_90739_20141028_224009_inLine +BABEL_OP3_403_90739_20141028_224009_outLine 
+BABEL_OP3_403_92527_20141026_192704_inLine +BABEL_OP3_403_92527_20141026_192704_outLine +BABEL_OP3_403_92740_20141130_011740_inLine +BABEL_OP3_403_92740_20141130_011740_outLine +BABEL_OP3_403_94409_20141028_214356_inLine +BABEL_OP3_403_94409_20141028_214356_outLine +BABEL_OP3_403_94449_20150309_193606_inLine +BABEL_OP3_403_94449_20150309_193606_outLine +BABEL_OP3_403_94487_20150312_014136_inLine +BABEL_OP3_403_94487_20150312_014136_outLine +BABEL_OP3_403_95583_20141013_184937_inLine +BABEL_OP3_403_95583_20141013_184937_outLine +BABEL_OP3_403_95670_20141016_214958_inLine +BABEL_OP3_403_95670_20141016_214958_outLine +BABEL_OP3_403_96525_20141217_223842_inLine +BABEL_OP3_403_96525_20141217_223842_outLine +BABEL_OP3_403_96910_20141024_195822_inLine +BABEL_OP3_403_96910_20141024_195822_outLine +BABEL_OP3_403_97376_20141206_215930_inLine +BABEL_OP3_403_97376_20141206_215930_outLine +BABEL_OP3_403_97772_20141003_213919_inLine +BABEL_OP3_403_97772_20141003_213919_outLine +BABEL_OP3_403_98311_20141005_195843_inLine +BABEL_OP3_403_98311_20141005_195843_outLine +BABEL_OP3_403_99202_20141108_002737_inLine +BABEL_OP3_403_99202_20141108_002737_outLine +BABEL_OP3_403_99955_20150107_213836_inLine +BABEL_OP3_403_99955_20150107_213836_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/training.list b/egs/babel/s5d/conf/lists/403-dholuo/training.list new file mode 100644 index 00000000000..3b32ed92b92 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/training.list @@ -0,0 +1,502 @@ +BABEL_OP3_403_10313_20150130_193605_inLine +BABEL_OP3_403_10313_20150130_193605_outLine +BABEL_OP3_403_10469_20150130_211522_inLine +BABEL_OP3_403_10469_20150130_211522_outLine +BABEL_OP3_403_10966_20141025_191612_inLine +BABEL_OP3_403_10966_20141025_191612_outLine +BABEL_OP3_403_11663_20141205_201130_inLine +BABEL_OP3_403_11663_20141205_201130_outLine +BABEL_OP3_403_11663_20141205_204332_inLine +BABEL_OP3_403_11663_20141205_204332_outLine +BABEL_OP3_403_11681_20141010_203514_inLine +BABEL_OP3_403_11681_20141010_203514_outLine +BABEL_OP3_403_11797_20141013_012556_inLine +BABEL_OP3_403_11797_20141013_012556_outLine +BABEL_OP3_403_12606_20150314_170830_inLine +BABEL_OP3_403_12606_20150314_170830_outLine +BABEL_OP3_403_12609_20150313_183914_inLine +BABEL_OP3_403_12609_20150313_183914_outLine +BABEL_OP3_403_13324_20141004_204835_inLine +BABEL_OP3_403_13324_20141004_204835_outLine +BABEL_OP3_403_13483_20141205_201059_inLine +BABEL_OP3_403_13483_20141205_201059_outLine +BABEL_OP3_403_13490_20141103_182258_inLine +BABEL_OP3_403_13490_20141103_182258_outLine +BABEL_OP3_403_14807_20141117_201842_inLine +BABEL_OP3_403_14807_20141117_201842_outLine +BABEL_OP3_403_14814_20141023_004823_inLine +BABEL_OP3_403_14814_20141023_004823_outLine +BABEL_OP3_403_14929_20141029_222741_inLine +BABEL_OP3_403_14929_20141029_222741_outLine +BABEL_OP3_403_15281_20150307_201454_inLine +BABEL_OP3_403_15281_20150307_201454_outLine +BABEL_OP3_403_15322_20150319_184826_inLine +BABEL_OP3_403_15322_20150319_184826_outLine +BABEL_OP3_403_15702_20141202_010728_inLine +BABEL_OP3_403_15702_20141202_010728_outLine +BABEL_OP3_403_16475_20141027_225406_inLine +BABEL_OP3_403_16475_20141027_225406_outLine +BABEL_OP3_403_16749_20141221_021003_inLine +BABEL_OP3_403_16749_20141221_021003_outLine +BABEL_OP3_403_16800_20141216_181241_inLine +BABEL_OP3_403_16800_20141216_181241_outLine +BABEL_OP3_403_16839_20141219_222837_inLine +BABEL_OP3_403_16839_20141219_222837_outLine +BABEL_OP3_403_16924_20141202_004432_inLine +BABEL_OP3_403_16924_20141202_004432_outLine 
+BABEL_OP3_403_16938_20141117_002438_inLine +BABEL_OP3_403_16938_20141117_002438_outLine +BABEL_OP3_403_17280_20141027_223651_inLine +BABEL_OP3_403_17280_20141027_223651_outLine +BABEL_OP3_403_17496_20141202_004936_inLine +BABEL_OP3_403_17496_20141202_004936_outLine +BABEL_OP3_403_18924_20141117_193058_inLine +BABEL_OP3_403_18924_20141117_193058_outLine +BABEL_OP3_403_18939_20141007_221546_inLine +BABEL_OP3_403_18939_20141007_221546_outLine +BABEL_OP3_403_19688_20150205_181600_inLine +BABEL_OP3_403_19688_20150205_181600_outLine +BABEL_OP3_403_19703_20141024_194336_inLine +BABEL_OP3_403_19703_20141024_194336_outLine +BABEL_OP3_403_19722_20141013_214859_inLine +BABEL_OP3_403_19722_20141013_214859_outLine +BABEL_OP3_403_19749_20150211_210545_inLine +BABEL_OP3_403_19749_20150211_210545_outLine +BABEL_OP3_403_20133_20141001_221247_inLine +BABEL_OP3_403_20133_20141001_221247_outLine +BABEL_OP3_403_20916_20141003_230543_inLine +BABEL_OP3_403_20916_20141003_230543_outLine +BABEL_OP3_403_21004_20141210_214422_inLine +BABEL_OP3_403_21004_20141210_214422_outLine +BABEL_OP3_403_21206_20141004_231905_inLine +BABEL_OP3_403_21206_20141004_231905_outLine +BABEL_OP3_403_21327_20141207_194922_inLine +BABEL_OP3_403_21327_20141207_194922_outLine +BABEL_OP3_403_21892_20141216_205644_inLine +BABEL_OP3_403_21892_20141216_205644_outLine +BABEL_OP3_403_22321_20141012_234503_inLine +BABEL_OP3_403_22321_20141012_234503_outLine +BABEL_OP3_403_22643_20150131_005325_inLine +BABEL_OP3_403_22643_20150131_005325_outLine +BABEL_OP3_403_22918_20150305_185811_inLine +BABEL_OP3_403_22918_20150305_185811_outLine +BABEL_OP3_403_22965_20141005_232023_inLine +BABEL_OP3_403_22965_20141005_232023_outLine +BABEL_OP3_403_23006_20141024_182721_inLine +BABEL_OP3_403_23006_20141024_182721_outLine +BABEL_OP3_403_23046_20141023_002436_inLine +BABEL_OP3_403_23046_20141023_002436_outLine +BABEL_OP3_403_23893_20150311_211416_inLine +BABEL_OP3_403_23893_20150311_211416_outLine +BABEL_OP3_403_24017_20141218_235010_inLine +BABEL_OP3_403_24017_20141218_235010_outLine +BABEL_OP3_403_24290_20150319_170027_inLine +BABEL_OP3_403_24290_20150319_170027_outLine +BABEL_OP3_403_24589_20141023_173937_inLine +BABEL_OP3_403_24589_20141023_173937_outLine +BABEL_OP3_403_25015_20150312_185754_inLine +BABEL_OP3_403_25015_20150312_185754_outLine +BABEL_OP3_403_25242_20150129_211027_inLine +BABEL_OP3_403_25242_20150129_211027_outLine +BABEL_OP3_403_25767_20141007_010749_inLine +BABEL_OP3_403_25767_20141007_010749_outLine +BABEL_OP3_403_25961_20141016_201537_inLine +BABEL_OP3_403_25961_20141016_201537_outLine +BABEL_OP3_403_26072_20150107_183553_inLine +BABEL_OP3_403_26072_20150107_183553_outLine +BABEL_OP3_403_27125_20141005_201825_inLine +BABEL_OP3_403_27125_20141005_201825_outLine +BABEL_OP3_403_27367_20150201_011720_inLine +BABEL_OP3_403_27367_20150201_011720_outLine +BABEL_OP3_403_28190_20141218_232404_inLine +BABEL_OP3_403_28190_20141218_232404_outLine +BABEL_OP3_403_28522_20141129_232715_inLine +BABEL_OP3_403_28522_20141129_232715_outLine +BABEL_OP3_403_28775_20141005_002735_inLine +BABEL_OP3_403_28775_20141005_002735_outLine +BABEL_OP3_403_28814_20141220_183346_inLine +BABEL_OP3_403_28814_20141220_183346_outLine +BABEL_OP3_403_28945_20141006_202723_inLine +BABEL_OP3_403_28945_20141006_202723_outLine +BABEL_OP3_403_29168_20141013_195745_inLine +BABEL_OP3_403_29168_20141013_195745_outLine +BABEL_OP3_403_29323_20150303_223419_inLine +BABEL_OP3_403_29323_20150303_223419_outLine +BABEL_OP3_403_31109_20141107_213704_inLine 
+BABEL_OP3_403_31109_20141107_213704_outLine +BABEL_OP3_403_31346_20141217_195511_inLine +BABEL_OP3_403_31346_20141217_195511_outLine +BABEL_OP3_403_31490_20141006_210241_inLine +BABEL_OP3_403_31490_20141006_210241_outLine +BABEL_OP3_403_31624_20141014_211203_inLine +BABEL_OP3_403_31624_20141014_211203_outLine +BABEL_OP3_403_31919_20150306_024725_inLine +BABEL_OP3_403_31919_20150306_024725_outLine +BABEL_OP3_403_32301_20141202_224639_inLine +BABEL_OP3_403_32301_20141202_224639_outLine +BABEL_OP3_403_32328_20141218_000510_inLine +BABEL_OP3_403_32328_20141218_000510_outLine +BABEL_OP3_403_32861_20150107_160647_inLine +BABEL_OP3_403_32861_20150107_160647_outLine +BABEL_OP3_403_33840_20141219_200146_inLine +BABEL_OP3_403_33840_20141219_200146_outLine +BABEL_OP3_403_33913_20141211_183339_inLine +BABEL_OP3_403_33913_20141211_183339_outLine +BABEL_OP3_403_34197_20141117_004710_inLine +BABEL_OP3_403_34197_20141117_004710_outLine +BABEL_OP3_403_34477_20141023_231734_inLine +BABEL_OP3_403_34477_20141023_231734_outLine +BABEL_OP3_403_35008_20141204_201241_inLine +BABEL_OP3_403_35008_20141204_201241_outLine +BABEL_OP3_403_35139_20141004_205621_inLine +BABEL_OP3_403_35139_20141004_205621_outLine +BABEL_OP3_403_35143_20141210_230830_inLine +BABEL_OP3_403_35143_20141210_230830_outLine +BABEL_OP3_403_35467_20141004_175833_inLine +BABEL_OP3_403_35467_20141004_175833_outLine +BABEL_OP3_403_35583_20150108_024439_inLine +BABEL_OP3_403_35583_20150108_024439_outLine +BABEL_OP3_403_36293_20141015_192540_inLine +BABEL_OP3_403_36293_20141015_192540_outLine +BABEL_OP3_403_36632_20150206_041325_inLine +BABEL_OP3_403_36632_20150206_041325_outLine +BABEL_OP3_403_36990_20141030_231441_inLine +BABEL_OP3_403_36990_20141030_231441_outLine +BABEL_OP3_403_37228_20150108_000217_inLine +BABEL_OP3_403_37228_20150108_000217_outLine +BABEL_OP3_403_37682_20141028_002656_inLine +BABEL_OP3_403_37682_20141028_002656_outLine +BABEL_OP3_403_37853_20150303_020840_inLine +BABEL_OP3_403_37853_20150303_020840_outLine +BABEL_OP3_403_38588_20141026_220103_inLine +BABEL_OP3_403_38588_20141026_220103_outLine +BABEL_OP3_403_38664_20141028_005703_inLine +BABEL_OP3_403_38664_20141028_005703_outLine +BABEL_OP3_403_38689_20141204_215950_inLine +BABEL_OP3_403_38689_20141204_215950_outLine +BABEL_OP3_403_38741_20141005_205401_inLine +BABEL_OP3_403_38741_20141005_205401_outLine +BABEL_OP3_403_38878_20141121_184540_inLine +BABEL_OP3_403_38878_20141121_184540_outLine +BABEL_OP3_403_39099_20150306_193032_inLine +BABEL_OP3_403_39099_20150306_193032_outLine +BABEL_OP3_403_39307_20141014_234344_inLine +BABEL_OP3_403_39307_20141014_234344_outLine +BABEL_OP3_403_39555_20141217_205213_inLine +BABEL_OP3_403_39555_20141217_205213_outLine +BABEL_OP3_403_41442_20141216_224519_inLine +BABEL_OP3_403_41442_20141216_224519_outLine +BABEL_OP3_403_41958_20141026_202739_inLine +BABEL_OP3_403_41958_20141026_202739_outLine +BABEL_OP3_403_42434_20141026_001223_inLine +BABEL_OP3_403_42434_20141026_001223_outLine +BABEL_OP3_403_42718_20150306_202240_inLine +BABEL_OP3_403_42718_20150306_202240_outLine +BABEL_OP3_403_42771_20141104_215437_inLine +BABEL_OP3_403_42771_20141104_215437_outLine +BABEL_OP3_403_43368_20141026_000458_inLine +BABEL_OP3_403_43368_20141026_000458_outLine +BABEL_OP3_403_43784_20141005_193431_inLine +BABEL_OP3_403_43784_20141005_193431_outLine +BABEL_OP3_403_43788_20141202_235051_inLine +BABEL_OP3_403_43788_20141202_235051_outLine +BABEL_OP3_403_44309_20150305_200025_inLine +BABEL_OP3_403_44309_20150305_200025_outLine 
+BABEL_OP3_403_44478_20150307_223313_inLine +BABEL_OP3_403_44478_20150307_223313_outLine +BABEL_OP3_403_44868_20141129_220211_inLine +BABEL_OP3_403_44868_20141129_220211_outLine +BABEL_OP3_403_44961_20141006_233622_inLine +BABEL_OP3_403_44961_20141006_233622_outLine +BABEL_OP3_403_44961_20141006_235203_inLine +BABEL_OP3_403_44961_20141006_235203_outLine +BABEL_OP3_403_45486_20150130_235157_inLine +BABEL_OP3_403_45486_20150130_235157_outLine +BABEL_OP3_403_45536_20141219_005329_inLine +BABEL_OP3_403_45536_20141219_005329_outLine +BABEL_OP3_403_46008_20150307_190844_inLine +BABEL_OP3_403_46008_20150307_190844_outLine +BABEL_OP3_403_46041_20141217_222544_inLine +BABEL_OP3_403_46041_20141217_222544_outLine +BABEL_OP3_403_46310_20141012_204940_inLine +BABEL_OP3_403_46310_20141012_204940_outLine +BABEL_OP3_403_46757_20141202_212733_inLine +BABEL_OP3_403_46757_20141202_212733_outLine +BABEL_OP3_403_47215_20141007_230222_inLine +BABEL_OP3_403_47215_20141007_230222_outLine +BABEL_OP3_403_47283_20141005_204650_inLine +BABEL_OP3_403_47283_20141005_204650_outLine +BABEL_OP3_403_47487_20141025_235747_inLine +BABEL_OP3_403_47487_20141025_235747_outLine +BABEL_OP3_403_47866_20150317_213617_inLine +BABEL_OP3_403_47866_20150317_213617_outLine +BABEL_OP3_403_47878_20141118_193135_inLine +BABEL_OP3_403_47878_20141118_193135_outLine +BABEL_OP3_403_48243_20141004_221542_inLine +BABEL_OP3_403_48243_20141004_221542_outLine +BABEL_OP3_403_48610_20141007_225901_inLine +BABEL_OP3_403_48610_20141007_225901_outLine +BABEL_OP3_403_48663_20150306_181741_inLine +BABEL_OP3_403_48663_20150306_181741_outLine +BABEL_OP3_403_48844_20141007_004947_inLine +BABEL_OP3_403_48844_20141007_004947_outLine +BABEL_OP3_403_48844_20141007_011027_inLine +BABEL_OP3_403_48844_20141007_011027_outLine +BABEL_OP3_403_49027_20150307_230828_inLine +BABEL_OP3_403_49027_20150307_230828_outLine +BABEL_OP3_403_49630_20141205_233804_inLine +BABEL_OP3_403_49630_20141205_233804_outLine +BABEL_OP3_403_49768_20141026_000059_inLine +BABEL_OP3_403_49768_20141026_000059_outLine +BABEL_OP3_403_49907_20141005_215057_inLine +BABEL_OP3_403_49907_20141005_215057_outLine +BABEL_OP3_403_50427_20141116_233807_inLine +BABEL_OP3_403_50427_20141116_233807_outLine +BABEL_OP3_403_50549_20150304_014353_inLine +BABEL_OP3_403_50549_20150304_014353_outLine +BABEL_OP3_403_50779_20141118_221929_inLine +BABEL_OP3_403_50779_20141118_221929_outLine +BABEL_OP3_403_50779_20141118_230132_inLine +BABEL_OP3_403_50779_20141118_230132_outLine +BABEL_OP3_403_51955_20141004_212210_inLine +BABEL_OP3_403_51955_20141004_212210_outLine +BABEL_OP3_403_52490_20141016_230923_inLine +BABEL_OP3_403_52490_20141016_230923_outLine +BABEL_OP3_403_52717_20141008_003843_inLine +BABEL_OP3_403_52717_20141008_003843_outLine +BABEL_OP3_403_53063_20141207_192558_inLine +BABEL_OP3_403_53063_20141207_192558_outLine +BABEL_OP3_403_53063_20141207_194007_inLine +BABEL_OP3_403_53063_20141207_194007_outLine +BABEL_OP3_403_53842_20141031_193507_inLine +BABEL_OP3_403_53842_20141031_193507_outLine +BABEL_OP3_403_54104_20141006_230139_inLine +BABEL_OP3_403_54104_20141006_230139_outLine +BABEL_OP3_403_54104_20141006_230643_inLine +BABEL_OP3_403_54104_20141006_230643_outLine +BABEL_OP3_403_54162_20141103_190601_inLine +BABEL_OP3_403_54162_20141103_190601_outLine +BABEL_OP3_403_54477_20141216_200349_inLine +BABEL_OP3_403_54477_20141216_200349_outLine +BABEL_OP3_403_54477_20141216_213534_inLine +BABEL_OP3_403_54477_20141216_213534_outLine +BABEL_OP3_403_54530_20141217_220934_inLine 
+BABEL_OP3_403_54530_20141217_220934_outLine +BABEL_OP3_403_54594_20150204_003149_inLine +BABEL_OP3_403_54594_20150204_003149_outLine +BABEL_OP3_403_55259_20141025_175845_inLine +BABEL_OP3_403_55259_20141025_175845_outLine +BABEL_OP3_403_55950_20150312_174125_inLine +BABEL_OP3_403_55950_20150312_174125_outLine +BABEL_OP3_403_55968_20141004_005950_inLine +BABEL_OP3_403_55968_20141004_005950_outLine +BABEL_OP3_403_56198_20141005_222956_inLine +BABEL_OP3_403_56198_20141005_222956_outLine +BABEL_OP3_403_56326_20150129_020103_inLine +BABEL_OP3_403_56326_20150129_020103_outLine +BABEL_OP3_403_57093_20141103_221842_inLine +BABEL_OP3_403_57093_20141103_221842_outLine +BABEL_OP3_403_57141_20141215_224302_inLine +BABEL_OP3_403_57141_20141215_224302_outLine +BABEL_OP3_403_57529_20141207_002135_inLine +BABEL_OP3_403_57529_20141207_002135_outLine +BABEL_OP3_403_58047_20141118_184454_inLine +BABEL_OP3_403_58047_20141118_184454_outLine +BABEL_OP3_403_58585_20150106_172737_inLine +BABEL_OP3_403_58585_20150106_172737_outLine +BABEL_OP3_403_59262_20141216_193024_inLine +BABEL_OP3_403_59262_20141216_193024_outLine +BABEL_OP3_403_60115_20141129_235248_inLine +BABEL_OP3_403_60115_20141129_235248_outLine +BABEL_OP3_403_60310_20141217_205059_inLine +BABEL_OP3_403_60310_20141217_205059_outLine +BABEL_OP3_403_60418_20141129_235907_inLine +BABEL_OP3_403_60418_20141129_235907_outLine +BABEL_OP3_403_60650_20150131_013236_inLine +BABEL_OP3_403_60650_20150131_013236_outLine +BABEL_OP3_403_61348_20141103_230857_inLine +BABEL_OP3_403_61348_20141103_230857_outLine +BABEL_OP3_403_61678_20141003_231023_inLine +BABEL_OP3_403_61678_20141003_231023_outLine +BABEL_OP3_403_61731_20141005_201612_inLine +BABEL_OP3_403_61731_20141005_201612_outLine +BABEL_OP3_403_61971_20150307_004145_inLine +BABEL_OP3_403_61971_20150307_004145_outLine +BABEL_OP3_403_62014_20141127_180004_inLine +BABEL_OP3_403_62014_20141127_180004_outLine +BABEL_OP3_403_62734_20141025_192117_inLine +BABEL_OP3_403_62734_20141025_192117_outLine +BABEL_OP3_403_62810_20141016_191619_inLine +BABEL_OP3_403_62810_20141016_191619_outLine +BABEL_OP3_403_63670_20141215_221926_inLine +BABEL_OP3_403_63670_20141215_221926_outLine +BABEL_OP3_403_63787_20141006_214400_inLine +BABEL_OP3_403_63787_20141006_214400_outLine +BABEL_OP3_403_63906_20150305_205105_inLine +BABEL_OP3_403_63906_20150305_205105_outLine +BABEL_OP3_403_65367_20150108_004325_inLine +BABEL_OP3_403_65367_20150108_004325_outLine +BABEL_OP3_403_65640_20150314_163101_inLine +BABEL_OP3_403_65640_20150314_163101_outLine +BABEL_OP3_403_66001_20141007_230508_inLine +BABEL_OP3_403_66001_20141007_230508_outLine +BABEL_OP3_403_66822_20141029_224921_inLine +BABEL_OP3_403_66822_20141029_224921_outLine +BABEL_OP3_403_66916_20141015_215414_inLine +BABEL_OP3_403_66916_20141015_215414_outLine +BABEL_OP3_403_67622_20141014_193846_inLine +BABEL_OP3_403_67622_20141014_193846_outLine +BABEL_OP3_403_67659_20141023_013756_inLine +BABEL_OP3_403_67659_20141023_013756_outLine +BABEL_OP3_403_68384_20141216_000507_inLine +BABEL_OP3_403_68384_20141216_000507_outLine +BABEL_OP3_403_68748_20141130_014650_inLine +BABEL_OP3_403_68748_20141130_014650_outLine +BABEL_OP3_403_68854_20150306_195508_inLine +BABEL_OP3_403_68854_20150306_195508_outLine +BABEL_OP3_403_69096_20150309_190140_inLine +BABEL_OP3_403_69096_20150309_190140_outLine +BABEL_OP3_403_69107_20141106_000151_inLine +BABEL_OP3_403_69107_20141106_000151_outLine +BABEL_OP3_403_69746_20141220_191513_inLine +BABEL_OP3_403_69746_20141220_191513_outLine 
+BABEL_OP3_403_70121_20141026_225432_inLine +BABEL_OP3_403_70121_20141026_225432_outLine +BABEL_OP3_403_70216_20150128_234110_inLine +BABEL_OP3_403_70216_20150128_234110_outLine +BABEL_OP3_403_70257_20150204_032020_inLine +BABEL_OP3_403_70257_20150204_032020_outLine +BABEL_OP3_403_70343_20141205_225856_inLine +BABEL_OP3_403_70343_20141205_225856_outLine +BABEL_OP3_403_71047_20150106_190413_inLine +BABEL_OP3_403_71047_20150106_190413_outLine +BABEL_OP3_403_72007_20141205_002010_inLine +BABEL_OP3_403_72007_20141205_002010_outLine +BABEL_OP3_403_72040_20141006_004959_inLine +BABEL_OP3_403_72040_20141006_004959_outLine +BABEL_OP3_403_72110_20141210_212045_inLine +BABEL_OP3_403_72110_20141210_212045_outLine +BABEL_OP3_403_72844_20141004_005248_inLine +BABEL_OP3_403_72844_20141004_005248_outLine +BABEL_OP3_403_73430_20141205_233006_inLine +BABEL_OP3_403_73430_20141205_233006_outLine +BABEL_OP3_403_73591_20140930_234521_inLine +BABEL_OP3_403_73591_20140930_234521_outLine +BABEL_OP3_403_74728_20150312_182026_inLine +BABEL_OP3_403_74728_20150312_182026_outLine +BABEL_OP3_403_75064_20141022_225629_inLine +BABEL_OP3_403_75064_20141022_225629_outLine +BABEL_OP3_403_75223_20141016_194054_inLine +BABEL_OP3_403_75223_20141016_194054_outLine +BABEL_OP3_403_76499_20141103_232220_inLine +BABEL_OP3_403_76499_20141103_232220_outLine +BABEL_OP3_403_77427_20141027_223134_inLine +BABEL_OP3_403_77427_20141027_223134_outLine +BABEL_OP3_403_77974_20150312_200046_inLine +BABEL_OP3_403_77974_20150312_200046_outLine +BABEL_OP3_403_77990_20141004_201020_inLine +BABEL_OP3_403_77990_20141004_201020_outLine +BABEL_OP3_403_78116_20141208_213333_inLine +BABEL_OP3_403_78116_20141208_213333_outLine +BABEL_OP3_403_78116_20141208_214155_inLine +BABEL_OP3_403_78116_20141208_214155_outLine +BABEL_OP3_403_78254_20141024_234037_inLine +BABEL_OP3_403_78254_20141024_234037_outLine +BABEL_OP3_403_78360_20150107_231519_inLine +BABEL_OP3_403_78360_20150107_231519_outLine +BABEL_OP3_403_78544_20141201_192016_inLine +BABEL_OP3_403_78544_20141201_192016_outLine +BABEL_OP3_403_78604_20141006_193457_inLine +BABEL_OP3_403_78604_20141006_193457_outLine +BABEL_OP3_403_78833_20150205_204459_inLine +BABEL_OP3_403_78833_20150205_204459_outLine +BABEL_OP3_403_80439_20141023_195331_inLine +BABEL_OP3_403_80439_20141023_195331_outLine +BABEL_OP3_403_80781_20141026_214157_inLine +BABEL_OP3_403_80781_20141026_214157_outLine +BABEL_OP3_403_81149_20150313_000213_inLine +BABEL_OP3_403_81149_20150313_000213_outLine +BABEL_OP3_403_81213_20141004_213211_inLine +BABEL_OP3_403_81213_20141004_213211_outLine +BABEL_OP3_403_82391_20141206_001207_inLine +BABEL_OP3_403_82391_20141206_001207_outLine +BABEL_OP3_403_82425_20141007_231028_inLine +BABEL_OP3_403_82425_20141007_231028_outLine +BABEL_OP3_403_83238_20141107_233257_inLine +BABEL_OP3_403_83238_20141107_233257_outLine +BABEL_OP3_403_83436_20141012_221126_inLine +BABEL_OP3_403_83436_20141012_221126_outLine +BABEL_OP3_403_83455_20141103_225146_inLine +BABEL_OP3_403_83455_20141103_225146_outLine +BABEL_OP3_403_83651_20141005_194737_inLine +BABEL_OP3_403_83651_20141005_194737_outLine +BABEL_OP3_403_84194_20150204_213858_inLine +BABEL_OP3_403_84194_20150204_213858_outLine +BABEL_OP3_403_84458_20141208_005012_inLine +BABEL_OP3_403_84458_20141208_005012_outLine +BABEL_OP3_403_84469_20141211_002526_inLine +BABEL_OP3_403_84469_20141211_002526_outLine +BABEL_OP3_403_84547_20141013_223556_inLine +BABEL_OP3_403_84547_20141013_223556_outLine +BABEL_OP3_403_84605_20141005_214529_inLine 
+BABEL_OP3_403_84605_20141005_214529_outLine +BABEL_OP3_403_84737_20150303_195506_inLine +BABEL_OP3_403_84737_20150303_195506_outLine +BABEL_OP3_403_85647_20141103_192225_inLine +BABEL_OP3_403_85647_20141103_192225_outLine +BABEL_OP3_403_86845_20150201_015753_inLine +BABEL_OP3_403_86845_20150201_015753_outLine +BABEL_OP3_403_87889_20150107_001827_inLine +BABEL_OP3_403_87889_20150107_001827_outLine +BABEL_OP3_403_87921_20141210_233414_inLine +BABEL_OP3_403_87921_20141210_233414_outLine +BABEL_OP3_403_88260_20141029_205951_inLine +BABEL_OP3_403_88260_20141029_205951_outLine +BABEL_OP3_403_88812_20150307_181013_inLine +BABEL_OP3_403_88812_20150307_181013_outLine +BABEL_OP3_403_89059_20141220_191342_inLine +BABEL_OP3_403_89059_20141220_191342_outLine +BABEL_OP3_403_89358_20141030_231758_inLine +BABEL_OP3_403_89358_20141030_231758_outLine +BABEL_OP3_403_90709_20141007_234900_inLine +BABEL_OP3_403_90709_20141007_234900_outLine +BABEL_OP3_403_90739_20141028_224009_inLine +BABEL_OP3_403_90739_20141028_224009_outLine +BABEL_OP3_403_92509_20141014_232528_inLine +BABEL_OP3_403_92509_20141014_232528_outLine +BABEL_OP3_403_92527_20141026_192704_inLine +BABEL_OP3_403_92527_20141026_192704_outLine +BABEL_OP3_403_92740_20141130_011740_inLine +BABEL_OP3_403_92740_20141130_011740_outLine +BABEL_OP3_403_94409_20141028_214356_inLine +BABEL_OP3_403_94409_20141028_214356_outLine +BABEL_OP3_403_94449_20150309_193606_inLine +BABEL_OP3_403_94449_20150309_193606_outLine +BABEL_OP3_403_94487_20150312_014136_inLine +BABEL_OP3_403_94487_20150312_014136_outLine +BABEL_OP3_403_95269_20141026_235206_inLine +BABEL_OP3_403_95269_20141026_235206_outLine +BABEL_OP3_403_95583_20141013_184937_inLine +BABEL_OP3_403_95583_20141013_184937_outLine +BABEL_OP3_403_95670_20141016_214958_inLine +BABEL_OP3_403_95670_20141016_214958_outLine +BABEL_OP3_403_96324_20141014_194024_inLine +BABEL_OP3_403_96324_20141014_194024_outLine +BABEL_OP3_403_96525_20141217_223842_inLine +BABEL_OP3_403_96525_20141217_223842_outLine +BABEL_OP3_403_96910_20141024_195822_inLine +BABEL_OP3_403_96910_20141024_195822_outLine +BABEL_OP3_403_97376_20141206_215930_inLine +BABEL_OP3_403_97376_20141206_215930_outLine +BABEL_OP3_403_97588_20141015_193851_inLine +BABEL_OP3_403_97588_20141015_193851_outLine +BABEL_OP3_403_97772_20141003_213919_inLine +BABEL_OP3_403_97772_20141003_213919_outLine +BABEL_OP3_403_98311_20141005_195843_inLine +BABEL_OP3_403_98311_20141005_195843_outLine +BABEL_OP3_403_98506_20150319_151741_inLine +BABEL_OP3_403_98506_20150319_151741_outLine +BABEL_OP3_403_99202_20141108_002737_inLine +BABEL_OP3_403_99202_20141108_002737_outLine +BABEL_OP3_403_99955_20150107_213836_inLine +BABEL_OP3_403_99955_20150107_213836_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list new file mode 100644 index 00000000000..21ec3e2d9b4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list @@ -0,0 +1,533 @@ +BABEL_OP3_403_10319_20141014_223750_inLine +BABEL_OP3_403_10319_20141014_223750_outLine +BABEL_OP3_403_10901_20141103_193402_inLine +BABEL_OP3_403_10901_20141103_193402_outLine +BABEL_OP3_403_10974_20141107_215600_inLine +BABEL_OP3_403_10974_20141107_215600_outLine +BABEL_OP3_403_11673_20141013_203235_inLine +BABEL_OP3_403_11673_20141013_203235_outLine +BABEL_OP3_403_12767_20141006_233130_inLine +BABEL_OP3_403_12767_20141006_233130_outLine +BABEL_OP3_403_13561_20141106_192514_inLine +BABEL_OP3_403_13561_20141106_192514_outLine 
+BABEL_OP3_403_13664_20141002_224345_inLine +BABEL_OP3_403_13664_20141002_224345_outLine +BABEL_OP3_403_14028_20150407_193852_inLine +BABEL_OP3_403_14028_20150407_193852_outLine +BABEL_OP3_403_14158_20141201_232657_inLine +BABEL_OP3_403_14179_20141129_222039_inLine +BABEL_OP3_403_14179_20141129_222039_outLine +BABEL_OP3_403_14719_20141215_230523_inLine +BABEL_OP3_403_14719_20141215_230523_outLine +BABEL_OP3_403_14723_20150205_004549_inLine +BABEL_OP3_403_14723_20150205_004549_outLine +BABEL_OP3_403_14875_20141005_213932_inLine +BABEL_OP3_403_14875_20141005_213932_outLine +BABEL_OP3_403_14972_20141116_221959_inLine +BABEL_OP3_403_14972_20141116_221959_outLine +BABEL_OP3_403_15216_20141219_215848_inLine +BABEL_OP3_403_15216_20141219_215848_outLine +BABEL_OP3_403_15382_20141127_202406_inLine +BABEL_OP3_403_15382_20141127_202406_outLine +BABEL_OP3_403_15749_20141218_230749_inLine +BABEL_OP3_403_15749_20141218_230749_outLine +BABEL_OP3_403_16787_20141029_182118_inLine +BABEL_OP3_403_16787_20141029_182118_outLine +BABEL_OP3_403_17472_20141210_175708_inLine +BABEL_OP3_403_17472_20141210_175708_outLine +BABEL_OP3_403_17472_20141210_180354_inLine +BABEL_OP3_403_17472_20141210_180354_outLine +BABEL_OP3_403_17615_20141201_004945_inLine +BABEL_OP3_403_17615_20141201_004945_outLine +BABEL_OP3_403_17890_20141203_003513_inLine +BABEL_OP3_403_17890_20141203_003513_outLine +BABEL_OP3_403_18037_20150418_210458_inLine +BABEL_OP3_403_18037_20150418_210458_outLine +BABEL_OP3_403_18380_20141031_010759_inLine +BABEL_OP3_403_18380_20141031_010759_outLine +BABEL_OP3_403_19101_20141106_010120_inLine +BABEL_OP3_403_19101_20141106_010120_outLine +BABEL_OP3_403_19134_20141120_182846_inLine +BABEL_OP3_403_19134_20141120_182846_outLine +BABEL_OP3_403_19672_20141130_215944_outLine +BABEL_OP3_403_20330_20150304_212548_inLine +BABEL_OP3_403_20330_20150304_212548_outLine +BABEL_OP3_403_20724_20150414_221749_inLine +BABEL_OP3_403_20724_20150414_221749_outLine +BABEL_OP3_403_20768_20141215_213303_inLine +BABEL_OP3_403_20768_20141215_213303_outLine +BABEL_OP3_403_22641_20141016_232215_inLine +BABEL_OP3_403_22641_20141016_232215_outLine +BABEL_OP3_403_23098_20150413_232746_inLine +BABEL_OP3_403_23098_20150413_232746_outLine +BABEL_OP3_403_23395_20141121_205256_outLine +BABEL_OP3_403_23505_20141008_003349_inLine +BABEL_OP3_403_23505_20141008_003349_outLine +BABEL_OP3_403_23628_20141023_001612_outLine +BABEL_OP3_403_23980_20141029_203114_inLine +BABEL_OP3_403_23980_20141029_203114_outLine +BABEL_OP3_403_24239_20150310_212947_inLine +BABEL_OP3_403_24239_20150310_212947_outLine +BABEL_OP3_403_24239_20150310_213506_inLine +BABEL_OP3_403_24239_20150310_213506_outLine +BABEL_OP3_403_24239_20150310_214027_inLine +BABEL_OP3_403_24239_20150310_214027_outLine +BABEL_OP3_403_24270_20141118_203034_inLine +BABEL_OP3_403_24270_20141118_203034_outLine +BABEL_OP3_403_25719_20141220_201504_inLine +BABEL_OP3_403_25719_20141220_201504_outLine +BABEL_OP3_403_25895_20150413_214536_inLine +BABEL_OP3_403_25895_20150413_214536_outLine +BABEL_OP3_403_26836_20141005_221645_inLine +BABEL_OP3_403_26836_20141005_221645_outLine +BABEL_OP3_403_26869_20150416_200345_inLine +BABEL_OP3_403_26869_20150416_201016_inLine +BABEL_OP3_403_27046_20150406_230902_inLine +BABEL_OP3_403_27046_20150406_230902_outLine +BABEL_OP3_403_27082_20141103_211126_inLine +BABEL_OP3_403_27082_20141103_211126_outLine +BABEL_OP3_403_28422_20141130_174755_inLine +BABEL_OP3_403_28422_20141130_174755_outLine +BABEL_OP3_403_28538_20141031_223359_inLine 
+BABEL_OP3_403_28538_20141031_223359_outLine +BABEL_OP3_403_29072_20141202_234748_inLine +BABEL_OP3_403_29072_20141202_234748_outLine +BABEL_OP3_403_29663_20150415_221129_inLine +BABEL_OP3_403_29663_20150415_221129_outLine +BABEL_OP3_403_29663_20150415_221719_inLine +BABEL_OP3_403_29663_20150415_221719_outLine +BABEL_OP3_403_29685_20141026_200131_inLine +BABEL_OP3_403_29685_20141026_200131_outLine +BABEL_OP3_403_30013_20141203_194552_inLine +BABEL_OP3_403_30013_20141203_194552_outLine +BABEL_OP3_403_30426_20150412_233250_inLine +BABEL_OP3_403_30426_20150412_233250_outLine +BABEL_OP3_403_30645_20141006_230955_inLine +BABEL_OP3_403_30645_20141006_230955_outLine +BABEL_OP3_403_30653_20150318_184710_inLine +BABEL_OP3_403_30653_20150318_184710_outLine +BABEL_OP3_403_30869_20141220_202310_inLine +BABEL_OP3_403_30869_20141220_202310_outLine +BABEL_OP3_403_31182_20141218_211632_outLine +BABEL_OP3_403_31628_20141201_204314_inLine +BABEL_OP3_403_31628_20141201_204314_outLine +BABEL_OP3_403_32169_20150415_234749_inLine +BABEL_OP3_403_32169_20150415_234749_outLine +BABEL_OP3_403_32914_20141219_204204_inLine +BABEL_OP3_403_32914_20141219_204204_outLine +BABEL_OP3_403_33704_20141216_231752_inLine +BABEL_OP3_403_33704_20141216_231752_outLine +BABEL_OP3_403_33933_20150426_001110_inLine +BABEL_OP3_403_33933_20150426_001110_outLine +BABEL_OP3_403_34145_20141129_000102_inLine +BABEL_OP3_403_34145_20141129_000102_outLine +BABEL_OP3_403_34328_20141031_214721_inLine +BABEL_OP3_403_34328_20141031_214721_outLine +BABEL_OP3_403_34903_20141127_193345_inLine +BABEL_OP3_403_34903_20141127_193345_outLine +BABEL_OP3_403_35202_20141202_231605_inLine +BABEL_OP3_403_35202_20141202_231605_outLine +BABEL_OP3_403_36894_20141014_211920_inLine +BABEL_OP3_403_36894_20141014_211920_outLine +BABEL_OP3_403_37064_20141005_205701_inLine +BABEL_OP3_403_37064_20141005_205701_outLine +BABEL_OP3_403_37271_20141217_183739_inLine +BABEL_OP3_403_37271_20141217_183739_outLine +BABEL_OP3_403_38076_20141128_204027_inLine +BABEL_OP3_403_38076_20141128_204027_outLine +BABEL_OP3_403_38431_20141215_205449_inLine +BABEL_OP3_403_38431_20141215_205449_outLine +BABEL_OP3_403_38554_20141003_222444_outLine +BABEL_OP3_403_39426_20150304_222409_inLine +BABEL_OP3_403_39426_20150304_222409_outLine +BABEL_OP3_403_39426_20150304_223342_inLine +BABEL_OP3_403_39426_20150304_223342_outLine +BABEL_OP3_403_39744_20141013_213656_inLine +BABEL_OP3_403_39744_20141013_213656_outLine +BABEL_OP3_403_40565_20141203_003932_inLine +BABEL_OP3_403_40565_20141203_003932_outLine +BABEL_OP3_403_40740_20141220_223558_inLine +BABEL_OP3_403_40740_20141220_223558_outLine +BABEL_OP3_403_41038_20141201_222108_inLine +BABEL_OP3_403_41038_20141201_222108_outLine +BABEL_OP3_403_41174_20141028_231225_inLine +BABEL_OP3_403_41174_20141028_231225_outLine +BABEL_OP3_403_41272_20150312_024107_inLine +BABEL_OP3_403_41272_20150312_024107_outLine +BABEL_OP3_403_41493_20141006_010651_inLine +BABEL_OP3_403_41493_20141006_010651_outLine +BABEL_OP3_403_41745_20141027_234835_inLine +BABEL_OP3_403_41745_20141027_234835_outLine +BABEL_OP3_403_42155_20141116_204154_inLine +BABEL_OP3_403_42155_20141116_204154_outLine +BABEL_OP3_403_42526_20150106_224056_inLine +BABEL_OP3_403_42526_20150106_224056_outLine +BABEL_OP3_403_42834_20141128_220047_inLine +BABEL_OP3_403_42834_20141128_220047_outLine +BABEL_OP3_403_42942_20141030_235147_inLine +BABEL_OP3_403_42942_20141030_235147_outLine +BABEL_OP3_403_44255_20150305_013502_inLine +BABEL_OP3_403_44347_20141219_190407_inLine 
+BABEL_OP3_403_44347_20141219_190407_outLine +BABEL_OP3_403_44420_20141023_214836_inLine +BABEL_OP3_403_44420_20141023_214836_outLine +BABEL_OP3_403_44477_20141201_010216_inLine +BABEL_OP3_403_44477_20141201_010216_outLine +BABEL_OP3_403_44709_20141201_213014_inLine +BABEL_OP3_403_44709_20141201_213014_outLine +BABEL_OP3_403_45106_20141120_182301_inLine +BABEL_OP3_403_45106_20141120_182301_outLine +BABEL_OP3_403_46169_20141217_192351_inLine +BABEL_OP3_403_46169_20141217_192351_outLine +BABEL_OP3_403_46333_20141014_002918_inLine +BABEL_OP3_403_46333_20141014_002918_outLine +BABEL_OP3_403_46702_20141003_234833_inLine +BABEL_OP3_403_46702_20141003_234833_outLine +BABEL_OP3_403_46712_20141023_221319_inLine +BABEL_OP3_403_46712_20141023_221319_outLine +BABEL_OP3_403_46763_20150318_203035_inLine +BABEL_OP3_403_46763_20150318_203035_outLine +BABEL_OP3_403_47959_20141024_223125_inLine +BABEL_OP3_403_47959_20141024_223125_outLine +BABEL_OP3_403_49001_20141006_010425_inLine +BABEL_OP3_403_49001_20141006_010425_outLine +BABEL_OP3_403_49641_20150402_214738_outLine +BABEL_OP3_403_49775_20141004_211924_inLine +BABEL_OP3_403_49775_20141004_211924_outLine +BABEL_OP3_403_50962_20141005_192714_outLine +BABEL_OP3_403_51156_20150416_171911_inLine +BABEL_OP3_403_51156_20150416_171911_outLine +BABEL_OP3_403_51417_20141220_002122_inLine +BABEL_OP3_403_51417_20141220_002122_outLine +BABEL_OP3_403_51540_20141219_215956_inLine +BABEL_OP3_403_51540_20141219_215956_outLine +BABEL_OP3_403_51968_20141028_223645_inLine +BABEL_OP3_403_51968_20141028_223645_outLine +BABEL_OP3_403_52442_20141105_011029_outLine +BABEL_OP3_403_52818_20141203_184905_inLine +BABEL_OP3_403_52818_20141203_184905_outLine +BABEL_OP3_403_53010_20150418_185722_inLine +BABEL_OP3_403_53010_20150418_185722_outLine +BABEL_OP3_403_53068_20150426_230124_inLine +BABEL_OP3_403_53068_20150426_230124_outLine +BABEL_OP3_403_53144_20150319_193813_inLine +BABEL_OP3_403_53144_20150319_193813_outLine +BABEL_OP3_403_54040_20141219_003109_inLine +BABEL_OP3_403_54040_20141219_003109_outLine +BABEL_OP3_403_54390_20141006_214754_inLine +BABEL_OP3_403_54390_20141006_214754_outLine +BABEL_OP3_403_54697_20141215_211116_inLine +BABEL_OP3_403_54697_20141215_211116_outLine +BABEL_OP3_403_54953_20141027_223433_inLine +BABEL_OP3_403_54953_20141027_223433_outLine +BABEL_OP3_403_55042_20150331_225750_inLine +BABEL_OP3_403_55042_20150331_225750_outLine +BABEL_OP3_403_55381_20141218_191630_inLine +BABEL_OP3_403_55381_20141218_191630_outLine +BABEL_OP3_403_55742_20141004_201921_inLine +BABEL_OP3_403_55742_20141004_204835_inLine +BABEL_OP3_403_55818_20141006_220912_inLine +BABEL_OP3_403_55818_20141006_220912_outLine +BABEL_OP3_403_56370_20141014_185314_inLine +BABEL_OP3_403_56370_20141014_185314_outLine +BABEL_OP3_403_56677_20141208_200823_inLine +BABEL_OP3_403_56677_20141208_200823_outLine +BABEL_OP3_403_56826_20141217_233213_inLine +BABEL_OP3_403_56826_20141217_233213_outLine +BABEL_OP3_403_57919_20150418_200246_inLine +BABEL_OP3_403_57919_20150418_200246_outLine +BABEL_OP3_403_57919_20150418_201847_inLine +BABEL_OP3_403_57919_20150418_201847_outLine +BABEL_OP3_403_58107_20141106_011114_inLine +BABEL_OP3_403_58107_20141106_011114_outLine +BABEL_OP3_403_58145_20141120_190441_inLine +BABEL_OP3_403_58145_20141120_190441_outLine +BABEL_OP3_403_58636_20150426_202602_inLine +BABEL_OP3_403_58636_20150426_202602_outLine +BABEL_OP3_403_58717_20141104_223801_inLine +BABEL_OP3_403_58717_20141104_223801_outLine +BABEL_OP3_403_59301_20141220_230943_inLine 
+BABEL_OP3_403_59301_20141220_230943_outLine +BABEL_OP3_403_59549_20141004_234422_inLine +BABEL_OP3_403_59549_20141004_234422_outLine +BABEL_OP3_403_59747_20141014_191829_inLine +BABEL_OP3_403_59747_20141014_191829_outLine +BABEL_OP3_403_59864_20150306_210405_inLine +BABEL_OP3_403_59864_20150306_210405_outLine +BABEL_OP3_403_59993_20141005_224301_inLine +BABEL_OP3_403_59993_20141005_224301_outLine +BABEL_OP3_403_59993_20141005_225220_inLine +BABEL_OP3_403_59993_20141005_225220_outLine +BABEL_OP3_403_59993_20141005_230254_inLine +BABEL_OP3_403_59993_20141005_230254_outLine +BABEL_OP3_403_60026_20141006_002312_inLine +BABEL_OP3_403_60026_20141006_002312_outLine +BABEL_OP3_403_61011_20141013_233414_inLine +BABEL_OP3_403_61011_20141013_233414_outLine +BABEL_OP3_403_61040_20141217_000352_inLine +BABEL_OP3_403_61040_20141217_000352_outLine +BABEL_OP3_403_61167_20141026_233020_inLine +BABEL_OP3_403_61167_20141026_233020_outLine +BABEL_OP3_403_61435_20141216_223049_inLine +BABEL_OP3_403_61435_20141216_223049_outLine +BABEL_OP3_403_61963_20150106_232605_outLine +BABEL_OP3_403_62155_20150318_181046_inLine +BABEL_OP3_403_62155_20150318_181046_outLine +BABEL_OP3_403_62177_20150303_192318_inLine +BABEL_OP3_403_62177_20150303_192318_outLine +BABEL_OP3_403_62177_20150303_192933_inLine +BABEL_OP3_403_62177_20150303_192933_outLine +BABEL_OP3_403_62835_20141031_215252_inLine +BABEL_OP3_403_62976_20141120_211316_inLine +BABEL_OP3_403_62976_20141120_211316_outLine +BABEL_OP3_403_63220_20141128_003242_inLine +BABEL_OP3_403_63220_20141128_003242_outLine +BABEL_OP3_403_63265_20150416_213544_inLine +BABEL_OP3_403_63265_20150416_214859_inLine +BABEL_OP3_403_63307_20141116_205038_inLine +BABEL_OP3_403_63307_20141116_205038_outLine +BABEL_OP3_403_63484_20150413_210246_inLine +BABEL_OP3_403_63484_20150413_210246_outLine +BABEL_OP3_403_63757_20141118_001039_inLine +BABEL_OP3_403_63757_20141118_001039_outLine +BABEL_OP3_403_63920_20150413_175014_inLine +BABEL_OP3_403_63920_20150413_175014_outLine +BABEL_OP3_403_64635_20150418_171656_inLine +BABEL_OP3_403_64635_20150418_171656_outLine +BABEL_OP3_403_64638_20141130_205142_inLine +BABEL_OP3_403_64638_20141130_205142_outLine +BABEL_OP3_403_64688_20150327_215407_inLine +BABEL_OP3_403_64688_20150327_215407_outLine +BABEL_OP3_403_64759_20141012_211953_inLine +BABEL_OP3_403_64759_20141012_211953_outLine +BABEL_OP3_403_65064_20141120_180442_inLine +BABEL_OP3_403_65064_20141120_180442_outLine +BABEL_OP3_403_65561_20141206_000223_inLine +BABEL_OP3_403_65561_20141206_000223_outLine +BABEL_OP3_403_66959_20141211_191140_inLine +BABEL_OP3_403_66959_20141211_191140_outLine +BABEL_OP3_403_66967_20141016_233136_inLine +BABEL_OP3_403_66967_20141016_233136_outLine +BABEL_OP3_403_67373_20141010_191456_inLine +BABEL_OP3_403_67373_20141010_191456_outLine +BABEL_OP3_403_67401_20141118_192332_inLine +BABEL_OP3_403_67401_20141118_192332_outLine +BABEL_OP3_403_68823_20150416_201411_inLine +BABEL_OP3_403_68823_20150416_201411_outLine +BABEL_OP3_403_69153_20141207_201546_inLine +BABEL_OP3_403_69153_20141207_201546_outLine +BABEL_OP3_403_69153_20141207_202942_inLine +BABEL_OP3_403_69153_20141207_202942_outLine +BABEL_OP3_403_69474_20141204_202057_inLine +BABEL_OP3_403_69474_20141204_202057_outLine +BABEL_OP3_403_69992_20141006_215605_inLine +BABEL_OP3_403_69992_20141006_215605_outLine +BABEL_OP3_403_70526_20150317_192457_inLine +BABEL_OP3_403_70526_20150317_192457_outLine +BABEL_OP3_403_71038_20150106_205857_inLine +BABEL_OP3_403_71038_20150106_205857_outLine 
+BABEL_OP3_403_71067_20141120_212952_inLine +BABEL_OP3_403_71067_20141120_212952_outLine +BABEL_OP3_403_71067_20141120_214426_inLine +BABEL_OP3_403_71067_20141120_214426_outLine +BABEL_OP3_403_71189_20150318_162559_inLine +BABEL_OP3_403_71189_20150318_162559_outLine +BABEL_OP3_403_71263_20141120_195808_inLine +BABEL_OP3_403_71263_20141120_195808_outLine +BABEL_OP3_403_71263_20141120_200524_inLine +BABEL_OP3_403_71263_20141120_200524_outLine +BABEL_OP3_403_71263_20141120_201201_inLine +BABEL_OP3_403_71263_20141120_201201_outLine +BABEL_OP3_403_71419_20150130_163036_inLine +BABEL_OP3_403_71419_20150130_163036_outLine +BABEL_OP3_403_71419_20150130_170259_inLine +BABEL_OP3_403_71419_20150130_170259_outLine +BABEL_OP3_403_71850_20150317_201433_inLine +BABEL_OP3_403_71850_20150317_201433_outLine +BABEL_OP3_403_71850_20150317_204336_inLine +BABEL_OP3_403_71850_20150317_204336_outLine +BABEL_OP3_403_72587_20141127_221927_inLine +BABEL_OP3_403_72587_20141127_221927_outLine +BABEL_OP3_403_72587_20141127_222705_inLine +BABEL_OP3_403_72587_20141127_222705_outLine +BABEL_OP3_403_73446_20150317_233038_inLine +BABEL_OP3_403_73446_20150317_233038_outLine +BABEL_OP3_403_73757_20141103_184243_inLine +BABEL_OP3_403_74121_20141029_192619_inLine +BABEL_OP3_403_74121_20141029_192619_outLine +BABEL_OP3_403_74455_20150304_010648_inLine +BABEL_OP3_403_74455_20150304_010648_outLine +BABEL_OP3_403_74763_20150412_222934_inLine +BABEL_OP3_403_74763_20150412_222934_outLine +BABEL_OP3_403_75465_20141216_203010_inLine +BABEL_OP3_403_75764_20150106_010413_inLine +BABEL_OP3_403_75764_20150106_010413_outLine +BABEL_OP3_403_76238_20141207_205931_inLine +BABEL_OP3_403_76238_20141207_205931_outLine +BABEL_OP3_403_76238_20141207_211123_inLine +BABEL_OP3_403_76238_20141207_211123_outLine +BABEL_OP3_403_76756_20141121_192227_inLine +BABEL_OP3_403_76756_20141121_192227_outLine +BABEL_OP3_403_77146_20141013_203551_inLine +BABEL_OP3_403_77146_20141013_203551_outLine +BABEL_OP3_403_77391_20141025_014416_inLine +BABEL_OP3_403_77391_20141025_014416_outLine +BABEL_OP3_403_77803_20141013_223521_inLine +BABEL_OP3_403_77803_20141013_223521_outLine +BABEL_OP3_403_77904_20150426_181110_inLine +BABEL_OP3_403_77904_20150426_181110_outLine +BABEL_OP3_403_77909_20150330_191417_inLine +BABEL_OP3_403_77909_20150330_191417_outLine +BABEL_OP3_403_78609_20141217_215450_inLine +BABEL_OP3_403_78609_20141217_215450_outLine +BABEL_OP3_403_78743_20141216_183731_inLine +BABEL_OP3_403_78743_20141216_183731_outLine +BABEL_OP3_403_78976_20141025_002547_inLine +BABEL_OP3_403_78976_20141025_002547_outLine +BABEL_OP3_403_79045_20141219_213058_inLine +BABEL_OP3_403_79045_20141219_213058_outLine +BABEL_OP3_403_79129_20141117_210821_inLine +BABEL_OP3_403_79129_20141117_210821_outLine +BABEL_OP3_403_79139_20141103_204223_inLine +BABEL_OP3_403_79139_20141103_204223_outLine +BABEL_OP3_403_80881_20141016_231419_inLine +BABEL_OP3_403_80881_20141016_231419_outLine +BABEL_OP3_403_80897_20141118_205921_inLine +BABEL_OP3_403_81392_20141202_223505_inLine +BABEL_OP3_403_81392_20141202_223505_outLine +BABEL_OP3_403_81553_20150108_011830_inLine +BABEL_OP3_403_81553_20150108_011830_outLine +BABEL_OP3_403_81971_20141013_202229_inLine +BABEL_OP3_403_81971_20141013_202229_outLine +BABEL_OP3_403_82089_20141103_180402_inLine +BABEL_OP3_403_82089_20141103_180402_outLine +BABEL_OP3_403_82138_20141103_203306_inLine +BABEL_OP3_403_82138_20141103_203306_outLine +BABEL_OP3_403_82140_20141103_203606_inLine +BABEL_OP3_403_82140_20141103_203606_outLine 
+BABEL_OP3_403_82224_20141221_020512_inLine +BABEL_OP3_403_82224_20141221_020512_outLine +BABEL_OP3_403_82361_20150313_215812_inLine +BABEL_OP3_403_82361_20150313_215812_outLine +BABEL_OP3_403_82637_20141013_202558_inLine +BABEL_OP3_403_82637_20141013_202558_outLine +BABEL_OP3_403_82742_20141217_192623_inLine +BABEL_OP3_403_82742_20141217_192623_outLine +BABEL_OP3_403_82742_20141217_193955_inLine +BABEL_OP3_403_82742_20141217_193955_outLine +BABEL_OP3_403_82935_20141220_194756_inLine +BABEL_OP3_403_82935_20141220_194756_outLine +BABEL_OP3_403_83783_20141117_201033_inLine +BABEL_OP3_403_83783_20141117_201033_outLine +BABEL_OP3_403_83813_20150201_234438_inLine +BABEL_OP3_403_83813_20150201_234438_outLine +BABEL_OP3_403_83929_20140926_001811_inLine +BABEL_OP3_403_83929_20140926_001811_outLine +BABEL_OP3_403_83935_20141205_002539_inLine +BABEL_OP3_403_83935_20141205_002539_outLine +BABEL_OP3_403_83935_20141205_223342_inLine +BABEL_OP3_403_83935_20141205_223342_outLine +BABEL_OP3_403_84061_20141027_225533_inLine +BABEL_OP3_403_84061_20141027_225533_outLine +BABEL_OP3_403_84125_20141005_234430_inLine +BABEL_OP3_403_84125_20141005_234430_outLine +BABEL_OP3_403_84408_20141026_210154_inLine +BABEL_OP3_403_84408_20141026_210154_outLine +BABEL_OP3_403_84936_20141127_181420_inLine +BABEL_OP3_403_84936_20141127_181420_outLine +BABEL_OP3_403_85047_20141031_202048_inLine +BABEL_OP3_403_85047_20141031_202048_outLine +BABEL_OP3_403_85322_20141006_225220_inLine +BABEL_OP3_403_85322_20141006_225220_outLine +BABEL_OP3_403_85340_20141005_204959_inLine +BABEL_OP3_403_85340_20141005_204959_outLine +BABEL_OP3_403_86321_20141208_193101_inLine +BABEL_OP3_403_86321_20141208_193101_outLine +BABEL_OP3_403_86557_20141016_213938_inLine +BABEL_OP3_403_86557_20141016_213938_outLine +BABEL_OP3_403_86829_20150413_201100_inLine +BABEL_OP3_403_86829_20150413_201100_outLine +BABEL_OP3_403_87298_20141024_181414_inLine +BABEL_OP3_403_87298_20141024_181414_outLine +BABEL_OP3_403_87796_20141116_204525_inLine +BABEL_OP3_403_87796_20141116_204525_outLine +BABEL_OP3_403_87871_20141217_212127_inLine +BABEL_OP3_403_87871_20141217_212127_outLine +BABEL_OP3_403_88550_20150307_215430_inLine +BABEL_OP3_403_88550_20150307_215430_outLine +BABEL_OP3_403_88550_20150307_221516_inLine +BABEL_OP3_403_88550_20150307_221516_outLine +BABEL_OP3_403_88661_20141201_185938_inLine +BABEL_OP3_403_88661_20141201_185938_outLine +BABEL_OP3_403_88661_20141201_192152_inLine +BABEL_OP3_403_88661_20141201_192152_outLine +BABEL_OP3_403_88674_20150418_221617_inLine +BABEL_OP3_403_88674_20150418_221617_outLine +BABEL_OP3_403_89045_20141003_224541_outLine +BABEL_OP3_403_89372_20141003_233243_inLine +BABEL_OP3_403_89372_20141004_235806_inLine +BABEL_OP3_403_89560_20141217_191117_inLine +BABEL_OP3_403_89560_20141217_191117_outLine +BABEL_OP3_403_89794_20141129_193030_inLine +BABEL_OP3_403_89794_20141129_193030_outLine +BABEL_OP3_403_89877_20141120_182454_inLine +BABEL_OP3_403_89877_20141120_182454_outLine +BABEL_OP3_403_90347_20141207_221437_inLine +BABEL_OP3_403_90347_20141207_221437_outLine +BABEL_OP3_403_90935_20141026_200818_inLine +BABEL_OP3_403_91336_20141103_203505_inLine +BABEL_OP3_403_91336_20141103_203505_outLine +BABEL_OP3_403_91411_20150130_181331_inLine +BABEL_OP3_403_91411_20150130_181331_outLine +BABEL_OP3_403_91411_20150130_185140_inLine +BABEL_OP3_403_91411_20150130_185140_outLine +BABEL_OP3_403_91891_20141205_223437_inLine +BABEL_OP3_403_91891_20141205_223437_outLine +BABEL_OP3_403_91891_20141205_224513_inLine 
+BABEL_OP3_403_91891_20141205_224513_outLine +BABEL_OP3_403_92440_20150413_001701_inLine +BABEL_OP3_403_92440_20150413_001701_outLine +BABEL_OP3_403_92698_20141104_003927_inLine +BABEL_OP3_403_92698_20141104_003927_outLine +BABEL_OP3_403_92757_20150307_000144_inLine +BABEL_OP3_403_92757_20150307_000144_outLine +BABEL_OP3_403_92757_20150307_001520_inLine +BABEL_OP3_403_92757_20150307_001520_outLine +BABEL_OP3_403_92792_20150319_214450_inLine +BABEL_OP3_403_92792_20150319_214450_outLine +BABEL_OP3_403_93861_20141031_233412_inLine +BABEL_OP3_403_93946_20141208_202019_inLine +BABEL_OP3_403_93946_20141208_202019_outLine +BABEL_OP3_403_94002_20141120_194833_inLine +BABEL_OP3_403_94002_20141120_194833_outLine +BABEL_OP3_403_94141_20150311_224536_inLine +BABEL_OP3_403_94141_20150311_224536_outLine +BABEL_OP3_403_94666_20141106_230027_inLine +BABEL_OP3_403_94666_20141106_230027_outLine +BABEL_OP3_403_94745_20141202_235317_inLine +BABEL_OP3_403_94745_20141202_235317_outLine +BABEL_OP3_403_95294_20141202_001855_inLine +BABEL_OP3_403_95294_20141202_001855_outLine +BABEL_OP3_403_95598_20141004_012914_outLine +BABEL_OP3_403_95663_20141013_194657_inLine +BABEL_OP3_403_95663_20141013_194657_outLine +BABEL_OP3_403_95966_20141028_211011_inLine +BABEL_OP3_403_95966_20141028_211011_outLine +BABEL_OP3_403_96820_20141105_001821_inLine +BABEL_OP3_403_96820_20141105_001821_outLine +BABEL_OP3_403_97448_20150330_211249_inLine +BABEL_OP3_403_97448_20150330_211249_outLine +BABEL_OP3_403_97896_20141031_234221_inLine +BABEL_OP3_403_97896_20141031_234221_outLine +BABEL_OP3_403_97988_20141211_193604_inLine +BABEL_OP3_403_97988_20141211_193604_outLine +BABEL_OP3_403_98165_20141026_210536_inLine +BABEL_OP3_403_98165_20141026_210536_outLine +BABEL_OP3_403_98365_20141117_210300_inLine +BABEL_OP3_403_98365_20141117_210300_outLine +BABEL_OP3_403_98489_20141007_213814_inLine +BABEL_OP3_403_98489_20141007_213814_outLine +BABEL_OP3_403_99516_20141016_194316_inLine +BABEL_OP3_403_99516_20141016_194316_outLine +BABEL_OP3_403_99732_20141217_232949_inLine +BABEL_OP3_403_99732_20141217_232949_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list b/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list new file mode 100644 index 00000000000..a823552044c --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_404_10184_20141107_212406_inLine +BABEL_OP3_404_10184_20141107_212406_outLine +BABEL_OP3_404_12851_20141013_024620_inLine +BABEL_OP3_404_12851_20141013_024620_outLine +BABEL_OP3_404_16184_20141020_233508_inLine +BABEL_OP3_404_16184_20141020_233508_outLine +BABEL_OP3_404_17165_20141117_063008_inLine +BABEL_OP3_404_17165_20141117_063008_outLine +BABEL_OP3_404_17472_20141201_023731_inLine +BABEL_OP3_404_17472_20141201_023731_outLine +BABEL_OP3_404_18380_20141118_001754_inLine +BABEL_OP3_404_18380_20141118_001754_outLine +BABEL_OP3_404_18939_20141009_063127_inLine +BABEL_OP3_404_18939_20141009_063127_outLine +BABEL_OP3_404_22446_20141013_062554_inLine +BABEL_OP3_404_22446_20141013_062554_outLine +BABEL_OP3_404_22466_20141018_193129_inLine +BABEL_OP3_404_22466_20141018_193129_outLine +BABEL_OP3_404_22494_20141127_221208_inLine +BABEL_OP3_404_22494_20141127_221208_outLine +BABEL_OP3_404_22494_20141127_222057_inLine +BABEL_OP3_404_22494_20141127_222057_outLine +BABEL_OP3_404_23239_20141127_054155_inLine +BABEL_OP3_404_23239_20141127_054155_outLine +BABEL_OP3_404_24253_20150513_212152_inLine +BABEL_OP3_404_24253_20150513_212152_outLine +BABEL_OP3_404_24779_20150620_032949_inLine 
+BABEL_OP3_404_24779_20150620_032949_outLine +BABEL_OP3_404_26074_20141120_050650_inLine +BABEL_OP3_404_26074_20141120_050650_outLine +BABEL_OP3_404_28419_20141028_024104_inLine +BABEL_OP3_404_28419_20141028_024104_outLine +BABEL_OP3_404_33476_20141114_205102_inLine +BABEL_OP3_404_33476_20141114_205102_outLine +BABEL_OP3_404_34564_20141211_015413_inLine +BABEL_OP3_404_34564_20141211_015413_outLine +BABEL_OP3_404_35467_20141020_054030_inLine +BABEL_OP3_404_35467_20141020_054030_outLine +BABEL_OP3_404_38431_20141130_190122_inLine +BABEL_OP3_404_38431_20141130_190122_outLine +BABEL_OP3_404_41592_20141117_033328_inLine +BABEL_OP3_404_41592_20141117_033328_outLine +BABEL_OP3_404_41741_20141019_015552_inLine +BABEL_OP3_404_41741_20141019_015552_outLine +BABEL_OP3_404_42231_20141130_013425_inLine +BABEL_OP3_404_42231_20141130_013425_outLine +BABEL_OP3_404_42231_20141130_014628_inLine +BABEL_OP3_404_42231_20141130_014628_outLine +BABEL_OP3_404_42600_20141029_174857_inLine +BABEL_OP3_404_42600_20141029_174857_outLine +BABEL_OP3_404_44619_20141028_234639_inLine +BABEL_OP3_404_44619_20141028_234639_outLine +BABEL_OP3_404_46535_20150216_024618_inLine +BABEL_OP3_404_46535_20150216_024618_outLine +BABEL_OP3_404_46757_20141123_021510_inLine +BABEL_OP3_404_46757_20141123_021510_outLine +BABEL_OP3_404_47487_20141030_235808_inLine +BABEL_OP3_404_47487_20141030_235808_outLine +BABEL_OP3_404_47866_20150526_162411_inLine +BABEL_OP3_404_47866_20150526_162411_outLine +BABEL_OP3_404_47959_20141026_214447_inLine +BABEL_OP3_404_47959_20141026_214447_outLine +BABEL_OP3_404_51955_20141024_012212_inLine +BABEL_OP3_404_51955_20141024_012212_outLine +BABEL_OP3_404_51968_20141117_023015_inLine +BABEL_OP3_404_51968_20141117_023015_outLine +BABEL_OP3_404_52804_20141023_174815_inLine +BABEL_OP3_404_52804_20141023_174815_outLine +BABEL_OP3_404_54567_20141119_040337_inLine +BABEL_OP3_404_54567_20141119_040337_outLine +BABEL_OP3_404_56677_20141201_065523_inLine +BABEL_OP3_404_56677_20141201_065523_outLine +BABEL_OP3_404_56826_20141201_042429_inLine +BABEL_OP3_404_56826_20141201_042429_outLine +BABEL_OP3_404_58047_20141110_215330_inLine +BABEL_OP3_404_58047_20141110_215330_outLine +BABEL_OP3_404_58313_20141119_234202_inLine +BABEL_OP3_404_58313_20141119_234202_outLine +BABEL_OP3_404_59549_20141102_190355_inLine +BABEL_OP3_404_59549_20141102_190355_outLine +BABEL_OP3_404_60307_20150625_022621_inLine +BABEL_OP3_404_60307_20150625_022621_outLine +BABEL_OP3_404_61040_20141211_011552_inLine +BABEL_OP3_404_61040_20141211_011552_outLine +BABEL_OP3_404_61190_20141029_013447_inLine +BABEL_OP3_404_61190_20141029_013447_outLine +BABEL_OP3_404_64638_20141130_205157_inLine +BABEL_OP3_404_64638_20141130_205157_outLine +BABEL_OP3_404_66472_20141107_204602_inLine +BABEL_OP3_404_66472_20141107_204602_outLine +BABEL_OP3_404_66519_20141031_015751_inLine +BABEL_OP3_404_66519_20141031_015751_outLine +BABEL_OP3_404_67794_20141103_023323_inLine +BABEL_OP3_404_67794_20141103_023323_outLine +BABEL_OP3_404_73696_20150618_060036_inLine +BABEL_OP3_404_73696_20150618_060036_outLine +BABEL_OP3_404_73757_20141117_025704_inLine +BABEL_OP3_404_73757_20141117_025704_outLine +BABEL_OP3_404_74121_20141120_020705_inLine +BABEL_OP3_404_74121_20141120_020705_outLine +BABEL_OP3_404_80781_20141104_212234_inLine +BABEL_OP3_404_80781_20141104_212234_outLine +BABEL_OP3_404_80881_20141010_222135_inLine +BABEL_OP3_404_80881_20141010_222135_outLine +BABEL_OP3_404_81424_20141123_000421_inLine +BABEL_OP3_404_81424_20141123_000421_outLine 
+BABEL_OP3_404_87298_20141025_213601_inLine +BABEL_OP3_404_87298_20141025_213601_outLine +BABEL_OP3_404_87313_20141119_014632_inLine +BABEL_OP3_404_87313_20141119_014632_outLine +BABEL_OP3_404_87796_20141120_065537_inLine +BABEL_OP3_404_87796_20141120_065537_outLine +BABEL_OP3_404_87884_20141128_211555_inLine +BABEL_OP3_404_87884_20141128_211555_outLine +BABEL_OP3_404_88776_20141006_193621_inLine +BABEL_OP3_404_88776_20141006_193621_outLine +BABEL_OP3_404_91760_20150609_033824_inLine +BABEL_OP3_404_91760_20150609_033824_outLine +BABEL_OP3_404_91930_20150522_034521_inLine +BABEL_OP3_404_91930_20150522_034521_outLine +BABEL_OP3_404_92740_20141126_025242_inLine +BABEL_OP3_404_92740_20141126_025242_outLine +BABEL_OP3_404_97376_20141126_024552_inLine +BABEL_OP3_404_97376_20141126_024552_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/dev.list b/egs/babel/s5d/conf/lists/404-georgian/dev.list new file mode 100644 index 00000000000..a823552044c --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_404_10184_20141107_212406_inLine +BABEL_OP3_404_10184_20141107_212406_outLine +BABEL_OP3_404_12851_20141013_024620_inLine +BABEL_OP3_404_12851_20141013_024620_outLine +BABEL_OP3_404_16184_20141020_233508_inLine +BABEL_OP3_404_16184_20141020_233508_outLine +BABEL_OP3_404_17165_20141117_063008_inLine +BABEL_OP3_404_17165_20141117_063008_outLine +BABEL_OP3_404_17472_20141201_023731_inLine +BABEL_OP3_404_17472_20141201_023731_outLine +BABEL_OP3_404_18380_20141118_001754_inLine +BABEL_OP3_404_18380_20141118_001754_outLine +BABEL_OP3_404_18939_20141009_063127_inLine +BABEL_OP3_404_18939_20141009_063127_outLine +BABEL_OP3_404_22446_20141013_062554_inLine +BABEL_OP3_404_22446_20141013_062554_outLine +BABEL_OP3_404_22466_20141018_193129_inLine +BABEL_OP3_404_22466_20141018_193129_outLine +BABEL_OP3_404_22494_20141127_221208_inLine +BABEL_OP3_404_22494_20141127_221208_outLine +BABEL_OP3_404_22494_20141127_222057_inLine +BABEL_OP3_404_22494_20141127_222057_outLine +BABEL_OP3_404_23239_20141127_054155_inLine +BABEL_OP3_404_23239_20141127_054155_outLine +BABEL_OP3_404_24253_20150513_212152_inLine +BABEL_OP3_404_24253_20150513_212152_outLine +BABEL_OP3_404_24779_20150620_032949_inLine +BABEL_OP3_404_24779_20150620_032949_outLine +BABEL_OP3_404_26074_20141120_050650_inLine +BABEL_OP3_404_26074_20141120_050650_outLine +BABEL_OP3_404_28419_20141028_024104_inLine +BABEL_OP3_404_28419_20141028_024104_outLine +BABEL_OP3_404_33476_20141114_205102_inLine +BABEL_OP3_404_33476_20141114_205102_outLine +BABEL_OP3_404_34564_20141211_015413_inLine +BABEL_OP3_404_34564_20141211_015413_outLine +BABEL_OP3_404_35467_20141020_054030_inLine +BABEL_OP3_404_35467_20141020_054030_outLine +BABEL_OP3_404_38431_20141130_190122_inLine +BABEL_OP3_404_38431_20141130_190122_outLine +BABEL_OP3_404_41592_20141117_033328_inLine +BABEL_OP3_404_41592_20141117_033328_outLine +BABEL_OP3_404_41741_20141019_015552_inLine +BABEL_OP3_404_41741_20141019_015552_outLine +BABEL_OP3_404_42231_20141130_013425_inLine +BABEL_OP3_404_42231_20141130_013425_outLine +BABEL_OP3_404_42231_20141130_014628_inLine +BABEL_OP3_404_42231_20141130_014628_outLine +BABEL_OP3_404_42600_20141029_174857_inLine +BABEL_OP3_404_42600_20141029_174857_outLine +BABEL_OP3_404_44619_20141028_234639_inLine +BABEL_OP3_404_44619_20141028_234639_outLine +BABEL_OP3_404_46535_20150216_024618_inLine +BABEL_OP3_404_46535_20150216_024618_outLine +BABEL_OP3_404_46757_20141123_021510_inLine +BABEL_OP3_404_46757_20141123_021510_outLine 
+BABEL_OP3_404_47487_20141030_235808_inLine +BABEL_OP3_404_47487_20141030_235808_outLine +BABEL_OP3_404_47866_20150526_162411_inLine +BABEL_OP3_404_47866_20150526_162411_outLine +BABEL_OP3_404_47959_20141026_214447_inLine +BABEL_OP3_404_47959_20141026_214447_outLine +BABEL_OP3_404_51955_20141024_012212_inLine +BABEL_OP3_404_51955_20141024_012212_outLine +BABEL_OP3_404_51968_20141117_023015_inLine +BABEL_OP3_404_51968_20141117_023015_outLine +BABEL_OP3_404_52804_20141023_174815_inLine +BABEL_OP3_404_52804_20141023_174815_outLine +BABEL_OP3_404_54567_20141119_040337_inLine +BABEL_OP3_404_54567_20141119_040337_outLine +BABEL_OP3_404_56677_20141201_065523_inLine +BABEL_OP3_404_56677_20141201_065523_outLine +BABEL_OP3_404_56826_20141201_042429_inLine +BABEL_OP3_404_56826_20141201_042429_outLine +BABEL_OP3_404_58047_20141110_215330_inLine +BABEL_OP3_404_58047_20141110_215330_outLine +BABEL_OP3_404_58313_20141119_234202_inLine +BABEL_OP3_404_58313_20141119_234202_outLine +BABEL_OP3_404_59549_20141102_190355_inLine +BABEL_OP3_404_59549_20141102_190355_outLine +BABEL_OP3_404_60307_20150625_022621_inLine +BABEL_OP3_404_60307_20150625_022621_outLine +BABEL_OP3_404_61040_20141211_011552_inLine +BABEL_OP3_404_61040_20141211_011552_outLine +BABEL_OP3_404_61190_20141029_013447_inLine +BABEL_OP3_404_61190_20141029_013447_outLine +BABEL_OP3_404_64638_20141130_205157_inLine +BABEL_OP3_404_64638_20141130_205157_outLine +BABEL_OP3_404_66472_20141107_204602_inLine +BABEL_OP3_404_66472_20141107_204602_outLine +BABEL_OP3_404_66519_20141031_015751_inLine +BABEL_OP3_404_66519_20141031_015751_outLine +BABEL_OP3_404_67794_20141103_023323_inLine +BABEL_OP3_404_67794_20141103_023323_outLine +BABEL_OP3_404_73696_20150618_060036_inLine +BABEL_OP3_404_73696_20150618_060036_outLine +BABEL_OP3_404_73757_20141117_025704_inLine +BABEL_OP3_404_73757_20141117_025704_outLine +BABEL_OP3_404_74121_20141120_020705_inLine +BABEL_OP3_404_74121_20141120_020705_outLine +BABEL_OP3_404_80781_20141104_212234_inLine +BABEL_OP3_404_80781_20141104_212234_outLine +BABEL_OP3_404_80881_20141010_222135_inLine +BABEL_OP3_404_80881_20141010_222135_outLine +BABEL_OP3_404_81424_20141123_000421_inLine +BABEL_OP3_404_81424_20141123_000421_outLine +BABEL_OP3_404_87298_20141025_213601_inLine +BABEL_OP3_404_87298_20141025_213601_outLine +BABEL_OP3_404_87313_20141119_014632_inLine +BABEL_OP3_404_87313_20141119_014632_outLine +BABEL_OP3_404_87796_20141120_065537_inLine +BABEL_OP3_404_87796_20141120_065537_outLine +BABEL_OP3_404_87884_20141128_211555_inLine +BABEL_OP3_404_87884_20141128_211555_outLine +BABEL_OP3_404_88776_20141006_193621_inLine +BABEL_OP3_404_88776_20141006_193621_outLine +BABEL_OP3_404_91760_20150609_033824_inLine +BABEL_OP3_404_91760_20150609_033824_outLine +BABEL_OP3_404_91930_20150522_034521_inLine +BABEL_OP3_404_91930_20150522_034521_outLine +BABEL_OP3_404_92740_20141126_025242_inLine +BABEL_OP3_404_92740_20141126_025242_outLine +BABEL_OP3_404_97376_20141126_024552_inLine +BABEL_OP3_404_97376_20141126_024552_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/eval.list b/egs/babel/s5d/conf/lists/404-georgian/eval.list new file mode 100644 index 00000000000..d197b90ee2f --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/eval.list @@ -0,0 +1,956 @@ +BABEL_OP3_404_10036_20141030_200515_inLine +BABEL_OP3_404_10036_20141030_200515_outLine +BABEL_OP3_404_10188_20141021_043537_inLine +BABEL_OP3_404_10188_20141021_043537_outLine +BABEL_OP3_404_10319_20141015_010220_inLine +BABEL_OP3_404_10319_20141015_010220_outLine 
+BABEL_OP3_404_10319_20141015_011118_inLine +BABEL_OP3_404_10319_20141015_011118_outLine +BABEL_OP3_404_10482_20141130_013900_inLine +BABEL_OP3_404_10482_20141130_013900_outLine +BABEL_OP3_404_10524_20150518_002415_inLine +BABEL_OP3_404_10524_20150518_002415_outLine +BABEL_OP3_404_10901_20141120_172058_inLine +BABEL_OP3_404_10901_20141120_172058_outLine +BABEL_OP3_404_10966_20141027_000701_inLine +BABEL_OP3_404_10966_20141027_000701_outLine +BABEL_OP3_404_11419_20150212_050835_inLine +BABEL_OP3_404_11419_20150212_050835_outLine +BABEL_OP3_404_11419_20150212_051550_inLine +BABEL_OP3_404_11419_20150212_051550_outLine +BABEL_OP3_404_11581_20141110_223927_inLine +BABEL_OP3_404_11581_20141110_223927_outLine +BABEL_OP3_404_11797_20141019_195244_inLine +BABEL_OP3_404_11797_20141019_195244_outLine +BABEL_OP3_404_12321_20141211_055837_inLine +BABEL_OP3_404_12321_20141211_055837_outLine +BABEL_OP3_404_13040_20141024_004921_inLine +BABEL_OP3_404_13040_20141024_004921_outLine +BABEL_OP3_404_13427_20141107_220103_inLine +BABEL_OP3_404_13427_20141107_220103_outLine +BABEL_OP3_404_13483_20141128_002800_inLine +BABEL_OP3_404_13483_20141128_002800_outLine +BABEL_OP3_404_13490_20141118_023408_inLine +BABEL_OP3_404_13490_20141118_023408_outLine +BABEL_OP3_404_13561_20141115_003843_inLine +BABEL_OP3_404_13561_20141115_003843_outLine +BABEL_OP3_404_13586_20141106_180057_inLine +BABEL_OP3_404_13586_20141106_180057_outLine +BABEL_OP3_404_13744_20141021_043037_inLine +BABEL_OP3_404_13744_20141021_043037_outLine +BABEL_OP3_404_13792_20141011_010111_inLine +BABEL_OP3_404_13792_20141011_010111_outLine +BABEL_OP3_404_14097_20150211_010746_inLine +BABEL_OP3_404_14097_20150211_010746_outLine +BABEL_OP3_404_14179_20141201_063636_inLine +BABEL_OP3_404_14179_20141201_063636_outLine +BABEL_OP3_404_14228_20141130_062059_inLine +BABEL_OP3_404_14228_20141130_062059_outLine +BABEL_OP3_404_14560_20141201_073709_inLine +BABEL_OP3_404_14560_20141201_073709_outLine +BABEL_OP3_404_14719_20141201_014614_inLine +BABEL_OP3_404_14719_20141201_014614_outLine +BABEL_OP3_404_14725_20141013_005356_inLine +BABEL_OP3_404_14725_20141013_005356_outLine +BABEL_OP3_404_15163_20141115_035641_inLine +BABEL_OP3_404_15163_20141115_035641_outLine +BABEL_OP3_404_15322_20150512_231817_inLine +BABEL_OP3_404_15322_20150512_231817_outLine +BABEL_OP3_404_15324_20141120_031528_inLine +BABEL_OP3_404_15324_20141120_031528_outLine +BABEL_OP3_404_15702_20141129_051812_inLine +BABEL_OP3_404_15702_20141129_051812_outLine +BABEL_OP3_404_15730_20141021_055606_inLine +BABEL_OP3_404_15730_20141021_055606_outLine +BABEL_OP3_404_15926_20141124_004339_inLine +BABEL_OP3_404_15926_20141124_004339_outLine +BABEL_OP3_404_15926_20141124_005513_inLine +BABEL_OP3_404_15926_20141124_005513_outLine +BABEL_OP3_404_16056_20141009_005123_inLine +BABEL_OP3_404_16056_20141009_005123_outLine +BABEL_OP3_404_16787_20141120_174312_inLine +BABEL_OP3_404_16787_20141120_174312_outLine +BABEL_OP3_404_16800_20141212_184132_inLine +BABEL_OP3_404_16800_20141212_184132_outLine +BABEL_OP3_404_16800_20141212_185849_inLine +BABEL_OP3_404_16800_20141212_185849_outLine +BABEL_OP3_404_16886_20141117_002313_inLine +BABEL_OP3_404_16886_20141117_002313_outLine +BABEL_OP3_404_16886_20141117_003801_inLine +BABEL_OP3_404_16886_20141117_003801_outLine +BABEL_OP3_404_16924_20141201_020122_inLine +BABEL_OP3_404_16924_20141201_020122_outLine +BABEL_OP3_404_16938_20141118_045730_inLine +BABEL_OP3_404_16938_20141118_045730_outLine +BABEL_OP3_404_17032_20141128_030249_inLine 
+BABEL_OP3_404_17032_20141128_030249_outLine +BABEL_OP3_404_17440_20141127_041844_inLine +BABEL_OP3_404_17440_20141127_041844_outLine +BABEL_OP3_404_17496_20141130_022805_inLine +BABEL_OP3_404_17496_20141130_022805_outLine +BABEL_OP3_404_17751_20150611_030539_inLine +BABEL_OP3_404_17751_20150611_030539_outLine +BABEL_OP3_404_17881_20150524_231317_inLine +BABEL_OP3_404_17881_20150524_231317_outLine +BABEL_OP3_404_17914_20150526_054931_inLine +BABEL_OP3_404_17914_20150526_054931_outLine +BABEL_OP3_404_18280_20150213_011322_inLine +BABEL_OP3_404_18280_20150213_011322_outLine +BABEL_OP3_404_18370_20150210_194727_inLine +BABEL_OP3_404_18370_20150210_194727_outLine +BABEL_OP3_404_18924_20141110_211055_inLine +BABEL_OP3_404_18924_20141110_211055_outLine +BABEL_OP3_404_19101_20141113_042102_inLine +BABEL_OP3_404_19101_20141113_042102_outLine +BABEL_OP3_404_19545_20141107_223152_inLine +BABEL_OP3_404_19545_20141107_223152_outLine +BABEL_OP3_404_19621_20141201_041129_inLine +BABEL_OP3_404_19621_20141201_041129_outLine +BABEL_OP3_404_19672_20141124_015046_inLine +BABEL_OP3_404_19672_20141124_015046_outLine +BABEL_OP3_404_19722_20141006_033717_inLine +BABEL_OP3_404_19722_20141006_033717_outLine +BABEL_OP3_404_19782_20141201_231608_inLine +BABEL_OP3_404_19782_20141201_231608_outLine +BABEL_OP3_404_19818_20141124_044516_inLine +BABEL_OP3_404_19818_20141124_044516_outLine +BABEL_OP3_404_20367_20150618_055644_inLine +BABEL_OP3_404_20367_20150618_055644_outLine +BABEL_OP3_404_20682_20141211_044056_inLine +BABEL_OP3_404_20682_20141211_044056_outLine +BABEL_OP3_404_20682_20141211_045257_inLine +BABEL_OP3_404_20682_20141211_045257_outLine +BABEL_OP3_404_20738_20150503_191409_inLine +BABEL_OP3_404_20738_20150503_191409_outLine +BABEL_OP3_404_20768_20141207_081305_inLine +BABEL_OP3_404_20768_20141207_081305_outLine +BABEL_OP3_404_20800_20141022_192312_inLine +BABEL_OP3_404_20800_20141022_192312_outLine +BABEL_OP3_404_20916_20141006_192451_inLine +BABEL_OP3_404_20916_20141006_192451_outLine +BABEL_OP3_404_21029_20141105_033902_inLine +BABEL_OP3_404_21029_20141105_033902_outLine +BABEL_OP3_404_21206_20141024_194128_inLine +BABEL_OP3_404_21206_20141024_194128_outLine +BABEL_OP3_404_21624_20150525_034841_inLine +BABEL_OP3_404_21624_20150525_034841_outLine +BABEL_OP3_404_21794_20141115_220258_inLine +BABEL_OP3_404_21794_20141115_220258_outLine +BABEL_OP3_404_22021_20150217_213437_inLine +BABEL_OP3_404_22021_20150217_213437_outLine +BABEL_OP3_404_22021_20150220_194248_inLine +BABEL_OP3_404_22021_20150220_194248_outLine +BABEL_OP3_404_22034_20150211_165126_inLine +BABEL_OP3_404_22034_20150211_165126_outLine +BABEL_OP3_404_22170_20150528_002541_inLine +BABEL_OP3_404_22170_20150528_002541_outLine +BABEL_OP3_404_22216_20141020_051333_inLine +BABEL_OP3_404_22216_20141020_051333_outLine +BABEL_OP3_404_22321_20141019_214812_inLine +BABEL_OP3_404_22321_20141019_214812_outLine +BABEL_OP3_404_22612_20141201_080517_inLine +BABEL_OP3_404_22612_20141201_080517_outLine +BABEL_OP3_404_22641_20141021_165119_inLine +BABEL_OP3_404_22641_20141021_165119_outLine +BABEL_OP3_404_22965_20141101_192617_inLine +BABEL_OP3_404_22965_20141101_192617_outLine +BABEL_OP3_404_23006_20141026_211155_inLine +BABEL_OP3_404_23006_20141026_211155_outLine +BABEL_OP3_404_23092_20141129_005335_inLine +BABEL_OP3_404_23092_20141129_005335_outLine +BABEL_OP3_404_23153_20141118_015224_inLine +BABEL_OP3_404_23153_20141118_015224_outLine +BABEL_OP3_404_23628_20141027_170345_inLine +BABEL_OP3_404_23628_20141027_170345_outLine 
+BABEL_OP3_404_24017_20141211_021947_inLine +BABEL_OP3_404_24017_20141211_021947_outLine +BABEL_OP3_404_24290_20150515_164252_inLine +BABEL_OP3_404_24290_20150515_164252_outLine +BABEL_OP3_404_24569_20141130_214924_inLine +BABEL_OP3_404_24569_20141130_214924_outLine +BABEL_OP3_404_24605_20141013_043620_inLine +BABEL_OP3_404_24605_20141013_043620_outLine +BABEL_OP3_404_25698_20150611_021501_inLine +BABEL_OP3_404_25698_20150611_021501_outLine +BABEL_OP3_404_25767_20141009_211814_inLine +BABEL_OP3_404_25767_20141009_211814_outLine +BABEL_OP3_404_26206_20141128_031139_inLine +BABEL_OP3_404_26206_20141128_031139_outLine +BABEL_OP3_404_26999_20141130_004320_inLine +BABEL_OP3_404_26999_20141130_004320_outLine +BABEL_OP3_404_27082_20141119_041436_inLine +BABEL_OP3_404_27082_20141119_041436_outLine +BABEL_OP3_404_27125_20141007_032335_inLine +BABEL_OP3_404_27125_20141007_032335_outLine +BABEL_OP3_404_27478_20150514_205232_inLine +BABEL_OP3_404_27478_20150514_205232_outLine +BABEL_OP3_404_28422_20141124_055809_inLine +BABEL_OP3_404_28422_20141124_055809_outLine +BABEL_OP3_404_28606_20141127_011719_inLine +BABEL_OP3_404_28606_20141127_011719_outLine +BABEL_OP3_404_28775_20141028_193907_inLine +BABEL_OP3_404_28775_20141028_193907_outLine +BABEL_OP3_404_29023_20141024_225827_inLine +BABEL_OP3_404_29023_20141024_225827_outLine +BABEL_OP3_404_29072_20141128_023212_inLine +BABEL_OP3_404_29072_20141128_023212_outLine +BABEL_OP3_404_29135_20141022_182050_inLine +BABEL_OP3_404_29135_20141022_182050_outLine +BABEL_OP3_404_29168_20141023_013832_inLine +BABEL_OP3_404_29168_20141023_013832_outLine +BABEL_OP3_404_29352_20150618_035033_inLine +BABEL_OP3_404_29352_20150618_035033_outLine +BABEL_OP3_404_29352_20150618_041025_inLine +BABEL_OP3_404_29352_20150618_041025_outLine +BABEL_OP3_404_29685_20141103_223309_inLine +BABEL_OP3_404_29685_20141103_223309_outLine +BABEL_OP3_404_29765_20150616_155830_inLine +BABEL_OP3_404_29765_20150616_155830_outLine +BABEL_OP3_404_30013_20141127_211853_inLine +BABEL_OP3_404_30013_20141127_211853_outLine +BABEL_OP3_404_30058_20150514_024957_inLine +BABEL_OP3_404_30058_20150514_024957_outLine +BABEL_OP3_404_30180_20141118_011806_inLine +BABEL_OP3_404_30180_20141118_011806_outLine +BABEL_OP3_404_30253_20141201_051926_inLine +BABEL_OP3_404_30253_20141201_051926_outLine +BABEL_OP3_404_30395_20141106_185545_inLine +BABEL_OP3_404_30395_20141106_185545_outLine +BABEL_OP3_404_31039_20150217_050120_inLine +BABEL_OP3_404_31039_20150217_050120_outLine +BABEL_OP3_404_31039_20150217_051317_inLine +BABEL_OP3_404_31039_20150217_051317_outLine +BABEL_OP3_404_31074_20150121_022649_inLine +BABEL_OP3_404_31074_20150121_022649_outLine +BABEL_OP3_404_31184_20141118_183536_inLine +BABEL_OP3_404_31184_20141118_183536_outLine +BABEL_OP3_404_31490_20141022_200135_inLine +BABEL_OP3_404_31490_20141022_200135_outLine +BABEL_OP3_404_31583_20141130_004731_inLine +BABEL_OP3_404_31583_20141130_004731_outLine +BABEL_OP3_404_31628_20141202_000346_inLine +BABEL_OP3_404_31628_20141202_000346_outLine +BABEL_OP3_404_32097_20141006_221638_inLine +BABEL_OP3_404_32097_20141006_221638_outLine +BABEL_OP3_404_32244_20150609_043200_inLine +BABEL_OP3_404_32244_20150609_043200_outLine +BABEL_OP3_404_32301_20141126_204138_inLine +BABEL_OP3_404_32301_20141126_204138_outLine +BABEL_OP3_404_33111_20150528_004829_inLine +BABEL_OP3_404_33111_20150528_004829_outLine +BABEL_OP3_404_33251_20141119_205146_inLine +BABEL_OP3_404_33251_20141119_205146_outLine +BABEL_OP3_404_33273_20141105_213401_inLine 
+BABEL_OP3_404_33273_20141105_213401_outLine +BABEL_OP3_404_33497_20141119_051436_inLine +BABEL_OP3_404_33497_20141119_051436_outLine +BABEL_OP3_404_33635_20141106_005750_inLine +BABEL_OP3_404_33635_20141106_005750_outLine +BABEL_OP3_404_33672_20141014_004055_inLine +BABEL_OP3_404_33672_20141014_004055_outLine +BABEL_OP3_404_33672_20141014_005233_inLine +BABEL_OP3_404_33672_20141014_005233_outLine +BABEL_OP3_404_33951_20141119_072531_inLine +BABEL_OP3_404_33951_20141119_072531_outLine +BABEL_OP3_404_34197_20141018_201528_inLine +BABEL_OP3_404_34197_20141018_201528_outLine +BABEL_OP3_404_34336_20141027_211535_inLine +BABEL_OP3_404_34336_20141027_211535_outLine +BABEL_OP3_404_34477_20141027_184645_inLine +BABEL_OP3_404_34477_20141027_184645_outLine +BABEL_OP3_404_34903_20141124_020719_inLine +BABEL_OP3_404_34903_20141124_020719_outLine +BABEL_OP3_404_35139_20141023_224322_inLine +BABEL_OP3_404_35139_20141023_224322_outLine +BABEL_OP3_404_35202_20141128_053756_inLine +BABEL_OP3_404_35202_20141128_053756_outLine +BABEL_OP3_404_35885_20150518_015426_inLine +BABEL_OP3_404_35885_20150518_015426_outLine +BABEL_OP3_404_36293_20141006_004659_inLine +BABEL_OP3_404_36293_20141006_004659_outLine +BABEL_OP3_404_36341_20141021_045218_inLine +BABEL_OP3_404_36341_20141021_045218_outLine +BABEL_OP3_404_36669_20141116_050542_inLine +BABEL_OP3_404_36669_20141116_050542_outLine +BABEL_OP3_404_36894_20141009_013557_inLine +BABEL_OP3_404_36894_20141009_013557_outLine +BABEL_OP3_404_36990_20141117_041052_inLine +BABEL_OP3_404_36990_20141117_041052_outLine +BABEL_OP3_404_37068_20150212_050250_inLine +BABEL_OP3_404_37068_20150212_050250_outLine +BABEL_OP3_404_37285_20141128_060822_inLine +BABEL_OP3_404_37285_20141128_060822_outLine +BABEL_OP3_404_37684_20150211_031551_inLine +BABEL_OP3_404_37684_20150211_031551_outLine +BABEL_OP3_404_38076_20141129_030136_inLine +BABEL_OP3_404_38076_20141129_030136_outLine +BABEL_OP3_404_38689_20141128_235841_inLine +BABEL_OP3_404_38689_20141128_235841_outLine +BABEL_OP3_404_38741_20141028_190310_inLine +BABEL_OP3_404_38741_20141028_190310_outLine +BABEL_OP3_404_38750_20141130_052516_inLine +BABEL_OP3_404_38750_20141130_052516_outLine +BABEL_OP3_404_38878_20141118_224023_inLine +BABEL_OP3_404_38878_20141118_224023_outLine +BABEL_OP3_404_39006_20150617_032943_inLine +BABEL_OP3_404_39006_20150617_032943_outLine +BABEL_OP3_404_39159_20141021_033733_inLine +BABEL_OP3_404_39159_20141021_033733_outLine +BABEL_OP3_404_39848_20141113_234103_inLine +BABEL_OP3_404_39848_20141113_234103_outLine +BABEL_OP3_404_40565_20141126_191549_inLine +BABEL_OP3_404_40565_20141126_191549_outLine +BABEL_OP3_404_41038_20141201_070557_inLine +BABEL_OP3_404_41038_20141201_070557_outLine +BABEL_OP3_404_41174_20141117_033354_inLine +BABEL_OP3_404_41174_20141117_033354_outLine +BABEL_OP3_404_41442_20141201_065524_inLine +BABEL_OP3_404_41442_20141201_065524_outLine +BABEL_OP3_404_41469_20141015_041032_inLine +BABEL_OP3_404_41469_20141015_041032_outLine +BABEL_OP3_404_41493_20141007_192601_inLine +BABEL_OP3_404_41493_20141007_192601_outLine +BABEL_OP3_404_41618_20141114_232533_inLine +BABEL_OP3_404_41618_20141114_232533_outLine +BABEL_OP3_404_41890_20150516_214915_inLine +BABEL_OP3_404_41890_20150516_214915_outLine +BABEL_OP3_404_42146_20150524_225524_inLine +BABEL_OP3_404_42146_20150524_225524_outLine +BABEL_OP3_404_42434_20141101_015900_inLine +BABEL_OP3_404_42434_20141101_015900_outLine +BABEL_OP3_404_42718_20150514_042601_inLine +BABEL_OP3_404_42718_20150514_042601_outLine 
+BABEL_OP3_404_42771_20141119_032738_inLine +BABEL_OP3_404_42771_20141119_032738_outLine +BABEL_OP3_404_42942_20141105_231330_inLine +BABEL_OP3_404_42942_20141105_231330_outLine +BABEL_OP3_404_42991_20141201_174138_inLine +BABEL_OP3_404_42991_20141201_174138_outLine +BABEL_OP3_404_43115_20150518_051249_inLine +BABEL_OP3_404_43115_20150518_051249_outLine +BABEL_OP3_404_43285_20141127_224948_inLine +BABEL_OP3_404_43285_20141127_224948_outLine +BABEL_OP3_404_43286_20141011_233252_inLine +BABEL_OP3_404_43286_20141011_233252_outLine +BABEL_OP3_404_43646_20141011_031534_inLine +BABEL_OP3_404_43646_20141011_031534_outLine +BABEL_OP3_404_43784_20141101_215816_inLine +BABEL_OP3_404_43784_20141101_215816_outLine +BABEL_OP3_404_43784_20141101_220445_inLine +BABEL_OP3_404_43784_20141101_220445_outLine +BABEL_OP3_404_43784_20141101_222312_inLine +BABEL_OP3_404_43784_20141101_222312_outLine +BABEL_OP3_404_43788_20141125_190621_inLine +BABEL_OP3_404_43788_20141125_190621_outLine +BABEL_OP3_404_43920_20141128_232903_inLine +BABEL_OP3_404_43920_20141128_232903_outLine +BABEL_OP3_404_44255_20150525_073716_inLine +BABEL_OP3_404_44255_20150525_073716_outLine +BABEL_OP3_404_44420_20141025_211032_inLine +BABEL_OP3_404_44420_20141025_211032_outLine +BABEL_OP3_404_44531_20150527_015805_inLine +BABEL_OP3_404_44531_20150527_015805_outLine +BABEL_OP3_404_44709_20141126_024811_inLine +BABEL_OP3_404_44709_20141126_024811_outLine +BABEL_OP3_404_44868_20141123_032254_inLine +BABEL_OP3_404_44868_20141123_032254_outLine +BABEL_OP3_404_45642_20141011_233950_inLine +BABEL_OP3_404_45642_20141011_233950_outLine +BABEL_OP3_404_45770_20141009_185730_inLine +BABEL_OP3_404_45770_20141009_185730_outLine +BABEL_OP3_404_45777_20141028_195713_inLine +BABEL_OP3_404_45777_20141028_195713_outLine +BABEL_OP3_404_45843_20141124_042608_inLine +BABEL_OP3_404_45843_20141124_042608_outLine +BABEL_OP3_404_46008_20150525_024936_inLine +BABEL_OP3_404_46008_20150525_024936_outLine +BABEL_OP3_404_46261_20141117_200301_inLine +BABEL_OP3_404_46261_20141117_200301_outLine +BABEL_OP3_404_46389_20150216_043700_inLine +BABEL_OP3_404_46389_20150216_043700_outLine +BABEL_OP3_404_46558_20141020_013256_inLine +BABEL_OP3_404_46558_20141020_013256_outLine +BABEL_OP3_404_46589_20141126_010932_inLine +BABEL_OP3_404_46589_20141126_010932_outLine +BABEL_OP3_404_46702_20141021_004925_inLine +BABEL_OP3_404_46702_20141021_004925_outLine +BABEL_OP3_404_47110_20150211_041423_inLine +BABEL_OP3_404_47110_20150211_041423_outLine +BABEL_OP3_404_47186_20141130_032126_inLine +BABEL_OP3_404_47186_20141130_032126_outLine +BABEL_OP3_404_47215_20141016_012848_inLine +BABEL_OP3_404_47215_20141016_012848_outLine +BABEL_OP3_404_47283_20141105_063730_inLine +BABEL_OP3_404_47283_20141105_063730_outLine +BABEL_OP3_404_47451_20141201_044107_inLine +BABEL_OP3_404_47451_20141201_044107_outLine +BABEL_OP3_404_47451_20141201_045923_inLine +BABEL_OP3_404_47451_20141201_045923_outLine +BABEL_OP3_404_47878_20141115_030044_inLine +BABEL_OP3_404_47878_20141115_030044_outLine +BABEL_OP3_404_48789_20141130_013950_inLine +BABEL_OP3_404_48789_20141130_013950_outLine +BABEL_OP3_404_49001_20141102_054949_inLine +BABEL_OP3_404_49001_20141102_054949_outLine +BABEL_OP3_404_49216_20141023_021720_inLine +BABEL_OP3_404_49216_20141023_021720_outLine +BABEL_OP3_404_49287_20141201_003931_inLine +BABEL_OP3_404_49287_20141201_003931_outLine +BABEL_OP3_404_49502_20141012_055001_inLine +BABEL_OP3_404_49502_20141012_055001_outLine +BABEL_OP3_404_49637_20141006_052951_inLine 
+BABEL_OP3_404_49637_20141006_052951_outLine +BABEL_OP3_404_50090_20141119_215921_inLine +BABEL_OP3_404_50090_20141119_215921_outLine +BABEL_OP3_404_50427_20141108_184045_inLine +BABEL_OP3_404_50427_20141108_184045_outLine +BABEL_OP3_404_50630_20141123_224108_inLine +BABEL_OP3_404_50630_20141123_224108_outLine +BABEL_OP3_404_50681_20141119_074034_inLine +BABEL_OP3_404_50681_20141119_074034_outLine +BABEL_OP3_404_50726_20141021_005526_inLine +BABEL_OP3_404_50726_20141021_005526_outLine +BABEL_OP3_404_50958_20141118_184358_inLine +BABEL_OP3_404_50958_20141118_184358_outLine +BABEL_OP3_404_50958_20141118_185604_inLine +BABEL_OP3_404_50958_20141118_185604_outLine +BABEL_OP3_404_50962_20141107_060744_inLine +BABEL_OP3_404_50962_20141107_060744_outLine +BABEL_OP3_404_51407_20141117_062029_inLine +BABEL_OP3_404_51407_20141117_062029_outLine +BABEL_OP3_404_51611_20141022_024919_inLine +BABEL_OP3_404_51611_20141022_024919_outLine +BABEL_OP3_404_51819_20141126_211917_inLine +BABEL_OP3_404_51819_20141126_211917_outLine +BABEL_OP3_404_52272_20141006_031940_inLine +BABEL_OP3_404_52272_20141006_031940_outLine +BABEL_OP3_404_52438_20141104_034612_inLine +BABEL_OP3_404_52438_20141104_034612_outLine +BABEL_OP3_404_52442_20141109_004908_inLine +BABEL_OP3_404_52442_20141109_004908_outLine +BABEL_OP3_404_52614_20150503_200805_inLine +BABEL_OP3_404_52614_20150503_200805_outLine +BABEL_OP3_404_52694_20141121_043410_inLine +BABEL_OP3_404_52694_20141121_043410_outLine +BABEL_OP3_404_52717_20141014_234034_inLine +BABEL_OP3_404_52717_20141014_234034_outLine +BABEL_OP3_404_52818_20141130_231525_inLine +BABEL_OP3_404_52818_20141130_231525_outLine +BABEL_OP3_404_52932_20141101_234724_inLine +BABEL_OP3_404_52932_20141101_234724_outLine +BABEL_OP3_404_53419_20141201_030819_inLine +BABEL_OP3_404_53419_20141201_030819_outLine +BABEL_OP3_404_53842_20141119_044935_inLine +BABEL_OP3_404_53842_20141119_044935_outLine +BABEL_OP3_404_54074_20141129_060147_inLine +BABEL_OP3_404_54074_20141129_060147_outLine +BABEL_OP3_404_54162_20141119_032442_inLine +BABEL_OP3_404_54162_20141119_032442_outLine +BABEL_OP3_404_54390_20141028_230702_inLine +BABEL_OP3_404_54390_20141028_230702_outLine +BABEL_OP3_404_54530_20141130_011651_inLine +BABEL_OP3_404_54530_20141130_011651_outLine +BABEL_OP3_404_54697_20141201_053854_inLine +BABEL_OP3_404_54697_20141201_053854_outLine +BABEL_OP3_404_54953_20141115_022411_inLine +BABEL_OP3_404_54953_20141115_022411_outLine +BABEL_OP3_404_55742_20141102_071943_inLine +BABEL_OP3_404_55742_20141102_071943_outLine +BABEL_OP3_404_55818_20141014_062259_inLine +BABEL_OP3_404_55818_20141014_062259_outLine +BABEL_OP3_404_55950_20150502_234657_inLine +BABEL_OP3_404_55950_20150502_234657_outLine +BABEL_OP3_404_55968_20141009_231223_inLine +BABEL_OP3_404_55968_20141009_231223_outLine +BABEL_OP3_404_56090_20141019_172050_inLine +BABEL_OP3_404_56090_20141019_172050_outLine +BABEL_OP3_404_56198_20141103_031752_inLine +BABEL_OP3_404_56198_20141103_031752_outLine +BABEL_OP3_404_56307_20141201_210608_inLine +BABEL_OP3_404_56307_20141201_210608_outLine +BABEL_OP3_404_56370_20141010_013542_inLine +BABEL_OP3_404_56370_20141010_013542_outLine +BABEL_OP3_404_56429_20141024_003551_inLine +BABEL_OP3_404_56429_20141024_003551_outLine +BABEL_OP3_404_56523_20141114_215534_inLine +BABEL_OP3_404_56523_20141114_215534_outLine +BABEL_OP3_404_56720_20141129_182808_inLine +BABEL_OP3_404_56720_20141129_182808_outLine +BABEL_OP3_404_56720_20141129_183649_inLine +BABEL_OP3_404_56720_20141129_183649_outLine 
+BABEL_OP3_404_57093_20141118_034107_inLine +BABEL_OP3_404_57093_20141118_034107_outLine +BABEL_OP3_404_57116_20141008_023139_inLine +BABEL_OP3_404_57116_20141008_023139_outLine +BABEL_OP3_404_57529_20141201_050129_inLine +BABEL_OP3_404_57529_20141201_050129_outLine +BABEL_OP3_404_57548_20141119_194430_inLine +BABEL_OP3_404_57548_20141119_194430_outLine +BABEL_OP3_404_57609_20141117_063904_inLine +BABEL_OP3_404_57609_20141117_063904_outLine +BABEL_OP3_404_57609_20141119_223552_inLine +BABEL_OP3_404_57609_20141119_223552_outLine +BABEL_OP3_404_57922_20141119_172249_inLine +BABEL_OP3_404_57922_20141119_172249_outLine +BABEL_OP3_404_57935_20141122_233816_inLine +BABEL_OP3_404_57935_20141122_233816_outLine +BABEL_OP3_404_58107_20141107_223929_inLine +BABEL_OP3_404_58107_20141107_223929_outLine +BABEL_OP3_404_58145_20141120_014653_inLine +BABEL_OP3_404_58145_20141120_014653_outLine +BABEL_OP3_404_58489_20141201_035927_inLine +BABEL_OP3_404_58489_20141201_035927_outLine +BABEL_OP3_404_58717_20141106_221300_inLine +BABEL_OP3_404_58717_20141106_221300_outLine +BABEL_OP3_404_58734_20141019_223233_inLine +BABEL_OP3_404_58734_20141019_223233_outLine +BABEL_OP3_404_58815_20141129_230108_inLine +BABEL_OP3_404_58815_20141129_230108_outLine +BABEL_OP3_404_58821_20141128_224222_inLine +BABEL_OP3_404_58821_20141128_224222_outLine +BABEL_OP3_404_58850_20141116_234915_inLine +BABEL_OP3_404_58850_20141116_234915_outLine +BABEL_OP3_404_58926_20141105_025457_inLine +BABEL_OP3_404_58926_20141105_025457_outLine +BABEL_OP3_404_59163_20150212_233430_inLine +BABEL_OP3_404_59163_20150212_233430_outLine +BABEL_OP3_404_59291_20141129_223855_inLine +BABEL_OP3_404_59291_20141129_223855_outLine +BABEL_OP3_404_59509_20141120_010036_inLine +BABEL_OP3_404_59509_20141120_010036_outLine +BABEL_OP3_404_59747_20141020_002625_inLine +BABEL_OP3_404_59747_20141020_002625_outLine +BABEL_OP3_404_59928_20141107_063850_inLine +BABEL_OP3_404_59928_20141107_063850_outLine +BABEL_OP3_404_59993_20141102_204023_inLine +BABEL_OP3_404_59993_20141102_204023_outLine +BABEL_OP3_404_60115_20141123_045055_inLine +BABEL_OP3_404_60115_20141123_045055_outLine +BABEL_OP3_404_60418_20141201_012853_inLine +BABEL_OP3_404_60418_20141201_012853_outLine +BABEL_OP3_404_60538_20141010_000421_inLine +BABEL_OP3_404_60538_20141010_000421_outLine +BABEL_OP3_404_60661_20141023_185331_inLine +BABEL_OP3_404_60661_20141023_185331_outLine +BABEL_OP3_404_60830_20141119_050849_inLine +BABEL_OP3_404_60830_20141119_050849_outLine +BABEL_OP3_404_60836_20141026_014449_inLine +BABEL_OP3_404_60836_20141026_014449_outLine +BABEL_OP3_404_61011_20141022_235244_inLine +BABEL_OP3_404_61011_20141022_235244_outLine +BABEL_OP3_404_61357_20141118_052326_inLine +BABEL_OP3_404_61357_20141118_052326_outLine +BABEL_OP3_404_61731_20141026_185743_inLine +BABEL_OP3_404_61731_20141026_185743_outLine +BABEL_OP3_404_62014_20141120_021455_inLine +BABEL_OP3_404_62014_20141120_021455_outLine +BABEL_OP3_404_62177_20150503_025324_inLine +BABEL_OP3_404_62177_20150503_025324_outLine +BABEL_OP3_404_62200_20141115_024033_inLine +BABEL_OP3_404_62200_20141115_024033_outLine +BABEL_OP3_404_62289_20150526_045908_inLine +BABEL_OP3_404_62289_20150526_045908_outLine +BABEL_OP3_404_62430_20150526_181036_inLine +BABEL_OP3_404_62430_20150526_181036_outLine +BABEL_OP3_404_62434_20141019_201121_inLine +BABEL_OP3_404_62434_20141019_201121_outLine +BABEL_OP3_404_62656_20150119_185511_inLine +BABEL_OP3_404_62656_20150119_185511_outLine +BABEL_OP3_404_62800_20141020_020318_inLine 
+BABEL_OP3_404_62800_20141020_020318_outLine +BABEL_OP3_404_62835_20141119_043323_inLine +BABEL_OP3_404_62835_20141119_043323_outLine +BABEL_OP3_404_62976_20141119_061748_inLine +BABEL_OP3_404_62976_20141119_061748_outLine +BABEL_OP3_404_63307_20141119_192444_inLine +BABEL_OP3_404_63307_20141119_192444_outLine +BABEL_OP3_404_63445_20141021_013007_inLine +BABEL_OP3_404_63445_20141021_013007_outLine +BABEL_OP3_404_63523_20150512_050203_inLine +BABEL_OP3_404_63523_20150512_050203_outLine +BABEL_OP3_404_63604_20141011_021042_inLine +BABEL_OP3_404_63604_20141011_021042_outLine +BABEL_OP3_404_63787_20141010_225937_inLine +BABEL_OP3_404_63787_20141010_225937_outLine +BABEL_OP3_404_63938_20150526_052814_inLine +BABEL_OP3_404_63938_20150526_052814_outLine +BABEL_OP3_404_64350_20141022_195842_inLine +BABEL_OP3_404_64350_20141022_195842_outLine +BABEL_OP3_404_64398_20141126_031756_inLine +BABEL_OP3_404_64398_20141126_031756_outLine +BABEL_OP3_404_64902_20150522_041540_inLine +BABEL_OP3_404_64902_20150522_041540_outLine +BABEL_OP3_404_65064_20141127_003631_inLine +BABEL_OP3_404_65064_20141127_003631_outLine +BABEL_OP3_404_65077_20141015_025834_inLine +BABEL_OP3_404_65077_20141015_025834_outLine +BABEL_OP3_404_65466_20150524_182317_inLine +BABEL_OP3_404_65466_20150524_182317_outLine +BABEL_OP3_404_65477_20141115_020305_inLine +BABEL_OP3_404_65477_20141115_020305_outLine +BABEL_OP3_404_65692_20141117_074414_inLine +BABEL_OP3_404_65692_20141117_074414_outLine +BABEL_OP3_404_65723_20141102_051040_inLine +BABEL_OP3_404_65723_20141102_051040_outLine +BABEL_OP3_404_65882_20141024_191236_inLine +BABEL_OP3_404_65882_20141024_191236_outLine +BABEL_OP3_404_66001_20141006_015944_inLine +BABEL_OP3_404_66001_20141006_015944_outLine +BABEL_OP3_404_66026_20141130_061639_inLine +BABEL_OP3_404_66026_20141130_061639_outLine +BABEL_OP3_404_66350_20150212_043953_inLine +BABEL_OP3_404_66350_20150212_043953_outLine +BABEL_OP3_404_66959_20141130_212725_inLine +BABEL_OP3_404_66959_20141130_212725_outLine +BABEL_OP3_404_66975_20150119_001417_inLine +BABEL_OP3_404_66975_20150119_001417_outLine +BABEL_OP3_404_67066_20150611_043029_inLine +BABEL_OP3_404_67066_20150611_043029_outLine +BABEL_OP3_404_67283_20141008_234315_inLine +BABEL_OP3_404_67283_20141008_234315_outLine +BABEL_OP3_404_67373_20141106_191525_inLine +BABEL_OP3_404_67373_20141106_191525_outLine +BABEL_OP3_404_67373_20141106_192955_inLine +BABEL_OP3_404_67373_20141106_192955_outLine +BABEL_OP3_404_67622_20141021_002234_inLine +BABEL_OP3_404_67622_20141021_002234_outLine +BABEL_OP3_404_67659_20141101_010904_inLine +BABEL_OP3_404_67659_20141101_010904_outLine +BABEL_OP3_404_67964_20150515_011635_inLine +BABEL_OP3_404_67964_20150515_011635_outLine +BABEL_OP3_404_68040_20141118_235516_inLine +BABEL_OP3_404_68040_20141118_235516_outLine +BABEL_OP3_404_68748_20141123_003226_inLine +BABEL_OP3_404_68748_20141123_003226_outLine +BABEL_OP3_404_68854_20150512_025452_inLine +BABEL_OP3_404_68854_20150512_025452_outLine +BABEL_OP3_404_68924_20141119_025325_inLine +BABEL_OP3_404_68924_20141119_025325_outLine +BABEL_OP3_404_69992_20141014_035441_inLine +BABEL_OP3_404_69992_20141014_035441_outLine +BABEL_OP3_404_70110_20141020_043016_inLine +BABEL_OP3_404_70110_20141020_043016_outLine +BABEL_OP3_404_70251_20141009_221726_inLine +BABEL_OP3_404_70251_20141009_221726_outLine +BABEL_OP3_404_70293_20150118_220441_inLine +BABEL_OP3_404_70293_20150118_220441_outLine +BABEL_OP3_404_70343_20141126_030147_inLine +BABEL_OP3_404_70343_20141126_030147_outLine 
+BABEL_OP3_404_70386_20141029_002717_inLine +BABEL_OP3_404_70386_20141029_002717_outLine +BABEL_OP3_404_70452_20141028_031043_inLine +BABEL_OP3_404_70452_20141028_031043_outLine +BABEL_OP3_404_70601_20141103_194852_inLine +BABEL_OP3_404_70601_20141103_194852_outLine +BABEL_OP3_404_71704_20141021_001821_inLine +BABEL_OP3_404_71704_20141021_001821_outLine +BABEL_OP3_404_71704_20141021_002603_inLine +BABEL_OP3_404_71704_20141021_002603_outLine +BABEL_OP3_404_72007_20141201_045843_inLine +BABEL_OP3_404_72007_20141201_045843_outLine +BABEL_OP3_404_72040_20141103_035957_inLine +BABEL_OP3_404_72040_20141103_035957_outLine +BABEL_OP3_404_72040_20141103_042101_inLine +BABEL_OP3_404_72040_20141103_042101_outLine +BABEL_OP3_404_72110_20141128_013317_inLine +BABEL_OP3_404_72110_20141128_013317_outLine +BABEL_OP3_404_72324_20141201_013717_inLine +BABEL_OP3_404_72324_20141201_013717_outLine +BABEL_OP3_404_72654_20141110_003307_inLine +BABEL_OP3_404_72654_20141110_003307_outLine +BABEL_OP3_404_73042_20141022_163748_inLine +BABEL_OP3_404_73042_20141022_163748_outLine +BABEL_OP3_404_73301_20141101_210322_inLine +BABEL_OP3_404_73301_20141101_210322_outLine +BABEL_OP3_404_73446_20150513_002217_inLine +BABEL_OP3_404_73446_20150513_002217_outLine +BABEL_OP3_404_73511_20141129_045420_inLine +BABEL_OP3_404_73511_20141129_045420_outLine +BABEL_OP3_404_73549_20150619_204148_inLine +BABEL_OP3_404_73549_20150619_204148_outLine +BABEL_OP3_404_73591_20141018_022404_inLine +BABEL_OP3_404_73591_20141018_022404_outLine +BABEL_OP3_404_73622_20141016_060513_inLine +BABEL_OP3_404_73622_20141016_060513_outLine +BABEL_OP3_404_73814_20141120_180559_inLine +BABEL_OP3_404_73814_20141120_180559_outLine +BABEL_OP3_404_74226_20141130_235823_inLine +BABEL_OP3_404_74226_20141130_235823_outLine +BABEL_OP3_404_74253_20141201_231036_inLine +BABEL_OP3_404_74253_20141201_231036_outLine +BABEL_OP3_404_74280_20141010_230433_inLine +BABEL_OP3_404_74280_20141010_230433_outLine +BABEL_OP3_404_74667_20141114_221123_inLine +BABEL_OP3_404_74667_20141114_221123_outLine +BABEL_OP3_404_74886_20141022_200909_inLine +BABEL_OP3_404_74886_20141022_200909_outLine +BABEL_OP3_404_74921_20141124_030609_inLine +BABEL_OP3_404_74921_20141124_030609_outLine +BABEL_OP3_404_75223_20141012_224637_inLine +BABEL_OP3_404_75223_20141012_224637_outLine +BABEL_OP3_404_75342_20141130_193132_inLine +BABEL_OP3_404_75342_20141130_193132_outLine +BABEL_OP3_404_75930_20150206_063407_inLine +BABEL_OP3_404_75930_20150206_063407_outLine +BABEL_OP3_404_75993_20141102_192754_inLine +BABEL_OP3_404_75993_20141102_192754_outLine +BABEL_OP3_404_76155_20141118_052757_inLine +BABEL_OP3_404_76155_20141118_052757_outLine +BABEL_OP3_404_76218_20141119_232010_inLine +BABEL_OP3_404_76218_20141119_232010_outLine +BABEL_OP3_404_76499_20141117_005535_inLine +BABEL_OP3_404_76499_20141117_005535_outLine +BABEL_OP3_404_76756_20141120_014151_inLine +BABEL_OP3_404_76756_20141120_014151_outLine +BABEL_OP3_404_77033_20150503_233304_inLine +BABEL_OP3_404_77033_20150503_233304_outLine +BABEL_OP3_404_77112_20141105_062419_inLine +BABEL_OP3_404_77112_20141105_062419_outLine +BABEL_OP3_404_77139_20141022_022951_inLine +BABEL_OP3_404_77139_20141022_022951_outLine +BABEL_OP3_404_77744_20141103_034001_inLine +BABEL_OP3_404_77744_20141103_034001_outLine +BABEL_OP3_404_78116_20141128_231322_inLine +BABEL_OP3_404_78116_20141128_231322_outLine +BABEL_OP3_404_78194_20141019_052949_inLine +BABEL_OP3_404_78194_20141019_052949_outLine +BABEL_OP3_404_78398_20141022_235403_inLine 
+BABEL_OP3_404_78398_20141022_235403_outLine +BABEL_OP3_404_78544_20141130_192658_inLine +BABEL_OP3_404_78544_20141130_192658_outLine +BABEL_OP3_404_78604_20141022_164244_inLine +BABEL_OP3_404_78604_20141022_164244_outLine +BABEL_OP3_404_78630_20141025_220904_inLine +BABEL_OP3_404_78630_20141025_220904_outLine +BABEL_OP3_404_78743_20141202_001451_inLine +BABEL_OP3_404_78743_20141202_001451_outLine +BABEL_OP3_404_78943_20141025_004503_inLine +BABEL_OP3_404_78943_20141025_004503_outLine +BABEL_OP3_404_79028_20150213_002817_inLine +BABEL_OP3_404_79028_20150213_002817_outLine +BABEL_OP3_404_79107_20150614_013139_inLine +BABEL_OP3_404_79107_20150614_013139_outLine +BABEL_OP3_404_79129_20141110_183305_inLine +BABEL_OP3_404_79129_20141110_183305_outLine +BABEL_OP3_404_79367_20141008_232735_inLine +BABEL_OP3_404_79367_20141008_232735_outLine +BABEL_OP3_404_79451_20141031_025601_inLine +BABEL_OP3_404_79451_20141031_025601_outLine +BABEL_OP3_404_79995_20141201_013108_inLine +BABEL_OP3_404_79995_20141201_013108_outLine +BABEL_OP3_404_80622_20141119_054644_inLine +BABEL_OP3_404_80622_20141119_054644_outLine +BABEL_OP3_404_80721_20141201_013404_inLine +BABEL_OP3_404_80721_20141201_013404_outLine +BABEL_OP3_404_81287_20141130_024232_inLine +BABEL_OP3_404_81287_20141130_024232_outLine +BABEL_OP3_404_81392_20141130_022613_inLine +BABEL_OP3_404_81392_20141130_022613_outLine +BABEL_OP3_404_81392_20141130_023326_inLine +BABEL_OP3_404_81392_20141130_023326_outLine +BABEL_OP3_404_81404_20141104_055546_inLine +BABEL_OP3_404_81404_20141104_055546_outLine +BABEL_OP3_404_81433_20141119_073031_inLine +BABEL_OP3_404_81433_20141119_073031_outLine +BABEL_OP3_404_81435_20141128_235050_inLine +BABEL_OP3_404_81435_20141128_235050_outLine +BABEL_OP3_404_81622_20141129_212937_inLine +BABEL_OP3_404_81622_20141129_212937_outLine +BABEL_OP3_404_81810_20141126_051528_inLine +BABEL_OP3_404_81810_20141126_051528_outLine +BABEL_OP3_404_82030_20150517_193420_inLine +BABEL_OP3_404_82030_20150517_193420_outLine +BABEL_OP3_404_82035_20141119_063429_inLine +BABEL_OP3_404_82035_20141119_063429_outLine +BABEL_OP3_404_82138_20141116_234338_inLine +BABEL_OP3_404_82138_20141116_234338_outLine +BABEL_OP3_404_82140_20141117_021927_inLine +BABEL_OP3_404_82140_20141117_021927_outLine +BABEL_OP3_404_82145_20150502_232707_inLine +BABEL_OP3_404_82145_20150502_232707_outLine +BABEL_OP3_404_82391_20141128_063323_inLine +BABEL_OP3_404_82391_20141128_063323_outLine +BABEL_OP3_404_82496_20141009_062659_inLine +BABEL_OP3_404_82496_20141009_062659_outLine +BABEL_OP3_404_82622_20141008_042910_inLine +BABEL_OP3_404_82622_20141008_042910_outLine +BABEL_OP3_404_82904_20150523_231750_inLine +BABEL_OP3_404_82904_20150523_231750_outLine +BABEL_OP3_404_83455_20141112_000643_inLine +BABEL_OP3_404_83455_20141112_000643_outLine +BABEL_OP3_404_83783_20141115_005815_inLine +BABEL_OP3_404_83783_20141115_005815_outLine +BABEL_OP3_404_83935_20141201_214527_inLine +BABEL_OP3_404_83935_20141201_214527_outLine +BABEL_OP3_404_84327_20141130_185722_inLine +BABEL_OP3_404_84327_20141130_185722_outLine +BABEL_OP3_404_84408_20141105_182756_inLine +BABEL_OP3_404_84408_20141105_182756_outLine +BABEL_OP3_404_84469_20141130_030156_inLine +BABEL_OP3_404_84469_20141130_030156_outLine +BABEL_OP3_404_84547_20141022_025230_inLine +BABEL_OP3_404_84547_20141022_025230_outLine +BABEL_OP3_404_84605_20141026_234127_inLine +BABEL_OP3_404_84605_20141026_234127_outLine +BABEL_OP3_404_84611_20141024_005352_inLine +BABEL_OP3_404_84611_20141024_005352_outLine 
+BABEL_OP3_404_84768_20141012_183416_inLine +BABEL_OP3_404_84768_20141012_183416_outLine +BABEL_OP3_404_84823_20141201_061552_inLine +BABEL_OP3_404_84823_20141201_061552_outLine +BABEL_OP3_404_84936_20141130_025359_inLine +BABEL_OP3_404_84936_20141130_025359_outLine +BABEL_OP3_404_85647_20141111_231451_inLine +BABEL_OP3_404_85647_20141111_231451_outLine +BABEL_OP3_404_86321_20141127_025302_inLine +BABEL_OP3_404_86321_20141127_025302_outLine +BABEL_OP3_404_86433_20141201_005203_inLine +BABEL_OP3_404_86433_20141201_005203_outLine +BABEL_OP3_404_86433_20141201_010208_inLine +BABEL_OP3_404_86433_20141201_010208_outLine +BABEL_OP3_404_86433_20141201_011757_inLine +BABEL_OP3_404_86433_20141201_011757_outLine +BABEL_OP3_404_86467_20141019_022847_inLine +BABEL_OP3_404_86467_20141019_022847_outLine +BABEL_OP3_404_86467_20141019_024243_inLine +BABEL_OP3_404_86467_20141019_024243_outLine +BABEL_OP3_404_86557_20141021_041027_inLine +BABEL_OP3_404_86557_20141021_041027_outLine +BABEL_OP3_404_86676_20141125_223657_inLine +BABEL_OP3_404_86676_20141125_223657_outLine +BABEL_OP3_404_86952_20141008_194318_inLine +BABEL_OP3_404_86952_20141008_194318_outLine +BABEL_OP3_404_87073_20141007_223759_inLine +BABEL_OP3_404_87073_20141007_223759_outLine +BABEL_OP3_404_87280_20141201_232519_inLine +BABEL_OP3_404_87280_20141201_232519_outLine +BABEL_OP3_404_87693_20141105_002311_inLine +BABEL_OP3_404_87693_20141105_002311_outLine +BABEL_OP3_404_88601_20141115_021916_inLine +BABEL_OP3_404_88601_20141115_021916_outLine +BABEL_OP3_404_88601_20141115_024632_inLine +BABEL_OP3_404_88601_20141115_024632_outLine +BABEL_OP3_404_88686_20141019_023828_inLine +BABEL_OP3_404_88686_20141019_023828_outLine +BABEL_OP3_404_88925_20141201_043633_inLine +BABEL_OP3_404_88925_20141201_043633_outLine +BABEL_OP3_404_88982_20141106_212556_inLine +BABEL_OP3_404_88982_20141106_212556_outLine +BABEL_OP3_404_89358_20141119_055634_inLine +BABEL_OP3_404_89358_20141119_055634_outLine +BABEL_OP3_404_89695_20141115_212119_inLine +BABEL_OP3_404_89695_20141115_212119_outLine +BABEL_OP3_404_89794_20141130_055655_inLine +BABEL_OP3_404_89794_20141130_055655_outLine +BABEL_OP3_404_89877_20141120_061055_inLine +BABEL_OP3_404_89877_20141120_061055_outLine +BABEL_OP3_404_90417_20150611_052409_inLine +BABEL_OP3_404_90417_20150611_052409_outLine +BABEL_OP3_404_90737_20141116_233627_inLine +BABEL_OP3_404_90737_20141116_233627_outLine +BABEL_OP3_404_90739_20141116_034352_inLine +BABEL_OP3_404_90739_20141116_034352_outLine +BABEL_OP3_404_90777_20141115_012657_inLine +BABEL_OP3_404_90777_20141115_012657_outLine +BABEL_OP3_404_90935_20141104_195620_inLine +BABEL_OP3_404_90935_20141104_195620_outLine +BABEL_OP3_404_91080_20141119_062453_inLine +BABEL_OP3_404_91080_20141119_062453_outLine +BABEL_OP3_404_91125_20141010_234127_inLine +BABEL_OP3_404_91125_20141010_234127_outLine +BABEL_OP3_404_91336_20141110_011202_inLine +BABEL_OP3_404_91336_20141110_011202_outLine +BABEL_OP3_404_92065_20141201_041019_inLine +BABEL_OP3_404_92065_20141201_041019_outLine +BABEL_OP3_404_92077_20150610_053919_inLine +BABEL_OP3_404_92077_20150610_053919_outLine +BABEL_OP3_404_92459_20141026_000227_inLine +BABEL_OP3_404_92459_20141026_000227_outLine +BABEL_OP3_404_92459_20141026_000839_inLine +BABEL_OP3_404_92459_20141026_000839_outLine +BABEL_OP3_404_92509_20141020_034921_inLine +BABEL_OP3_404_92509_20141020_034921_outLine +BABEL_OP3_404_92527_20141115_024550_inLine +BABEL_OP3_404_92527_20141115_024550_outLine +BABEL_OP3_404_92809_20141009_080406_inLine 
+BABEL_OP3_404_92809_20141009_080406_outLine +BABEL_OP3_404_92886_20141103_032433_inLine +BABEL_OP3_404_92886_20141103_032433_outLine +BABEL_OP3_404_92941_20141027_175733_inLine +BABEL_OP3_404_92941_20141027_175733_outLine +BABEL_OP3_404_92941_20141027_180356_inLine +BABEL_OP3_404_92941_20141027_180356_outLine +BABEL_OP3_404_93224_20141119_210156_inLine +BABEL_OP3_404_93224_20141119_210156_outLine +BABEL_OP3_404_93411_20141119_193212_inLine +BABEL_OP3_404_93411_20141119_193212_outLine +BABEL_OP3_404_93861_20141111_181324_inLine +BABEL_OP3_404_93861_20141111_181324_outLine +BABEL_OP3_404_93946_20141129_015946_inLine +BABEL_OP3_404_93946_20141129_015946_outLine +BABEL_OP3_404_93964_20141111_213251_inLine +BABEL_OP3_404_93964_20141111_213251_outLine +BABEL_OP3_404_94141_20150516_175827_inLine +BABEL_OP3_404_94141_20150516_175827_outLine +BABEL_OP3_404_94253_20141029_184039_inLine +BABEL_OP3_404_94253_20141029_184039_outLine +BABEL_OP3_404_94409_20141117_003829_inLine +BABEL_OP3_404_94409_20141117_003829_outLine +BABEL_OP3_404_94666_20141119_231115_inLine +BABEL_OP3_404_94666_20141119_231115_outLine +BABEL_OP3_404_94745_20141201_033432_inLine +BABEL_OP3_404_94745_20141201_033432_outLine +BABEL_OP3_404_94923_20141116_230334_inLine +BABEL_OP3_404_94923_20141116_230334_outLine +BABEL_OP3_404_94978_20150528_024921_inLine +BABEL_OP3_404_94978_20150528_024921_outLine +BABEL_OP3_404_95294_20141129_062228_inLine +BABEL_OP3_404_95294_20141129_062228_outLine +BABEL_OP3_404_95467_20150612_031400_inLine +BABEL_OP3_404_95467_20150612_031400_outLine +BABEL_OP3_404_95490_20141021_050016_inLine +BABEL_OP3_404_95490_20141021_050016_outLine +BABEL_OP3_404_95663_20141022_043520_inLine +BABEL_OP3_404_95663_20141022_043520_outLine +BABEL_OP3_404_95670_20141019_224431_inLine +BABEL_OP3_404_95670_20141019_224431_outLine +BABEL_OP3_404_95677_20150220_205948_inLine +BABEL_OP3_404_95677_20150220_205948_outLine +BABEL_OP3_404_95942_20150514_235402_inLine +BABEL_OP3_404_95942_20150514_235402_outLine +BABEL_OP3_404_96088_20150524_191148_inLine +BABEL_OP3_404_96088_20150524_191148_outLine +BABEL_OP3_404_96190_20141107_040725_inLine +BABEL_OP3_404_96190_20141107_040725_outLine +BABEL_OP3_404_96405_20141026_045704_inLine +BABEL_OP3_404_96405_20141026_045704_outLine +BABEL_OP3_404_96820_20141109_204448_inLine +BABEL_OP3_404_96820_20141109_204448_outLine +BABEL_OP3_404_96842_20150610_040559_inLine +BABEL_OP3_404_96842_20150610_040559_outLine +BABEL_OP3_404_96910_20141026_195400_inLine +BABEL_OP3_404_96910_20141026_195400_outLine +BABEL_OP3_404_96934_20141025_223703_inLine +BABEL_OP3_404_96934_20141025_223703_outLine +BABEL_OP3_404_96934_20141025_225156_inLine +BABEL_OP3_404_96934_20141025_225156_outLine +BABEL_OP3_404_96985_20141013_053332_inLine +BABEL_OP3_404_96985_20141013_053332_outLine +BABEL_OP3_404_97363_20141120_034843_inLine +BABEL_OP3_404_97363_20141120_034843_outLine +BABEL_OP3_404_97570_20141120_050344_inLine +BABEL_OP3_404_97570_20141120_050344_outLine +BABEL_OP3_404_98311_20141022_042555_inLine +BABEL_OP3_404_98311_20141022_042555_outLine +BABEL_OP3_404_98356_20141123_013523_inLine +BABEL_OP3_404_98356_20141123_013523_outLine +BABEL_OP3_404_98390_20141014_024134_inLine +BABEL_OP3_404_98390_20141014_024134_outLine +BABEL_OP3_404_98565_20150217_195949_inLine +BABEL_OP3_404_98565_20150217_195949_outLine +BABEL_OP3_404_98580_20141130_022138_inLine +BABEL_OP3_404_98580_20141130_022138_outLine +BABEL_OP3_404_98909_20141027_032903_inLine +BABEL_OP3_404_98909_20141027_032903_outLine 
+BABEL_OP3_404_99516_20141019_071828_inLine +BABEL_OP3_404_99516_20141019_071828_outLine
diff --git a/egs/babel/s5d/conf/lists/404-georgian/sub-train.list b/egs/babel/s5d/conf/lists/404-georgian/sub-train.list new file mode 100644 index 00000000000..a042ee569ef --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/sub-train.list @@ -0,0 +1,124 @@
+BABEL_OP3_404_11663_20141118_032146_inLine +BABEL_OP3_404_11663_20141118_032146_outLine +BABEL_OP3_404_12242_20141028_021853_inLine +BABEL_OP3_404_12242_20141028_021853_outLine +BABEL_OP3_404_13178_20141129_192909_inLine +BABEL_OP3_404_13178_20141129_192909_outLine +BABEL_OP3_404_14137_20141025_202817_inLine +BABEL_OP3_404_14137_20141025_202817_outLine +BABEL_OP3_404_14875_20141026_230227_inLine +BABEL_OP3_404_14875_20141026_230227_outLine +BABEL_OP3_404_15869_20150218_225936_inLine +BABEL_OP3_404_15869_20150218_225936_outLine +BABEL_OP3_404_17113_20150611_050102_inLine +BABEL_OP3_404_17113_20150611_050102_outLine +BABEL_OP3_404_23505_20141021_032033_inLine +BABEL_OP3_404_23505_20141021_032033_outLine +BABEL_OP3_404_24470_20141111_184651_inLine +BABEL_OP3_404_24470_20141111_184651_outLine +BABEL_OP3_404_24470_20141111_190229_inLine +BABEL_OP3_404_24470_20141111_190229_outLine +BABEL_OP3_404_24679_20141018_015615_inLine +BABEL_OP3_404_24679_20141018_015615_outLine +BABEL_OP3_404_26388_20141026_014207_inLine +BABEL_OP3_404_26388_20141026_014207_outLine +BABEL_OP3_404_27042_20141201_215107_inLine +BABEL_OP3_404_27042_20141201_215107_outLine +BABEL_OP3_404_28538_20141119_005526_inLine +BABEL_OP3_404_28538_20141119_005526_outLine +BABEL_OP3_404_29208_20141106_013309_inLine +BABEL_OP3_404_29208_20141106_013309_outLine +BABEL_OP3_404_30461_20150620_020316_inLine +BABEL_OP3_404_30461_20150620_020316_outLine +BABEL_OP3_404_31979_20141106_000523_inLine +BABEL_OP3_404_31979_20141106_000523_outLine +BABEL_OP3_404_31992_20141014_221817_inLine +BABEL_OP3_404_31992_20141014_221817_outLine +BABEL_OP3_404_37064_20141102_063308_inLine +BABEL_OP3_404_37064_20141102_063308_outLine +BABEL_OP3_404_37281_20141119_053453_inLine +BABEL_OP3_404_37281_20141119_053453_outLine +BABEL_OP3_404_37853_20150602_030625_inLine +BABEL_OP3_404_37853_20150602_030625_outLine +BABEL_OP3_404_40713_20141028_221207_inLine +BABEL_OP3_404_40713_20141028_221207_outLine +BABEL_OP3_404_41680_20141012_040411_inLine +BABEL_OP3_404_41680_20141012_040411_outLine +BABEL_OP3_404_41920_20141008_040539_inLine +BABEL_OP3_404_41920_20141008_040539_outLine +BABEL_OP3_404_42877_20150212_052937_inLine +BABEL_OP3_404_42877_20150212_052937_outLine +BABEL_OP3_404_45121_20150609_055234_inLine +BABEL_OP3_404_45121_20150609_055234_outLine +BABEL_OP3_404_46169_20141130_224339_inLine +BABEL_OP3_404_46169_20141130_224339_outLine +BABEL_OP3_404_46625_20141011_040505_inLine +BABEL_OP3_404_46625_20141011_040505_outLine +BABEL_OP3_404_46681_20141021_040451_inLine +BABEL_OP3_404_46681_20141021_040451_outLine +BABEL_OP3_404_47270_20150512_053415_inLine +BABEL_OP3_404_47270_20150512_053415_outLine +BABEL_OP3_404_48844_20141020_065414_inLine +BABEL_OP3_404_48844_20141020_065414_outLine +BABEL_OP3_404_49768_20141026_022902_inLine +BABEL_OP3_404_49768_20141026_022902_outLine +BABEL_OP3_404_50175_20141021_025726_inLine +BABEL_OP3_404_50175_20141021_025726_outLine +BABEL_OP3_404_52301_20141009_051739_inLine +BABEL_OP3_404_52301_20141009_051739_outLine +BABEL_OP3_404_52301_20141009_054049_inLine +BABEL_OP3_404_52301_20141009_054049_outLine +BABEL_OP3_404_52490_20141016_020323_inLine
+BABEL_OP3_404_52490_20141016_020323_outLine +BABEL_OP3_404_56213_20141201_000837_inLine +BABEL_OP3_404_56213_20141201_000837_outLine +BABEL_OP3_404_58103_20141030_002209_inLine +BABEL_OP3_404_58103_20141030_002209_outLine +BABEL_OP3_404_59078_20141111_004941_inLine +BABEL_OP3_404_59078_20141111_004941_outLine +BABEL_OP3_404_61225_20141009_174003_inLine +BABEL_OP3_404_61225_20141009_174003_outLine +BABEL_OP3_404_63220_20141127_033605_inLine +BABEL_OP3_404_63220_20141127_033605_outLine +BABEL_OP3_404_64494_20141026_203549_inLine +BABEL_OP3_404_64494_20141026_203549_outLine +BABEL_OP3_404_64768_20141027_201818_inLine +BABEL_OP3_404_64768_20141027_201818_outLine +BABEL_OP3_404_66916_20141022_000731_inLine +BABEL_OP3_404_66916_20141022_000731_outLine +BABEL_OP3_404_67401_20141109_211809_inLine +BABEL_OP3_404_67401_20141109_211809_outLine +BABEL_OP3_404_68059_20141109_052011_inLine +BABEL_OP3_404_68059_20141109_052011_outLine +BABEL_OP3_404_68068_20141201_054518_inLine +BABEL_OP3_404_68068_20141201_054518_outLine +BABEL_OP3_404_68384_20141130_035214_inLine +BABEL_OP3_404_68384_20141130_035214_outLine +BABEL_OP3_404_68627_20141105_190511_inLine +BABEL_OP3_404_68627_20141105_190511_outLine +BABEL_OP3_404_72844_20141007_033837_inLine +BABEL_OP3_404_72844_20141007_033837_outLine +BABEL_OP3_404_73837_20141026_191037_inLine +BABEL_OP3_404_73837_20141026_191037_outLine +BABEL_OP3_404_78511_20141201_003606_inLine +BABEL_OP3_404_78511_20141201_003606_outLine +BABEL_OP3_404_79139_20141117_054733_inLine +BABEL_OP3_404_79139_20141117_054733_outLine +BABEL_OP3_404_81971_20141022_025641_inLine +BABEL_OP3_404_81971_20141022_025641_outLine +BABEL_OP3_404_83062_20150523_220236_inLine +BABEL_OP3_404_83062_20150523_220236_outLine +BABEL_OP3_404_83775_20141030_230742_inLine +BABEL_OP3_404_83775_20141030_230742_outLine +BABEL_OP3_404_84339_20150502_014143_inLine +BABEL_OP3_404_84339_20150502_014143_outLine +BABEL_OP3_404_86191_20141027_013544_inLine +BABEL_OP3_404_86191_20141027_013544_outLine +BABEL_OP3_404_86888_20141119_022459_inLine +BABEL_OP3_404_86888_20141119_022459_outLine +BABEL_OP3_404_95966_20141129_060246_inLine +BABEL_OP3_404_95966_20141129_060246_outLine +BABEL_OP3_404_97461_20141118_230730_inLine +BABEL_OP3_404_97461_20141118_230730_outLine +BABEL_OP3_404_99487_20141021_053024_inLine +BABEL_OP3_404_99487_20141021_053024_outLine
diff --git a/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list new file mode 100644 index 00000000000..32d863a65ad --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list @@ -0,0 +1,929 @@
+BABEL_OP3_404_10019_20141101_191932_inLine +BABEL_OP3_404_10019_20141101_191932_outLine +BABEL_OP3_404_10058_20150526_034808_inLine +BABEL_OP3_404_10411_20150611_172027_inLine +BABEL_OP3_404_10411_20150611_172027_outLine +BABEL_OP3_404_10416_20141117_064700_inLine +BABEL_OP3_404_10416_20141117_064700_outLine +BABEL_OP3_404_10647_20150514_001106_inLine +BABEL_OP3_404_10647_20150514_001106_outLine +BABEL_OP3_404_10938_20141030_023413_inLine +BABEL_OP3_404_10938_20141030_023413_outLine +BABEL_OP3_404_10974_20141119_205506_inLine +BABEL_OP3_404_10974_20141119_205506_outLine +BABEL_OP3_404_11352_20150513_002642_inLine +BABEL_OP3_404_11352_20150513_002642_outLine +BABEL_OP3_404_11673_20141023_035438_inLine +BABEL_OP3_404_11673_20141023_035438_outLine +BABEL_OP3_404_11681_20141107_190101_inLine +BABEL_OP3_404_11681_20141107_190101_outLine +BABEL_OP3_404_11859_20150611_041737_inLine
+BABEL_OP3_404_11859_20150611_041737_outLine +BABEL_OP3_404_12220_20141116_205911_inLine +BABEL_OP3_404_12220_20141116_205911_outLine +BABEL_OP3_404_12609_20150524_172934_inLine +BABEL_OP3_404_12609_20150524_172934_outLine +BABEL_OP3_404_13030_20141101_200709_inLine +BABEL_OP3_404_13030_20141101_200709_outLine +BABEL_OP3_404_13126_20150524_221540_inLine +BABEL_OP3_404_13126_20150524_221540_outLine +BABEL_OP3_404_13324_20141022_200257_inLine +BABEL_OP3_404_13324_20141022_200257_outLine +BABEL_OP3_404_13664_20141012_013523_inLine +BABEL_OP3_404_13664_20141012_013523_outLine +BABEL_OP3_404_13709_20150512_015216_inLine +BABEL_OP3_404_13709_20150512_015216_outLine +BABEL_OP3_404_14158_20141130_030130_inLine +BABEL_OP3_404_14158_20141130_030130_outLine +BABEL_OP3_404_14229_20141029_200136_inLine +BABEL_OP3_404_14229_20141029_200136_outLine +BABEL_OP3_404_14237_20141006_171921_inLine +BABEL_OP3_404_14237_20141006_171921_outLine +BABEL_OP3_404_14440_20141127_213106_inLine +BABEL_OP3_404_14440_20141127_213106_outLine +BABEL_OP3_404_14807_20141110_231934_inLine +BABEL_OP3_404_14807_20141110_231934_outLine +BABEL_OP3_404_14899_20141022_202217_inLine +BABEL_OP3_404_14899_20141022_202217_outLine +BABEL_OP3_404_14929_20141129_192841_inLine +BABEL_OP3_404_14929_20141129_192841_outLine +BABEL_OP3_404_15024_20141118_234824_inLine +BABEL_OP3_404_15024_20141118_234824_outLine +BABEL_OP3_404_15042_20150506_232829_inLine +BABEL_OP3_404_15042_20150506_232829_outLine +BABEL_OP3_404_15382_20141130_213942_inLine +BABEL_OP3_404_15382_20141130_213942_outLine +BABEL_OP3_404_15535_20141129_021659_inLine +BABEL_OP3_404_15535_20141129_021659_outLine +BABEL_OP3_404_15638_20141127_220502_outLine +BABEL_OP3_404_15848_20141006_231138_inLine +BABEL_OP3_404_15848_20141006_231138_outLine +BABEL_OP3_404_15902_20141020_173105_outLine +BABEL_OP3_404_16149_20141010_173548_inLine +BABEL_OP3_404_16149_20141010_173548_outLine +BABEL_OP3_404_16467_20141130_014316_inLine +BABEL_OP3_404_16467_20141130_014316_outLine +BABEL_OP3_404_16467_20141130_015010_inLine +BABEL_OP3_404_16467_20141130_015010_outLine +BABEL_OP3_404_16475_20141116_052010_outLine +BABEL_OP3_404_16601_20141201_041704_inLine +BABEL_OP3_404_16601_20141201_041704_outLine +BABEL_OP3_404_17280_20141103_190330_inLine +BABEL_OP3_404_17280_20141103_190330_outLine +BABEL_OP3_404_17320_20150524_213213_inLine +BABEL_OP3_404_17320_20150524_213213_outLine +BABEL_OP3_404_17420_20150503_201902_inLine +BABEL_OP3_404_17420_20150503_201902_outLine +BABEL_OP3_404_17420_20150527_025815_inLine +BABEL_OP3_404_17420_20150527_025815_outLine +BABEL_OP3_404_17420_20150527_034621_inLine +BABEL_OP3_404_17420_20150527_034621_outLine +BABEL_OP3_404_17520_20141113_032534_inLine +BABEL_OP3_404_17567_20141117_182919_inLine +BABEL_OP3_404_17567_20141117_182919_outLine +BABEL_OP3_404_17573_20141129_035040_inLine +BABEL_OP3_404_17573_20141129_035040_outLine +BABEL_OP3_404_17615_20141201_025917_inLine +BABEL_OP3_404_17615_20141201_025917_outLine +BABEL_OP3_404_17890_20141128_040046_inLine +BABEL_OP3_404_17890_20141128_040046_outLine +BABEL_OP3_404_17923_20141022_231429_outLine +BABEL_OP3_404_18118_20150503_165936_inLine +BABEL_OP3_404_18118_20150503_165936_outLine +BABEL_OP3_404_18291_20150611_062705_outLine +BABEL_OP3_404_18291_20150611_063700_outLine +BABEL_OP3_404_18766_20150610_064349_inLine +BABEL_OP3_404_19120_20150525_014657_inLine +BABEL_OP3_404_19120_20150525_014657_outLine +BABEL_OP3_404_19120_20150525_015635_inLine +BABEL_OP3_404_19120_20150525_015635_outLine 
+BABEL_OP3_404_19134_20141120_053128_inLine +BABEL_OP3_404_19134_20141120_053128_outLine +BABEL_OP3_404_19703_20141027_004315_inLine +BABEL_OP3_404_19703_20141027_004315_outLine +BABEL_OP3_404_19877_20150506_202237_outLine +BABEL_OP3_404_20133_20141010_195231_inLine +BABEL_OP3_404_20133_20141010_195231_outLine +BABEL_OP3_404_20454_20150218_171143_inLine +BABEL_OP3_404_20454_20150218_171143_outLine +BABEL_OP3_404_20985_20141126_183236_inLine +BABEL_OP3_404_20985_20141126_183236_outLine +BABEL_OP3_404_21004_20141201_035831_inLine +BABEL_OP3_404_21004_20141201_035831_outLine +BABEL_OP3_404_21159_20150615_021612_inLine +BABEL_OP3_404_21435_20150523_030702_inLine +BABEL_OP3_404_21435_20150523_030702_outLine +BABEL_OP3_404_21581_20141101_011021_inLine +BABEL_OP3_404_21581_20141101_011021_outLine +BABEL_OP3_404_21807_20141112_225225_outLine +BABEL_OP3_404_22280_20141111_020522_inLine +BABEL_OP3_404_22280_20141111_020522_outLine +BABEL_OP3_404_22591_20150217_220714_inLine +BABEL_OP3_404_23046_20141031_030755_inLine +BABEL_OP3_404_23046_20141031_030755_outLine +BABEL_OP3_404_23731_20141130_033602_inLine +BABEL_OP3_404_23731_20141130_033602_outLine +BABEL_OP3_404_23980_20141106_225951_inLine +BABEL_OP3_404_23980_20141106_225951_outLine +BABEL_OP3_404_24209_20150212_224614_inLine +BABEL_OP3_404_24239_20150517_203015_inLine +BABEL_OP3_404_24270_20141111_012902_inLine +BABEL_OP3_404_24270_20141111_012902_outLine +BABEL_OP3_404_24323_20141117_020615_outLine +BABEL_OP3_404_24501_20150522_030231_inLine +BABEL_OP3_404_24532_20141007_211325_inLine +BABEL_OP3_404_24532_20141007_211325_outLine +BABEL_OP3_404_24586_20150524_190657_inLine +BABEL_OP3_404_24586_20150524_190657_outLine +BABEL_OP3_404_24589_20141031_020641_inLine +BABEL_OP3_404_24589_20141031_020641_outLine +BABEL_OP3_404_24590_20141116_230233_inLine +BABEL_OP3_404_24590_20141116_230233_outLine +BABEL_OP3_404_24982_20141102_021352_inLine +BABEL_OP3_404_24982_20141102_021352_outLine +BABEL_OP3_404_25068_20150206_022730_outLine +BABEL_OP3_404_25085_20150611_040906_inLine +BABEL_OP3_404_25085_20150611_040906_outLine +BABEL_OP3_404_25412_20141120_031532_inLine +BABEL_OP3_404_25412_20141120_031532_outLine +BABEL_OP3_404_25496_20150613_034126_inLine +BABEL_OP3_404_25496_20150613_034126_outLine +BABEL_OP3_404_26398_20150527_032152_inLine +BABEL_OP3_404_26398_20150527_032152_outLine +BABEL_OP3_404_26478_20150617_004029_inLine +BABEL_OP3_404_26478_20150617_004029_outLine +BABEL_OP3_404_26836_20141102_024528_inLine +BABEL_OP3_404_26836_20141102_024528_outLine +BABEL_OP3_404_27203_20141119_185720_inLine +BABEL_OP3_404_27203_20141119_185720_outLine +BABEL_OP3_404_27203_20141119_191138_inLine +BABEL_OP3_404_27203_20141119_191138_outLine +BABEL_OP3_404_27590_20141128_051454_inLine +BABEL_OP3_404_28280_20150619_024509_inLine +BABEL_OP3_404_28280_20150619_024509_outLine +BABEL_OP3_404_28280_20150619_025848_inLine +BABEL_OP3_404_28280_20150619_025848_outLine +BABEL_OP3_404_28303_20141028_182204_inLine +BABEL_OP3_404_28303_20141028_182204_outLine +BABEL_OP3_404_28522_20141124_222758_inLine +BABEL_OP3_404_28522_20141124_222758_outLine +BABEL_OP3_404_28600_20141201_223206_inLine +BABEL_OP3_404_28600_20141201_223206_outLine +BABEL_OP3_404_28871_20141019_181913_inLine +BABEL_OP3_404_28871_20141019_181913_outLine +BABEL_OP3_404_28945_20141104_060349_outLine +BABEL_OP3_404_29039_20141128_035839_inLine +BABEL_OP3_404_29039_20141128_035839_outLine +BABEL_OP3_404_29076_20141109_215142_inLine +BABEL_OP3_404_29076_20141109_215142_outLine 
+BABEL_OP3_404_29230_20150611_051340_inLine +BABEL_OP3_404_29230_20150611_051340_outLine +BABEL_OP3_404_29439_20150524_201524_inLine +BABEL_OP3_404_29439_20150524_201524_outLine +BABEL_OP3_404_30098_20150610_150504_inLine +BABEL_OP3_404_30098_20150610_150504_outLine +BABEL_OP3_404_30432_20141126_052839_inLine +BABEL_OP3_404_30432_20141126_052839_outLine +BABEL_OP3_404_30497_20150525_194737_inLine +BABEL_OP3_404_30497_20150525_194737_outLine +BABEL_OP3_404_30645_20141019_220859_inLine +BABEL_OP3_404_30653_20150514_014515_inLine +BABEL_OP3_404_31267_20150615_011004_outLine +BABEL_OP3_404_31484_20141122_232804_inLine +BABEL_OP3_404_31484_20141122_232804_outLine +BABEL_OP3_404_31624_20141105_214349_inLine +BABEL_OP3_404_31624_20141105_214349_outLine +BABEL_OP3_404_31919_20150526_220911_inLine +BABEL_OP3_404_31919_20150526_220911_outLine +BABEL_OP3_404_32122_20141115_022841_inLine +BABEL_OP3_404_32122_20141115_022841_outLine +BABEL_OP3_404_32287_20150210_060823_inLine +BABEL_OP3_404_32287_20150210_060823_outLine +BABEL_OP3_404_32630_20150609_012137_inLine +BABEL_OP3_404_32630_20150609_012137_outLine +BABEL_OP3_404_32708_20141106_032826_inLine +BABEL_OP3_404_32708_20141106_032826_outLine +BABEL_OP3_404_32727_20141128_203500_inLine +BABEL_OP3_404_32727_20141128_203500_outLine +BABEL_OP3_404_32727_20141128_204751_inLine +BABEL_OP3_404_32727_20141128_204751_outLine +BABEL_OP3_404_32959_20141201_005331_inLine +BABEL_OP3_404_32959_20141201_005331_outLine +BABEL_OP3_404_32998_20141112_054111_inLine +BABEL_OP3_404_33355_20141019_032024_inLine +BABEL_OP3_404_33355_20141019_032024_outLine +BABEL_OP3_404_33355_20141019_034109_inLine +BABEL_OP3_404_33355_20141019_034109_outLine +BABEL_OP3_404_33704_20141207_073436_inLine +BABEL_OP3_404_33704_20141207_073436_outLine +BABEL_OP3_404_34328_20141119_054513_outLine +BABEL_OP3_404_34328_20141119_055432_outLine +BABEL_OP3_404_34679_20141102_052808_inLine +BABEL_OP3_404_34679_20141102_052808_outLine +BABEL_OP3_404_34688_20141009_073303_inLine +BABEL_OP3_404_34688_20141009_073303_outLine +BABEL_OP3_404_34811_20141109_001009_inLine +BABEL_OP3_404_34811_20141109_001009_outLine +BABEL_OP3_404_34899_20150611_060602_outLine +BABEL_OP3_404_35008_20141201_023042_inLine +BABEL_OP3_404_35008_20141201_023042_outLine +BABEL_OP3_404_35143_20141130_181111_inLine +BABEL_OP3_404_35143_20141130_181111_outLine +BABEL_OP3_404_35181_20150526_211416_inLine +BABEL_OP3_404_35181_20150526_211416_outLine +BABEL_OP3_404_35706_20150523_015900_inLine +BABEL_OP3_404_35706_20150523_015900_outLine +BABEL_OP3_404_35786_20150604_015518_inLine +BABEL_OP3_404_35786_20150604_015518_outLine +BABEL_OP3_404_36017_20150528_192934_inLine +BABEL_OP3_404_36017_20150528_192934_outLine +BABEL_OP3_404_36039_20150526_230125_inLine +BABEL_OP3_404_36039_20150526_230125_outLine +BABEL_OP3_404_36059_20150601_023254_inLine +BABEL_OP3_404_36059_20150601_023254_outLine +BABEL_OP3_404_36059_20150601_033346_inLine +BABEL_OP3_404_36059_20150601_033346_outLine +BABEL_OP3_404_36147_20150211_013803_outLine +BABEL_OP3_404_36219_20141104_012216_inLine +BABEL_OP3_404_36219_20141104_012216_outLine +BABEL_OP3_404_36642_20150610_161207_inLine +BABEL_OP3_404_36642_20150610_161207_outLine +BABEL_OP3_404_37290_20141115_050457_inLine +BABEL_OP3_404_37290_20141115_050457_outLine +BABEL_OP3_404_37598_20141119_045926_inLine +BABEL_OP3_404_37598_20141119_045926_outLine +BABEL_OP3_404_37682_20141101_221445_inLine +BABEL_OP3_404_37682_20141101_221445_outLine +BABEL_OP3_404_38125_20150526_233108_inLine 
+BABEL_OP3_404_38125_20150526_233108_outLine +BABEL_OP3_404_38323_20150615_021843_inLine +BABEL_OP3_404_38340_20141103_231545_inLine +BABEL_OP3_404_38340_20141103_231545_outLine +BABEL_OP3_404_38554_20141010_224451_inLine +BABEL_OP3_404_38554_20141010_224451_outLine +BABEL_OP3_404_38588_20141118_163844_inLine +BABEL_OP3_404_38588_20141118_163844_outLine +BABEL_OP3_404_38664_20141030_175135_inLine +BABEL_OP3_404_38664_20141030_175135_outLine +BABEL_OP3_404_38979_20150503_202406_outLine +BABEL_OP3_404_39099_20150511_053646_outLine +BABEL_OP3_404_39307_20141022_200554_inLine +BABEL_OP3_404_39307_20141022_201758_inLine +BABEL_OP3_404_39426_20150527_181901_outLine +BABEL_OP3_404_39744_20141023_002710_inLine +BABEL_OP3_404_39893_20150611_034149_inLine +BABEL_OP3_404_39920_20150503_205354_outLine +BABEL_OP3_404_40557_20141127_200639_inLine +BABEL_OP3_404_40557_20141127_200639_outLine +BABEL_OP3_404_40939_20150210_212748_inLine +BABEL_OP3_404_40939_20150210_212748_outLine +BABEL_OP3_404_41097_20141129_055801_inLine +BABEL_OP3_404_41097_20141129_055801_outLine +BABEL_OP3_404_41100_20141021_022126_inLine +BABEL_OP3_404_41100_20141021_022126_outLine +BABEL_OP3_404_41272_20150503_232941_inLine +BABEL_OP3_404_41334_20150617_041322_inLine +BABEL_OP3_404_41400_20150515_021408_inLine +BABEL_OP3_404_41609_20141009_013405_inLine +BABEL_OP3_404_41609_20141009_013405_outLine +BABEL_OP3_404_41692_20150604_005657_inLine +BABEL_OP3_404_41692_20150604_005657_outLine +BABEL_OP3_404_41745_20141114_235452_inLine +BABEL_OP3_404_41745_20141114_235452_outLine +BABEL_OP3_404_41958_20141029_212755_inLine +BABEL_OP3_404_41958_20141029_212755_outLine +BABEL_OP3_404_42155_20141127_055149_inLine +BABEL_OP3_404_42619_20141130_012456_outLine +BABEL_OP3_404_42834_20141125_004837_inLine +BABEL_OP3_404_42834_20141125_004837_outLine +BABEL_OP3_404_42883_20150604_035732_inLine +BABEL_OP3_404_42883_20150604_035732_outLine +BABEL_OP3_404_43368_20141031_010629_inLine +BABEL_OP3_404_43368_20141031_010629_outLine +BABEL_OP3_404_43388_20141114_212210_inLine +BABEL_OP3_404_43388_20141114_214120_inLine +BABEL_OP3_404_43588_20150517_233637_inLine +BABEL_OP3_404_43789_20141120_011327_outLine +BABEL_OP3_404_44114_20150614_012319_inLine +BABEL_OP3_404_44114_20150614_012319_outLine +BABEL_OP3_404_44309_20150525_022635_inLine +BABEL_OP3_404_44309_20150525_022635_outLine +BABEL_OP3_404_44477_20141201_180604_inLine +BABEL_OP3_404_44477_20141201_180604_outLine +BABEL_OP3_404_44478_20150512_225118_inLine +BABEL_OP3_404_44847_20141130_221248_inLine +BABEL_OP3_404_44847_20141130_221248_outLine +BABEL_OP3_404_45106_20141119_050859_inLine +BABEL_OP3_404_45106_20141119_050859_outLine +BABEL_OP3_404_45374_20150122_014830_outLine +BABEL_OP3_404_45374_20150122_015920_outLine +BABEL_OP3_404_45459_20150525_020410_inLine +BABEL_OP3_404_45459_20150525_020410_outLine +BABEL_OP3_404_45560_20141012_030417_inLine +BABEL_OP3_404_45560_20141012_030417_outLine +BABEL_OP3_404_45699_20150205_021829_inLine +BABEL_OP3_404_45851_20150514_155157_inLine +BABEL_OP3_404_45851_20150514_155157_outLine +BABEL_OP3_404_45908_20150515_004218_outLine +BABEL_OP3_404_46268_20141019_032022_inLine +BABEL_OP3_404_46268_20141019_032022_outLine +BABEL_OP3_404_46310_20141015_051100_inLine +BABEL_OP3_404_46310_20141015_051100_outLine +BABEL_OP3_404_46315_20141129_012912_inLine +BABEL_OP3_404_46315_20141129_012912_outLine +BABEL_OP3_404_46550_20141105_072519_inLine +BABEL_OP3_404_46550_20141105_072519_outLine +BABEL_OP3_404_46688_20141015_211329_inLine 
+BABEL_OP3_404_46688_20141015_211329_outLine +BABEL_OP3_404_46712_20141027_224004_inLine +BABEL_OP3_404_46712_20141027_224004_outLine +BABEL_OP3_404_46881_20141012_020055_inLine +BABEL_OP3_404_46881_20141012_020055_outLine +BABEL_OP3_404_46974_20141128_055136_inLine +BABEL_OP3_404_46974_20141128_055136_outLine +BABEL_OP3_404_46976_20141107_183806_inLine +BABEL_OP3_404_46976_20141107_183806_outLine +BABEL_OP3_404_47156_20150625_025324_inLine +BABEL_OP3_404_47156_20150625_025324_outLine +BABEL_OP3_404_47802_20141110_200430_inLine +BABEL_OP3_404_47802_20141110_200430_outLine +BABEL_OP3_404_47823_20141201_044425_inLine +BABEL_OP3_404_47823_20141201_044425_outLine +BABEL_OP3_404_48016_20150615_000741_inLine +BABEL_OP3_404_48016_20150615_000741_outLine +BABEL_OP3_404_48243_20141023_200903_inLine +BABEL_OP3_404_48243_20141023_200903_outLine +BABEL_OP3_404_48610_20141013_011505_inLine +BABEL_OP3_404_48610_20141013_012904_inLine +BABEL_OP3_404_48663_20150512_202837_inLine +BABEL_OP3_404_48663_20150512_202837_outLine +BABEL_OP3_404_49197_20141117_024730_inLine +BABEL_OP3_404_49197_20141117_024730_outLine +BABEL_OP3_404_49306_20150524_003356_inLine +BABEL_OP3_404_49306_20150524_003356_outLine +BABEL_OP3_404_49630_20141128_020114_inLine +BABEL_OP3_404_49630_20141128_020114_outLine +BABEL_OP3_404_49767_20150613_050113_inLine +BABEL_OP3_404_49767_20150613_050113_outLine +BABEL_OP3_404_49775_20141011_005306_inLine +BABEL_OP3_404_49775_20141011_005306_outLine +BABEL_OP3_404_49902_20141101_175534_inLine +BABEL_OP3_404_49902_20141101_175534_outLine +BABEL_OP3_404_49907_20141103_050534_inLine +BABEL_OP3_404_49907_20141103_050534_outLine +BABEL_OP3_404_49945_20150610_154709_inLine +BABEL_OP3_404_50601_20141127_032527_inLine +BABEL_OP3_404_50601_20141127_032527_outLine +BABEL_OP3_404_50745_20150513_162805_inLine +BABEL_OP3_404_50745_20150513_162805_outLine +BABEL_OP3_404_50779_20141115_012852_inLine +BABEL_OP3_404_50779_20141115_012852_outLine +BABEL_OP3_404_50810_20141007_234432_inLine +BABEL_OP3_404_50810_20141007_234432_outLine +BABEL_OP3_404_51015_20141123_193824_inLine +BABEL_OP3_404_51015_20141123_193824_outLine +BABEL_OP3_404_51414_20150604_001601_inLine +BABEL_OP3_404_51414_20150604_001601_outLine +BABEL_OP3_404_51484_20141202_000325_inLine +BABEL_OP3_404_51484_20141202_000325_outLine +BABEL_OP3_404_51701_20150620_010924_outLine +BABEL_OP3_404_52070_20150620_014422_outLine +BABEL_OP3_404_52070_20150620_020559_outLine +BABEL_OP3_404_52246_20141118_035022_inLine +BABEL_OP3_404_52246_20141118_035022_outLine +BABEL_OP3_404_52246_20141118_040850_inLine +BABEL_OP3_404_52246_20141118_040850_outLine +BABEL_OP3_404_52404_20141125_004855_inLine +BABEL_OP3_404_52404_20141125_004855_outLine +BABEL_OP3_404_52725_20150522_222730_inLine +BABEL_OP3_404_52725_20150522_222730_outLine +BABEL_OP3_404_53063_20141201_005237_inLine +BABEL_OP3_404_53063_20141201_005237_outLine +BABEL_OP3_404_53072_20150518_015132_inLine +BABEL_OP3_404_53415_20150503_225920_inLine +BABEL_OP3_404_53415_20150503_225920_outLine +BABEL_OP3_404_53492_20150525_055025_inLine +BABEL_OP3_404_53492_20150525_055025_outLine +BABEL_OP3_404_53665_20150526_004549_inLine +BABEL_OP3_404_53917_20150503_205456_outLine +BABEL_OP3_404_53957_20141201_051933_inLine +BABEL_OP3_404_54104_20141008_214620_inLine +BABEL_OP3_404_54104_20141008_214620_outLine +BABEL_OP3_404_54160_20141009_180704_inLine +BABEL_OP3_404_54160_20141009_180704_outLine +BABEL_OP3_404_54160_20141009_184719_inLine +BABEL_OP3_404_54160_20141009_184719_outLine 
+BABEL_OP3_404_54160_20141009_185557_inLine +BABEL_OP3_404_54160_20141009_185557_outLine +BABEL_OP3_404_54405_20141117_054820_inLine +BABEL_OP3_404_54405_20141117_054820_outLine +BABEL_OP3_404_54477_20141211_033627_inLine +BABEL_OP3_404_54477_20141211_033627_outLine +BABEL_OP3_404_54744_20141015_012011_inLine +BABEL_OP3_404_54744_20141015_012011_outLine +BABEL_OP3_404_55013_20150525_222257_inLine +BABEL_OP3_404_55013_20150525_222257_outLine +BABEL_OP3_404_55259_20141029_225631_inLine +BABEL_OP3_404_55259_20141029_225631_outLine +BABEL_OP3_404_55267_20141130_212756_inLine +BABEL_OP3_404_55349_20150523_031602_inLine +BABEL_OP3_404_55349_20150523_031602_outLine +BABEL_OP3_404_56019_20150502_020750_inLine +BABEL_OP3_404_56019_20150502_020750_outLine +BABEL_OP3_404_56076_20150516_164959_inLine +BABEL_OP3_404_56076_20150516_164959_outLine +BABEL_OP3_404_56331_20150526_020747_inLine +BABEL_OP3_404_56331_20150526_020747_outLine +BABEL_OP3_404_56743_20141114_223719_inLine +BABEL_OP3_404_56743_20141114_223719_outLine +BABEL_OP3_404_57065_20141201_002920_inLine +BABEL_OP3_404_57219_20150618_045613_inLine +BABEL_OP3_404_57219_20150618_045613_outLine +BABEL_OP3_404_57464_20150523_224617_inLine +BABEL_OP3_404_57542_20150526_233832_inLine +BABEL_OP3_404_57542_20150526_233832_outLine +BABEL_OP3_404_57542_20150526_235003_inLine +BABEL_OP3_404_57542_20150526_235003_outLine +BABEL_OP3_404_57654_20141023_235628_inLine +BABEL_OP3_404_57654_20141023_235628_outLine +BABEL_OP3_404_57678_20141104_023128_inLine +BABEL_OP3_404_57678_20141104_023128_outLine +BABEL_OP3_404_57919_20150127_041057_inLine +BABEL_OP3_404_57919_20150127_041057_outLine +BABEL_OP3_404_58006_20150526_024205_inLine +BABEL_OP3_404_58006_20150526_024205_outLine +BABEL_OP3_404_58026_20150615_004130_inLine +BABEL_OP3_404_58026_20150615_004130_outLine +BABEL_OP3_404_58915_20150611_034220_outLine +BABEL_OP3_404_59262_20141130_212633_inLine +BABEL_OP3_404_59262_20141130_212633_outLine +BABEL_OP3_404_59307_20150504_003405_inLine +BABEL_OP3_404_59307_20150504_003405_outLine +BABEL_OP3_404_59720_20141029_204612_inLine +BABEL_OP3_404_59720_20141029_204612_outLine +BABEL_OP3_404_59864_20150602_014458_inLine +BABEL_OP3_404_60026_20141008_051633_inLine +BABEL_OP3_404_60026_20141008_051633_outLine +BABEL_OP3_404_60299_20150611_040929_inLine +BABEL_OP3_404_60310_20141130_231532_inLine +BABEL_OP3_404_60310_20141130_231532_outLine +BABEL_OP3_404_60352_20141201_060712_inLine +BABEL_OP3_404_60352_20141201_060712_outLine +BABEL_OP3_404_60352_20141201_061821_inLine +BABEL_OP3_404_60352_20141201_061821_outLine +BABEL_OP3_404_60458_20150609_021527_inLine +BABEL_OP3_404_60458_20150609_021527_outLine +BABEL_OP3_404_60474_20141029_182816_inLine +BABEL_OP3_404_60474_20141029_182816_outLine +BABEL_OP3_404_60477_20150613_223056_inLine +BABEL_OP3_404_60477_20150613_224002_inLine +BABEL_OP3_404_60498_20150606_022221_inLine +BABEL_OP3_404_60498_20150606_022221_outLine +BABEL_OP3_404_60626_20141028_212539_inLine +BABEL_OP3_404_60626_20141028_212539_outLine +BABEL_OP3_404_60706_20141020_215729_inLine +BABEL_OP3_404_60706_20141020_215729_outLine +BABEL_OP3_404_61167_20141030_222711_inLine +BABEL_OP3_404_61167_20141030_222711_outLine +BABEL_OP3_404_61219_20141025_193634_inLine +BABEL_OP3_404_61219_20141025_193634_outLine +BABEL_OP3_404_61678_20141019_201928_inLine +BABEL_OP3_404_61678_20141019_201928_outLine +BABEL_OP3_404_61873_20141108_214852_inLine +BABEL_OP3_404_61873_20141108_214852_outLine +BABEL_OP3_404_61888_20150504_171019_inLine 
+BABEL_OP3_404_61971_20150525_020101_outLine +BABEL_OP3_404_62155_20150522_032307_inLine +BABEL_OP3_404_62155_20150522_032307_outLine +BABEL_OP3_404_62286_20141105_204359_inLine +BABEL_OP3_404_62286_20141105_204359_outLine +BABEL_OP3_404_62360_20150517_033230_inLine +BABEL_OP3_404_62360_20150517_033230_outLine +BABEL_OP3_404_62456_20141108_202333_inLine +BABEL_OP3_404_62456_20141108_202333_outLine +BABEL_OP3_404_62714_20150522_011337_inLine +BABEL_OP3_404_62714_20150522_011337_outLine +BABEL_OP3_404_62724_20141130_200827_inLine +BABEL_OP3_404_62724_20141130_200827_outLine +BABEL_OP3_404_62734_20141029_221513_inLine +BABEL_OP3_404_62734_20141029_221513_outLine +BABEL_OP3_404_62852_20141013_054854_outLine +BABEL_OP3_404_63081_20141021_032233_inLine +BABEL_OP3_404_63081_20141021_032233_outLine +BABEL_OP3_404_63081_20141021_033457_inLine +BABEL_OP3_404_63081_20141021_033457_outLine +BABEL_OP3_404_63084_20141130_221452_inLine +BABEL_OP3_404_63084_20141130_221452_outLine +BABEL_OP3_404_63425_20141126_054504_inLine +BABEL_OP3_404_63481_20141020_221014_outLine +BABEL_OP3_404_63481_20141020_224225_outLine +BABEL_OP3_404_63670_20141130_050318_inLine +BABEL_OP3_404_63670_20141130_050318_outLine +BABEL_OP3_404_63757_20141111_180721_inLine +BABEL_OP3_404_63757_20141111_180721_outLine +BABEL_OP3_404_63906_20150525_050310_inLine +BABEL_OP3_404_63906_20150525_050310_outLine +BABEL_OP3_404_63999_20150610_041309_inLine +BABEL_OP3_404_64014_20150503_032745_inLine +BABEL_OP3_404_64014_20150503_032745_outLine +BABEL_OP3_404_64722_20150514_034208_outLine +BABEL_OP3_404_64759_20141014_044027_inLine +BABEL_OP3_404_64759_20141014_045519_inLine +BABEL_OP3_404_64796_20141022_055826_inLine +BABEL_OP3_404_64870_20141108_192546_inLine +BABEL_OP3_404_64870_20141108_192546_outLine +BABEL_OP3_404_65561_20141124_060558_inLine +BABEL_OP3_404_65561_20141124_060558_outLine +BABEL_OP3_404_65640_20150528_211835_inLine +BABEL_OP3_404_65640_20150528_211835_outLine +BABEL_OP3_404_66045_20141117_035937_inLine +BABEL_OP3_404_66045_20141117_035937_outLine +BABEL_OP3_404_66177_20150503_202932_inLine +BABEL_OP3_404_66177_20150503_202932_outLine +BABEL_OP3_404_66822_20141117_020953_inLine +BABEL_OP3_404_66822_20141117_020953_outLine +BABEL_OP3_404_66967_20141008_202611_inLine +BABEL_OP3_404_66967_20141008_202611_outLine +BABEL_OP3_404_67152_20150503_201836_inLine +BABEL_OP3_404_67152_20150503_201836_outLine +BABEL_OP3_404_67304_20150211_054416_inLine +BABEL_OP3_404_67304_20150211_054416_outLine +BABEL_OP3_404_67552_20141126_011955_inLine +BABEL_OP3_404_67552_20141126_011955_outLine +BABEL_OP3_404_67842_20141104_051753_inLine +BABEL_OP3_404_67842_20141104_051753_outLine +BABEL_OP3_404_68244_20141119_065540_inLine +BABEL_OP3_404_68244_20141119_065540_outLine +BABEL_OP3_404_68306_20141126_180315_inLine +BABEL_OP3_404_68306_20141126_180315_outLine +BABEL_OP3_404_68385_20141017_031005_inLine +BABEL_OP3_404_68385_20141017_031005_outLine +BABEL_OP3_404_68823_20150212_041147_inLine +BABEL_OP3_404_68823_20150212_041147_outLine +BABEL_OP3_404_69096_20150512_165126_inLine +BABEL_OP3_404_69096_20150512_165126_outLine +BABEL_OP3_404_69107_20141120_010459_inLine +BABEL_OP3_404_69107_20141120_010459_outLine +BABEL_OP3_404_69153_20141130_221412_inLine +BABEL_OP3_404_69153_20141130_221412_outLine +BABEL_OP3_404_69153_20141130_222842_inLine +BABEL_OP3_404_69153_20141130_222842_outLine +BABEL_OP3_404_69474_20141128_051323_outLine +BABEL_OP3_404_69574_20141006_023156_inLine +BABEL_OP3_404_69574_20141006_023156_outLine 
+BABEL_OP3_404_69578_20141117_003921_inLine +BABEL_OP3_404_69578_20141117_003921_outLine +BABEL_OP3_404_69633_20141129_051648_inLine +BABEL_OP3_404_69633_20141129_051648_outLine +BABEL_OP3_404_69636_20141126_061322_inLine +BABEL_OP3_404_69636_20141126_061322_outLine +BABEL_OP3_404_69885_20150503_011226_inLine +BABEL_OP3_404_69885_20150503_011226_outLine +BABEL_OP3_404_69937_20150620_015912_inLine +BABEL_OP3_404_69964_20150524_015556_inLine +BABEL_OP3_404_69964_20150524_015556_outLine +BABEL_OP3_404_69982_20150625_035440_outLine +BABEL_OP3_404_70121_20141104_202610_inLine +BABEL_OP3_404_70121_20141104_202610_outLine +BABEL_OP3_404_70221_20141124_052004_inLine +BABEL_OP3_404_70221_20141124_052004_outLine +BABEL_OP3_404_70282_20141111_000251_inLine +BABEL_OP3_404_70282_20141111_000251_outLine +BABEL_OP3_404_70460_20150527_015340_inLine +BABEL_OP3_404_70460_20150527_015340_outLine +BABEL_OP3_404_70526_20150501_015444_inLine +BABEL_OP3_404_70526_20150501_015444_outLine +BABEL_OP3_404_70713_20150527_013058_inLine +BABEL_OP3_404_70713_20150527_013058_outLine +BABEL_OP3_404_70794_20141021_185105_inLine +BABEL_OP3_404_70794_20141021_185105_outLine +BABEL_OP3_404_71189_20150523_005918_inLine +BABEL_OP3_404_71189_20150523_005918_outLine +BABEL_OP3_404_71263_20141119_234747_inLine +BABEL_OP3_404_71263_20141119_234747_outLine +BABEL_OP3_404_71278_20150211_052730_inLine +BABEL_OP3_404_71278_20150211_052730_outLine +BABEL_OP3_404_71278_20150211_054040_inLine +BABEL_OP3_404_71278_20150211_054040_outLine +BABEL_OP3_404_71333_20141102_023503_inLine +BABEL_OP3_404_71333_20141102_023503_outLine +BABEL_OP3_404_71401_20150206_070446_inLine +BABEL_OP3_404_71401_20150206_070446_outLine +BABEL_OP3_404_71404_20141023_215509_inLine +BABEL_OP3_404_71404_20141023_215509_outLine +BABEL_OP3_404_71460_20150206_015309_outLine +BABEL_OP3_404_71559_20141210_220929_outLine +BABEL_OP3_404_71566_20141130_035713_inLine +BABEL_OP3_404_71566_20141130_035713_outLine +BABEL_OP3_404_71566_20141130_040359_inLine +BABEL_OP3_404_71566_20141130_040359_outLine +BABEL_OP3_404_71780_20141105_055543_inLine +BABEL_OP3_404_71780_20141105_055543_outLine +BABEL_OP3_404_72319_20150502_041426_inLine +BABEL_OP3_404_72319_20150502_041426_outLine +BABEL_OP3_404_72733_20150515_044419_inLine +BABEL_OP3_404_72733_20150515_044419_outLine +BABEL_OP3_404_73072_20141012_012029_inLine +BABEL_OP3_404_73072_20141012_012029_outLine +BABEL_OP3_404_73119_20141026_232203_inLine +BABEL_OP3_404_73119_20141026_232203_outLine +BABEL_OP3_404_73258_20141117_010123_inLine +BABEL_OP3_404_73258_20141117_010123_outLine +BABEL_OP3_404_73485_20150512_234636_inLine +BABEL_OP3_404_73485_20150512_234636_outLine +BABEL_OP3_404_73964_20150512_205010_inLine +BABEL_OP3_404_73964_20150512_205010_outLine +BABEL_OP3_404_74641_20141108_223951_inLine +BABEL_OP3_404_74641_20141108_223951_outLine +BABEL_OP3_404_74728_20150503_042547_inLine +BABEL_OP3_404_74728_20150503_042547_outLine +BABEL_OP3_404_74799_20141109_222638_inLine +BABEL_OP3_404_74799_20141109_222638_outLine +BABEL_OP3_404_75465_20141129_223330_outLine +BABEL_OP3_404_75869_20150527_230650_inLine +BABEL_OP3_404_75869_20150527_230650_outLine +BABEL_OP3_404_75975_20150127_051140_outLine +BABEL_OP3_404_76126_20141201_202238_inLine +BABEL_OP3_404_76126_20141201_202238_outLine +BABEL_OP3_404_76238_20141129_223455_inLine +BABEL_OP3_404_76238_20141129_223455_outLine +BABEL_OP3_404_76372_20150601_014341_inLine +BABEL_OP3_404_76372_20150601_014341_outLine +BABEL_OP3_404_76437_20141019_202715_inLine 
+BABEL_OP3_404_76437_20141019_202715_outLine +BABEL_OP3_404_76444_20141127_032124_inLine +BABEL_OP3_404_76444_20141127_032124_outLine +BABEL_OP3_404_76482_20150618_063131_outLine +BABEL_OP3_404_76683_20141110_191551_inLine +BABEL_OP3_404_76683_20141110_191551_outLine +BABEL_OP3_404_76837_20150124_222250_outLine +BABEL_OP3_404_76970_20150625_191722_inLine +BABEL_OP3_404_77126_20141022_202348_inLine +BABEL_OP3_404_77126_20141022_202348_outLine +BABEL_OP3_404_77146_20141019_060916_inLine +BABEL_OP3_404_77242_20150612_024655_inLine +BABEL_OP3_404_77391_20141026_222314_inLine +BABEL_OP3_404_77391_20141026_222314_outLine +BABEL_OP3_404_77427_20141030_192713_inLine +BABEL_OP3_404_77427_20141030_192713_outLine +BABEL_OP3_404_77567_20141021_021210_inLine +BABEL_OP3_404_77567_20141021_021210_outLine +BABEL_OP3_404_77730_20141014_201059_inLine +BABEL_OP3_404_77730_20141014_201059_outLine +BABEL_OP3_404_77803_20141020_030844_inLine +BABEL_OP3_404_77803_20141020_030844_outLine +BABEL_OP3_404_77990_20141024_215822_inLine +BABEL_OP3_404_77990_20141024_215822_outLine +BABEL_OP3_404_78016_20141029_233059_inLine +BABEL_OP3_404_78016_20141029_233059_outLine +BABEL_OP3_404_78254_20141025_202742_inLine +BABEL_OP3_404_78254_20141025_202742_outLine +BABEL_OP3_404_78254_20141025_204922_inLine +BABEL_OP3_404_78254_20141025_204922_outLine +BABEL_OP3_404_78454_20141115_043455_inLine +BABEL_OP3_404_78749_20150620_025728_inLine +BABEL_OP3_404_78749_20150620_025728_outLine +BABEL_OP3_404_78976_20141025_183704_inLine +BABEL_OP3_404_78976_20141025_183704_outLine +BABEL_OP3_404_79190_20141108_232204_inLine +BABEL_OP3_404_79190_20141108_232204_outLine +BABEL_OP3_404_79590_20141129_025808_outLine +BABEL_OP3_404_79751_20141101_232250_inLine +BABEL_OP3_404_79751_20141101_232250_outLine +BABEL_OP3_404_79820_20141104_045340_inLine +BABEL_OP3_404_79820_20141104_045340_outLine +BABEL_OP3_404_79858_20141015_200446_inLine +BABEL_OP3_404_79898_20150620_022648_inLine +BABEL_OP3_404_79898_20150620_022648_outLine +BABEL_OP3_404_79898_20150620_024014_inLine +BABEL_OP3_404_79898_20150620_024014_outLine +BABEL_OP3_404_80069_20150614_233606_inLine +BABEL_OP3_404_80069_20150614_233606_outLine +BABEL_OP3_404_80306_20141119_003833_inLine +BABEL_OP3_404_80306_20141119_003833_outLine +BABEL_OP3_404_80306_20141119_005121_inLine +BABEL_OP3_404_80306_20141119_005121_outLine +BABEL_OP3_404_80439_20141026_005410_inLine +BABEL_OP3_404_80439_20141026_005410_outLine +BABEL_OP3_404_80559_20141022_010255_inLine +BABEL_OP3_404_80655_20150525_221544_inLine +BABEL_OP3_404_80655_20150525_221544_outLine +BABEL_OP3_404_80897_20141119_233718_inLine +BABEL_OP3_404_80897_20141119_233718_outLine +BABEL_OP3_404_81149_20150525_003741_inLine +BABEL_OP3_404_81149_20150525_003741_outLine +BABEL_OP3_404_81213_20141102_205052_inLine +BABEL_OP3_404_81213_20141102_205052_outLine +BABEL_OP3_404_81229_20141117_041745_inLine +BABEL_OP3_404_81229_20141117_041745_outLine +BABEL_OP3_404_81427_20141030_015136_inLine +BABEL_OP3_404_81427_20141030_015136_outLine +BABEL_OP3_404_81854_20150610_060437_inLine +BABEL_OP3_404_82089_20141117_045302_inLine +BABEL_OP3_404_82089_20141117_045302_outLine +BABEL_OP3_404_82303_20150614_024236_inLine +BABEL_OP3_404_82303_20150614_024236_outLine +BABEL_OP3_404_82473_20141026_060037_inLine +BABEL_OP3_404_82473_20141026_060037_outLine +BABEL_OP3_404_82626_20150615_014517_inLine +BABEL_OP3_404_82637_20141021_010105_inLine +BABEL_OP3_404_82637_20141021_010105_outLine +BABEL_OP3_404_82742_20141201_234306_inLine 
+BABEL_OP3_404_82742_20141201_234306_outLine +BABEL_OP3_404_82863_20141119_044230_inLine +BABEL_OP3_404_82863_20141119_044230_outLine +BABEL_OP3_404_83238_20141119_180953_inLine +BABEL_OP3_404_83238_20141119_180953_outLine +BABEL_OP3_404_83366_20141120_192208_inLine +BABEL_OP3_404_83366_20141120_192208_outLine +BABEL_OP3_404_83651_20141102_170912_inLine +BABEL_OP3_404_83651_20141102_170912_outLine +BABEL_OP3_404_83771_20150604_012300_outLine +BABEL_OP3_404_83851_20141028_203735_inLine +BABEL_OP3_404_83851_20141028_203735_outLine +BABEL_OP3_404_83929_20141018_184023_inLine +BABEL_OP3_404_83929_20141018_184023_outLine +BABEL_OP3_404_83974_20150617_022055_inLine +BABEL_OP3_404_84055_20150504_002015_inLine +BABEL_OP3_404_84055_20150504_002015_outLine +BABEL_OP3_404_84061_20141030_205021_inLine +BABEL_OP3_404_84061_20141030_205021_outLine +BABEL_OP3_404_84125_20141018_023340_inLine +BABEL_OP3_404_84125_20141018_023340_outLine +BABEL_OP3_404_84458_20141130_053628_outLine +BABEL_OP3_404_84815_20141127_011952_inLine +BABEL_OP3_404_84815_20141127_013345_inLine +BABEL_OP3_404_85047_20141117_014630_inLine +BABEL_OP3_404_85047_20141117_014630_outLine +BABEL_OP3_404_85048_20141127_023704_inLine +BABEL_OP3_404_85048_20141127_023704_outLine +BABEL_OP3_404_85254_20150620_035606_inLine +BABEL_OP3_404_85254_20150620_035606_outLine +BABEL_OP3_404_85322_20141008_235518_inLine +BABEL_OP3_404_85322_20141008_235518_outLine +BABEL_OP3_404_85340_20141103_022707_inLine +BABEL_OP3_404_85340_20141103_022707_outLine +BABEL_OP3_404_85651_20141211_032650_inLine +BABEL_OP3_404_85651_20141211_032650_outLine +BABEL_OP3_404_86472_20141201_011325_inLine +BABEL_OP3_404_86472_20141201_011325_outLine +BABEL_OP3_404_86597_20150612_170328_inLine +BABEL_OP3_404_86597_20150612_170328_outLine +BABEL_OP3_404_86635_20141127_204158_inLine +BABEL_OP3_404_86635_20141127_204158_outLine +BABEL_OP3_404_86722_20141029_192140_inLine +BABEL_OP3_404_86722_20141029_192140_outLine +BABEL_OP3_404_87074_20141105_190107_outLine +BABEL_OP3_404_87470_20141114_214639_inLine +BABEL_OP3_404_87470_20141114_214639_outLine +BABEL_OP3_404_87629_20141127_020403_inLine +BABEL_OP3_404_87629_20141127_020403_outLine +BABEL_OP3_404_87777_20141127_040747_inLine +BABEL_OP3_404_87777_20141127_040747_outLine +BABEL_OP3_404_87871_20141201_023608_inLine +BABEL_OP3_404_87871_20141201_023608_outLine +BABEL_OP3_404_87921_20141201_023029_inLine +BABEL_OP3_404_87921_20141201_023029_outLine +BABEL_OP3_404_88260_20141103_234824_inLine +BABEL_OP3_404_88260_20141103_234824_outLine +BABEL_OP3_404_88445_20141119_043713_inLine +BABEL_OP3_404_88445_20141119_043713_outLine +BABEL_OP3_404_88661_20141127_025208_inLine +BABEL_OP3_404_88661_20141127_025208_outLine +BABEL_OP3_404_88669_20141119_000147_inLine +BABEL_OP3_404_88669_20141119_000147_outLine +BABEL_OP3_404_88783_20141201_045305_inLine +BABEL_OP3_404_88783_20141201_045305_outLine +BABEL_OP3_404_88873_20141028_190127_inLine +BABEL_OP3_404_88873_20141028_190127_outLine +BABEL_OP3_404_89045_20141022_193202_inLine +BABEL_OP3_404_89045_20141022_193202_outLine +BABEL_OP3_404_89330_20150616_002908_inLine +BABEL_OP3_404_89330_20150616_002908_outLine +BABEL_OP3_404_89372_20141010_000950_inLine +BABEL_OP3_404_89372_20141010_000950_outLine +BABEL_OP3_404_89650_20150220_222402_inLine +BABEL_OP3_404_89650_20150220_222402_outLine +BABEL_OP3_404_89650_20150220_224606_inLine +BABEL_OP3_404_89650_20150220_224606_outLine +BABEL_OP3_404_89665_20141103_202723_inLine +BABEL_OP3_404_89665_20141103_202723_outLine 
+BABEL_OP3_404_89943_20141105_211847_outLine +BABEL_OP3_404_90347_20141119_012016_inLine +BABEL_OP3_404_90347_20141119_012016_outLine +BABEL_OP3_404_90760_20150611_151739_inLine +BABEL_OP3_404_90760_20150611_151739_outLine +BABEL_OP3_404_90832_20150616_012728_inLine +BABEL_OP3_404_90832_20150616_012728_outLine +BABEL_OP3_404_90930_20150119_021352_inLine +BABEL_OP3_404_90930_20150119_021352_outLine +BABEL_OP3_404_91383_20150618_035815_inLine +BABEL_OP3_404_91463_20141116_023036_inLine +BABEL_OP3_404_91463_20141116_023036_outLine +BABEL_OP3_404_91475_20150614_034536_inLine +BABEL_OP3_404_91581_20141129_045608_inLine +BABEL_OP3_404_91581_20141129_045608_outLine +BABEL_OP3_404_91581_20141129_050730_inLine +BABEL_OP3_404_91581_20141129_050730_outLine +BABEL_OP3_404_91593_20150611_021825_inLine +BABEL_OP3_404_91593_20150611_021825_outLine +BABEL_OP3_404_91825_20141009_181224_inLine +BABEL_OP3_404_91825_20141009_181224_outLine +BABEL_OP3_404_91825_20141009_183843_inLine +BABEL_OP3_404_91825_20141009_183843_outLine +BABEL_OP3_404_91884_20150503_022858_inLine +BABEL_OP3_404_91884_20150503_022858_outLine +BABEL_OP3_404_91888_20150512_191012_inLine +BABEL_OP3_404_91888_20150512_191012_outLine +BABEL_OP3_404_91891_20141129_005825_inLine +BABEL_OP3_404_91891_20141129_005825_outLine +BABEL_OP3_404_91944_20141022_021002_inLine +BABEL_OP3_404_91971_20150217_041455_inLine +BABEL_OP3_404_91971_20150217_041455_outLine +BABEL_OP3_404_91977_20141122_230420_outLine +BABEL_OP3_404_92176_20141119_195614_inLine +BABEL_OP3_404_92176_20141119_195614_outLine +BABEL_OP3_404_92281_20150625_185123_inLine +BABEL_OP3_404_92698_20141117_072302_inLine +BABEL_OP3_404_92698_20141117_072302_outLine +BABEL_OP3_404_92736_20141201_011442_inLine +BABEL_OP3_404_92736_20141201_011442_outLine +BABEL_OP3_404_92757_20150525_200048_inLine +BABEL_OP3_404_92757_20150525_200048_outLine +BABEL_OP3_404_92792_20150503_182854_outLine +BABEL_OP3_404_92792_20150525_025523_outLine +BABEL_OP3_404_92942_20141120_022830_inLine +BABEL_OP3_404_92942_20141120_022830_outLine +BABEL_OP3_404_93007_20150615_051230_inLine +BABEL_OP3_404_93007_20150615_051230_outLine +BABEL_OP3_404_93858_20150611_043732_inLine +BABEL_OP3_404_94002_20141119_015307_inLine +BABEL_OP3_404_94002_20141119_015307_outLine +BABEL_OP3_404_94025_20141129_180207_inLine +BABEL_OP3_404_94025_20141129_180207_outLine +BABEL_OP3_404_94333_20141020_024439_outLine +BABEL_OP3_404_94487_20150518_005132_outLine +BABEL_OP3_404_94869_20141007_194254_inLine +BABEL_OP3_404_94869_20141007_194254_outLine +BABEL_OP3_404_95077_20141201_055702_outLine +BABEL_OP3_404_95269_20141105_221810_inLine +BABEL_OP3_404_95269_20141105_221810_outLine +BABEL_OP3_404_95338_20150610_211203_inLine +BABEL_OP3_404_95338_20150610_211203_outLine +BABEL_OP3_404_95399_20141119_001023_inLine +BABEL_OP3_404_95399_20141119_001023_outLine +BABEL_OP3_404_95583_20141019_010741_inLine +BABEL_OP3_404_95583_20141019_010741_outLine +BABEL_OP3_404_96059_20150524_042224_outLine +BABEL_OP3_404_96205_20141119_033053_inLine +BABEL_OP3_404_96205_20141119_033053_outLine +BABEL_OP3_404_96205_20141119_034909_inLine +BABEL_OP3_404_96205_20141119_034909_outLine +BABEL_OP3_404_96247_20150526_202623_outLine +BABEL_OP3_404_96376_20150503_033706_inLine +BABEL_OP3_404_96376_20150503_033706_outLine +BABEL_OP3_404_96504_20141103_031329_inLine +BABEL_OP3_404_96504_20141103_031329_outLine +BABEL_OP3_404_96690_20141117_053054_inLine +BABEL_OP3_404_96690_20141117_053054_outLine +BABEL_OP3_404_96808_20150609_034129_inLine 
+BABEL_OP3_404_97097_20150601_042649_outLine +BABEL_OP3_404_97136_20150528_011250_inLine +BABEL_OP3_404_97136_20150528_011250_outLine +BABEL_OP3_404_97557_20141119_230718_inLine +BABEL_OP3_404_97557_20141119_230718_outLine +BABEL_OP3_404_97588_20141018_234016_inLine +BABEL_OP3_404_97588_20141018_234016_outLine +BABEL_OP3_404_97588_20141018_235425_inLine +BABEL_OP3_404_97588_20141018_235425_outLine +BABEL_OP3_404_97896_20141116_221329_inLine +BABEL_OP3_404_97896_20141116_221329_outLine +BABEL_OP3_404_97911_20150613_195820_outLine +BABEL_OP3_404_97988_20141201_030306_inLine +BABEL_OP3_404_97988_20141201_030306_outLine +BABEL_OP3_404_98165_20141030_214051_inLine +BABEL_OP3_404_98165_20141030_214051_outLine +BABEL_OP3_404_98192_20150617_021906_outLine +BABEL_OP3_404_98489_20141102_002030_inLine +BABEL_OP3_404_98489_20141102_004054_inLine +BABEL_OP3_404_98678_20150528_021605_inLine +BABEL_OP3_404_98678_20150528_023029_inLine +BABEL_OP3_404_98888_20141113_212715_inLine +BABEL_OP3_404_98888_20141113_212715_outLine +BABEL_OP3_404_99202_20141108_210814_inLine +BABEL_OP3_404_99202_20141108_210814_outLine +BABEL_OP3_404_99289_20150521_220314_inLine +BABEL_OP3_404_99289_20150521_220314_outLine +BABEL_OP3_404_99289_20150521_222144_inLine +BABEL_OP3_404_99289_20150521_222144_outLine +BABEL_OP3_404_99594_20141105_194545_inLine +BABEL_OP3_404_99594_20141105_194545_outLine +BABEL_OP3_404_99718_20141019_051850_inLine +BABEL_OP3_404_99718_20141019_051850_outLine +BABEL_OP3_404_99718_20141019_053305_inLine +BABEL_OP3_404_99718_20141019_053305_outLine +BABEL_OP3_404_99732_20141130_232553_inLine +BABEL_OP3_404_99732_20141130_232553_outLine +BABEL_OP3_404_99813_20141120_025129_inLine +BABEL_OP3_404_99813_20141120_025129_outLine +BABEL_OP3_404_99920_20141022_052026_inLine
diff --git a/egs/babel/s5d/conf/lists/404-georgian/training.list b/egs/babel/s5d/conf/lists/404-georgian/training.list new file mode 100644 index 00000000000..efc0afb8219 --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/training.list @@ -0,0 +1,518 @@
+BABEL_OP3_404_10019_20141101_191932_inLine +BABEL_OP3_404_10019_20141101_191932_outLine +BABEL_OP3_404_10416_20141117_064700_inLine +BABEL_OP3_404_10416_20141117_064700_outLine +BABEL_OP3_404_10647_20150514_001106_inLine +BABEL_OP3_404_10647_20150514_001106_outLine +BABEL_OP3_404_10974_20141119_205506_inLine +BABEL_OP3_404_10974_20141119_205506_outLine +BABEL_OP3_404_11663_20141118_032146_inLine +BABEL_OP3_404_11663_20141118_032146_outLine +BABEL_OP3_404_11673_20141023_035438_inLine +BABEL_OP3_404_11673_20141023_035438_outLine +BABEL_OP3_404_11681_20141107_190101_inLine +BABEL_OP3_404_11681_20141107_190101_outLine +BABEL_OP3_404_12242_20141028_021853_inLine +BABEL_OP3_404_12242_20141028_021853_outLine +BABEL_OP3_404_13030_20141101_200709_inLine +BABEL_OP3_404_13030_20141101_200709_outLine +BABEL_OP3_404_13178_20141129_192909_inLine +BABEL_OP3_404_13178_20141129_192909_outLine +BABEL_OP3_404_13324_20141022_200257_inLine +BABEL_OP3_404_13324_20141022_200257_outLine +BABEL_OP3_404_13664_20141012_013523_inLine +BABEL_OP3_404_13664_20141012_013523_outLine +BABEL_OP3_404_13709_20150512_015216_inLine +BABEL_OP3_404_13709_20150512_015216_outLine +BABEL_OP3_404_14137_20141025_202817_inLine +BABEL_OP3_404_14137_20141025_202817_outLine +BABEL_OP3_404_14229_20141029_200136_inLine +BABEL_OP3_404_14229_20141029_200136_outLine +BABEL_OP3_404_14237_20141006_171921_inLine +BABEL_OP3_404_14237_20141006_171921_outLine +BABEL_OP3_404_14440_20141127_213106_inLine +BABEL_OP3_404_14440_20141127_213106_outLine
+BABEL_OP3_404_14807_20141110_231934_inLine +BABEL_OP3_404_14807_20141110_231934_outLine +BABEL_OP3_404_14875_20141026_230227_inLine +BABEL_OP3_404_14875_20141026_230227_outLine +BABEL_OP3_404_14899_20141022_202217_inLine +BABEL_OP3_404_14899_20141022_202217_outLine +BABEL_OP3_404_14929_20141129_192841_inLine +BABEL_OP3_404_14929_20141129_192841_outLine +BABEL_OP3_404_15382_20141130_213942_inLine +BABEL_OP3_404_15382_20141130_213942_outLine +BABEL_OP3_404_15848_20141006_231138_inLine +BABEL_OP3_404_15848_20141006_231138_outLine +BABEL_OP3_404_15869_20150218_225936_inLine +BABEL_OP3_404_15869_20150218_225936_outLine +BABEL_OP3_404_16149_20141010_173548_inLine +BABEL_OP3_404_16149_20141010_173548_outLine +BABEL_OP3_404_16467_20141130_014316_inLine +BABEL_OP3_404_16467_20141130_014316_outLine +BABEL_OP3_404_16467_20141130_015010_inLine +BABEL_OP3_404_16467_20141130_015010_outLine +BABEL_OP3_404_17113_20150611_050102_inLine +BABEL_OP3_404_17113_20150611_050102_outLine +BABEL_OP3_404_17280_20141103_190330_inLine +BABEL_OP3_404_17280_20141103_190330_outLine +BABEL_OP3_404_17615_20141201_025917_inLine +BABEL_OP3_404_17615_20141201_025917_outLine +BABEL_OP3_404_19134_20141120_053128_inLine +BABEL_OP3_404_19134_20141120_053128_outLine +BABEL_OP3_404_19703_20141027_004315_inLine +BABEL_OP3_404_19703_20141027_004315_outLine +BABEL_OP3_404_20133_20141010_195231_inLine +BABEL_OP3_404_20133_20141010_195231_outLine +BABEL_OP3_404_20985_20141126_183236_inLine +BABEL_OP3_404_20985_20141126_183236_outLine +BABEL_OP3_404_21004_20141201_035831_inLine +BABEL_OP3_404_21004_20141201_035831_outLine +BABEL_OP3_404_22280_20141111_020522_inLine +BABEL_OP3_404_22280_20141111_020522_outLine +BABEL_OP3_404_23046_20141031_030755_inLine +BABEL_OP3_404_23046_20141031_030755_outLine +BABEL_OP3_404_23505_20141021_032033_inLine +BABEL_OP3_404_23505_20141021_032033_outLine +BABEL_OP3_404_23731_20141130_033602_inLine +BABEL_OP3_404_23731_20141130_033602_outLine +BABEL_OP3_404_23980_20141106_225951_inLine +BABEL_OP3_404_23980_20141106_225951_outLine +BABEL_OP3_404_24270_20141111_012902_inLine +BABEL_OP3_404_24270_20141111_012902_outLine +BABEL_OP3_404_24470_20141111_184651_inLine +BABEL_OP3_404_24470_20141111_184651_outLine +BABEL_OP3_404_24470_20141111_190229_inLine +BABEL_OP3_404_24470_20141111_190229_outLine +BABEL_OP3_404_24532_20141007_211325_inLine +BABEL_OP3_404_24532_20141007_211325_outLine +BABEL_OP3_404_24589_20141031_020641_inLine +BABEL_OP3_404_24589_20141031_020641_outLine +BABEL_OP3_404_24679_20141018_015615_inLine +BABEL_OP3_404_24679_20141018_015615_outLine +BABEL_OP3_404_24982_20141102_021352_inLine +BABEL_OP3_404_24982_20141102_021352_outLine +BABEL_OP3_404_26388_20141026_014207_inLine +BABEL_OP3_404_26388_20141026_014207_outLine +BABEL_OP3_404_27042_20141201_215107_inLine +BABEL_OP3_404_27042_20141201_215107_outLine +BABEL_OP3_404_28303_20141028_182204_inLine +BABEL_OP3_404_28303_20141028_182204_outLine +BABEL_OP3_404_28522_20141124_222758_inLine +BABEL_OP3_404_28522_20141124_222758_outLine +BABEL_OP3_404_28538_20141119_005526_inLine +BABEL_OP3_404_28538_20141119_005526_outLine +BABEL_OP3_404_28871_20141019_181913_inLine +BABEL_OP3_404_28871_20141019_181913_outLine +BABEL_OP3_404_29039_20141128_035839_inLine +BABEL_OP3_404_29039_20141128_035839_outLine +BABEL_OP3_404_29208_20141106_013309_inLine +BABEL_OP3_404_29208_20141106_013309_outLine +BABEL_OP3_404_30098_20150610_150504_inLine +BABEL_OP3_404_30098_20150610_150504_outLine +BABEL_OP3_404_30432_20141126_052839_inLine 
+BABEL_OP3_404_30432_20141126_052839_outLine +BABEL_OP3_404_30461_20150620_020316_inLine +BABEL_OP3_404_30461_20150620_020316_outLine +BABEL_OP3_404_31624_20141105_214349_inLine +BABEL_OP3_404_31624_20141105_214349_outLine +BABEL_OP3_404_31979_20141106_000523_inLine +BABEL_OP3_404_31979_20141106_000523_outLine +BABEL_OP3_404_31992_20141014_221817_inLine +BABEL_OP3_404_31992_20141014_221817_outLine +BABEL_OP3_404_32122_20141115_022841_inLine +BABEL_OP3_404_32122_20141115_022841_outLine +BABEL_OP3_404_32287_20150210_060823_inLine +BABEL_OP3_404_32287_20150210_060823_outLine +BABEL_OP3_404_32708_20141106_032826_inLine +BABEL_OP3_404_32708_20141106_032826_outLine +BABEL_OP3_404_32727_20141128_203500_inLine +BABEL_OP3_404_32727_20141128_203500_outLine +BABEL_OP3_404_32727_20141128_204751_inLine +BABEL_OP3_404_32727_20141128_204751_outLine +BABEL_OP3_404_33355_20141019_032024_inLine +BABEL_OP3_404_33355_20141019_032024_outLine +BABEL_OP3_404_33355_20141019_034109_inLine +BABEL_OP3_404_33355_20141019_034109_outLine +BABEL_OP3_404_33704_20141207_073436_inLine +BABEL_OP3_404_33704_20141207_073436_outLine +BABEL_OP3_404_34679_20141102_052808_inLine +BABEL_OP3_404_34679_20141102_052808_outLine +BABEL_OP3_404_34688_20141009_073303_inLine +BABEL_OP3_404_34688_20141009_073303_outLine +BABEL_OP3_404_35143_20141130_181111_inLine +BABEL_OP3_404_35143_20141130_181111_outLine +BABEL_OP3_404_37064_20141102_063308_inLine +BABEL_OP3_404_37064_20141102_063308_outLine +BABEL_OP3_404_37281_20141119_053453_inLine +BABEL_OP3_404_37281_20141119_053453_outLine +BABEL_OP3_404_37598_20141119_045926_inLine +BABEL_OP3_404_37598_20141119_045926_outLine +BABEL_OP3_404_37682_20141101_221445_inLine +BABEL_OP3_404_37682_20141101_221445_outLine +BABEL_OP3_404_37853_20150602_030625_inLine +BABEL_OP3_404_37853_20150602_030625_outLine +BABEL_OP3_404_38588_20141118_163844_inLine +BABEL_OP3_404_38588_20141118_163844_outLine +BABEL_OP3_404_40557_20141127_200639_inLine +BABEL_OP3_404_40557_20141127_200639_outLine +BABEL_OP3_404_40713_20141028_221207_inLine +BABEL_OP3_404_40713_20141028_221207_outLine +BABEL_OP3_404_40939_20150210_212748_inLine +BABEL_OP3_404_40939_20150210_212748_outLine +BABEL_OP3_404_41100_20141021_022126_inLine +BABEL_OP3_404_41100_20141021_022126_outLine +BABEL_OP3_404_41609_20141009_013405_inLine +BABEL_OP3_404_41609_20141009_013405_outLine +BABEL_OP3_404_41680_20141012_040411_inLine +BABEL_OP3_404_41680_20141012_040411_outLine +BABEL_OP3_404_41920_20141008_040539_inLine +BABEL_OP3_404_41920_20141008_040539_outLine +BABEL_OP3_404_41958_20141029_212755_inLine +BABEL_OP3_404_41958_20141029_212755_outLine +BABEL_OP3_404_42877_20150212_052937_inLine +BABEL_OP3_404_42877_20150212_052937_outLine +BABEL_OP3_404_43368_20141031_010629_inLine +BABEL_OP3_404_43368_20141031_010629_outLine +BABEL_OP3_404_44114_20150614_012319_inLine +BABEL_OP3_404_44114_20150614_012319_outLine +BABEL_OP3_404_44477_20141201_180604_inLine +BABEL_OP3_404_44477_20141201_180604_outLine +BABEL_OP3_404_44847_20141130_221248_inLine +BABEL_OP3_404_44847_20141130_221248_outLine +BABEL_OP3_404_45121_20150609_055234_inLine +BABEL_OP3_404_45121_20150609_055234_outLine +BABEL_OP3_404_45560_20141012_030417_inLine +BABEL_OP3_404_45560_20141012_030417_outLine +BABEL_OP3_404_46169_20141130_224339_inLine +BABEL_OP3_404_46169_20141130_224339_outLine +BABEL_OP3_404_46268_20141019_032022_inLine +BABEL_OP3_404_46268_20141019_032022_outLine +BABEL_OP3_404_46550_20141105_072519_inLine +BABEL_OP3_404_46550_20141105_072519_outLine 
+BABEL_OP3_404_46625_20141011_040505_inLine +BABEL_OP3_404_46625_20141011_040505_outLine +BABEL_OP3_404_46681_20141021_040451_inLine +BABEL_OP3_404_46681_20141021_040451_outLine +BABEL_OP3_404_46881_20141012_020055_inLine +BABEL_OP3_404_46881_20141012_020055_outLine +BABEL_OP3_404_46976_20141107_183806_inLine +BABEL_OP3_404_46976_20141107_183806_outLine +BABEL_OP3_404_47270_20150512_053415_inLine +BABEL_OP3_404_47270_20150512_053415_outLine +BABEL_OP3_404_47802_20141110_200430_inLine +BABEL_OP3_404_47802_20141110_200430_outLine +BABEL_OP3_404_48243_20141023_200903_inLine +BABEL_OP3_404_48243_20141023_200903_outLine +BABEL_OP3_404_48844_20141020_065414_inLine +BABEL_OP3_404_48844_20141020_065414_outLine +BABEL_OP3_404_49197_20141117_024730_inLine +BABEL_OP3_404_49197_20141117_024730_outLine +BABEL_OP3_404_49768_20141026_022902_inLine +BABEL_OP3_404_49768_20141026_022902_outLine +BABEL_OP3_404_49902_20141101_175534_inLine +BABEL_OP3_404_49902_20141101_175534_outLine +BABEL_OP3_404_49907_20141103_050534_inLine +BABEL_OP3_404_49907_20141103_050534_outLine +BABEL_OP3_404_50175_20141021_025726_inLine +BABEL_OP3_404_50175_20141021_025726_outLine +BABEL_OP3_404_50745_20150513_162805_inLine +BABEL_OP3_404_50745_20150513_162805_outLine +BABEL_OP3_404_51015_20141123_193824_inLine +BABEL_OP3_404_51015_20141123_193824_outLine +BABEL_OP3_404_52246_20141118_035022_inLine +BABEL_OP3_404_52246_20141118_035022_outLine +BABEL_OP3_404_52246_20141118_040850_inLine +BABEL_OP3_404_52246_20141118_040850_outLine +BABEL_OP3_404_52301_20141009_051739_inLine +BABEL_OP3_404_52301_20141009_051739_outLine +BABEL_OP3_404_52301_20141009_054049_inLine +BABEL_OP3_404_52301_20141009_054049_outLine +BABEL_OP3_404_52490_20141016_020323_inLine +BABEL_OP3_404_52490_20141016_020323_outLine +BABEL_OP3_404_52725_20150522_222730_inLine +BABEL_OP3_404_52725_20150522_222730_outLine +BABEL_OP3_404_54104_20141008_214620_inLine +BABEL_OP3_404_54104_20141008_214620_outLine +BABEL_OP3_404_54160_20141009_180704_inLine +BABEL_OP3_404_54160_20141009_180704_outLine +BABEL_OP3_404_54160_20141009_184719_inLine +BABEL_OP3_404_54160_20141009_184719_outLine +BABEL_OP3_404_54160_20141009_185557_inLine +BABEL_OP3_404_54160_20141009_185557_outLine +BABEL_OP3_404_54405_20141117_054820_inLine +BABEL_OP3_404_54405_20141117_054820_outLine +BABEL_OP3_404_54744_20141015_012011_inLine +BABEL_OP3_404_54744_20141015_012011_outLine +BABEL_OP3_404_55259_20141029_225631_inLine +BABEL_OP3_404_55259_20141029_225631_outLine +BABEL_OP3_404_56213_20141201_000837_inLine +BABEL_OP3_404_56213_20141201_000837_outLine +BABEL_OP3_404_57654_20141023_235628_inLine +BABEL_OP3_404_57654_20141023_235628_outLine +BABEL_OP3_404_57678_20141104_023128_inLine +BABEL_OP3_404_57678_20141104_023128_outLine +BABEL_OP3_404_57919_20150127_041057_inLine +BABEL_OP3_404_57919_20150127_041057_outLine +BABEL_OP3_404_58103_20141030_002209_inLine +BABEL_OP3_404_58103_20141030_002209_outLine +BABEL_OP3_404_59078_20141111_004941_inLine +BABEL_OP3_404_59078_20141111_004941_outLine +BABEL_OP3_404_59262_20141130_212633_inLine +BABEL_OP3_404_59262_20141130_212633_outLine +BABEL_OP3_404_59720_20141029_204612_inLine +BABEL_OP3_404_59720_20141029_204612_outLine +BABEL_OP3_404_60026_20141008_051633_inLine +BABEL_OP3_404_60026_20141008_051633_outLine +BABEL_OP3_404_60474_20141029_182816_inLine +BABEL_OP3_404_60474_20141029_182816_outLine +BABEL_OP3_404_60626_20141028_212539_inLine +BABEL_OP3_404_60626_20141028_212539_outLine +BABEL_OP3_404_61167_20141030_222711_inLine 
+BABEL_OP3_404_61167_20141030_222711_outLine +BABEL_OP3_404_61219_20141025_193634_inLine +BABEL_OP3_404_61219_20141025_193634_outLine +BABEL_OP3_404_61225_20141009_174003_inLine +BABEL_OP3_404_61225_20141009_174003_outLine +BABEL_OP3_404_61678_20141019_201928_inLine +BABEL_OP3_404_61678_20141019_201928_outLine +BABEL_OP3_404_61873_20141108_214852_inLine +BABEL_OP3_404_61873_20141108_214852_outLine +BABEL_OP3_404_62155_20150522_032307_inLine +BABEL_OP3_404_62155_20150522_032307_outLine +BABEL_OP3_404_62286_20141105_204359_inLine +BABEL_OP3_404_62286_20141105_204359_outLine +BABEL_OP3_404_62456_20141108_202333_inLine +BABEL_OP3_404_62456_20141108_202333_outLine +BABEL_OP3_404_62714_20150522_011337_inLine +BABEL_OP3_404_62714_20150522_011337_outLine +BABEL_OP3_404_62734_20141029_221513_inLine +BABEL_OP3_404_62734_20141029_221513_outLine +BABEL_OP3_404_63081_20141021_032233_inLine +BABEL_OP3_404_63081_20141021_032233_outLine +BABEL_OP3_404_63081_20141021_033457_inLine +BABEL_OP3_404_63081_20141021_033457_outLine +BABEL_OP3_404_63084_20141130_221452_inLine +BABEL_OP3_404_63084_20141130_221452_outLine +BABEL_OP3_404_63220_20141127_033605_inLine +BABEL_OP3_404_63220_20141127_033605_outLine +BABEL_OP3_404_63757_20141111_180721_inLine +BABEL_OP3_404_63757_20141111_180721_outLine +BABEL_OP3_404_64494_20141026_203549_inLine +BABEL_OP3_404_64494_20141026_203549_outLine +BABEL_OP3_404_64768_20141027_201818_inLine +BABEL_OP3_404_64768_20141027_201818_outLine +BABEL_OP3_404_64870_20141108_192546_inLine +BABEL_OP3_404_64870_20141108_192546_outLine +BABEL_OP3_404_66045_20141117_035937_inLine +BABEL_OP3_404_66045_20141117_035937_outLine +BABEL_OP3_404_66177_20150503_202932_inLine +BABEL_OP3_404_66177_20150503_202932_outLine +BABEL_OP3_404_66822_20141117_020953_inLine +BABEL_OP3_404_66822_20141117_020953_outLine +BABEL_OP3_404_66916_20141022_000731_inLine +BABEL_OP3_404_66916_20141022_000731_outLine +BABEL_OP3_404_67401_20141109_211809_inLine +BABEL_OP3_404_67401_20141109_211809_outLine +BABEL_OP3_404_67842_20141104_051753_inLine +BABEL_OP3_404_67842_20141104_051753_outLine +BABEL_OP3_404_68059_20141109_052011_inLine +BABEL_OP3_404_68059_20141109_052011_outLine +BABEL_OP3_404_68068_20141201_054518_inLine +BABEL_OP3_404_68068_20141201_054518_outLine +BABEL_OP3_404_68244_20141119_065540_inLine +BABEL_OP3_404_68244_20141119_065540_outLine +BABEL_OP3_404_68384_20141130_035214_inLine +BABEL_OP3_404_68384_20141130_035214_outLine +BABEL_OP3_404_68385_20141017_031005_inLine +BABEL_OP3_404_68385_20141017_031005_outLine +BABEL_OP3_404_68627_20141105_190511_inLine +BABEL_OP3_404_68627_20141105_190511_outLine +BABEL_OP3_404_68823_20150212_041147_inLine +BABEL_OP3_404_68823_20150212_041147_outLine +BABEL_OP3_404_69107_20141120_010459_inLine +BABEL_OP3_404_69107_20141120_010459_outLine +BABEL_OP3_404_69574_20141006_023156_inLine +BABEL_OP3_404_69574_20141006_023156_outLine +BABEL_OP3_404_69578_20141117_003921_inLine +BABEL_OP3_404_69578_20141117_003921_outLine +BABEL_OP3_404_70121_20141104_202610_inLine +BABEL_OP3_404_70121_20141104_202610_outLine +BABEL_OP3_404_70282_20141111_000251_inLine +BABEL_OP3_404_70282_20141111_000251_outLine +BABEL_OP3_404_70794_20141021_185105_inLine +BABEL_OP3_404_70794_20141021_185105_outLine +BABEL_OP3_404_71263_20141119_234747_inLine +BABEL_OP3_404_71263_20141119_234747_outLine +BABEL_OP3_404_71401_20150206_070446_inLine +BABEL_OP3_404_71401_20150206_070446_outLine +BABEL_OP3_404_71404_20141023_215509_inLine +BABEL_OP3_404_71404_20141023_215509_outLine 
+BABEL_OP3_404_71566_20141130_035713_inLine +BABEL_OP3_404_71566_20141130_035713_outLine +BABEL_OP3_404_71566_20141130_040359_inLine +BABEL_OP3_404_71566_20141130_040359_outLine +BABEL_OP3_404_72844_20141007_033837_inLine +BABEL_OP3_404_72844_20141007_033837_outLine +BABEL_OP3_404_73119_20141026_232203_inLine +BABEL_OP3_404_73119_20141026_232203_outLine +BABEL_OP3_404_73485_20150512_234636_inLine +BABEL_OP3_404_73485_20150512_234636_outLine +BABEL_OP3_404_73837_20141026_191037_inLine +BABEL_OP3_404_73837_20141026_191037_outLine +BABEL_OP3_404_74641_20141108_223951_inLine +BABEL_OP3_404_74641_20141108_223951_outLine +BABEL_OP3_404_74799_20141109_222638_inLine +BABEL_OP3_404_74799_20141109_222638_outLine +BABEL_OP3_404_75869_20150527_230650_inLine +BABEL_OP3_404_75869_20150527_230650_outLine +BABEL_OP3_404_76437_20141019_202715_inLine +BABEL_OP3_404_76437_20141019_202715_outLine +BABEL_OP3_404_77126_20141022_202348_inLine +BABEL_OP3_404_77126_20141022_202348_outLine +BABEL_OP3_404_77391_20141026_222314_inLine +BABEL_OP3_404_77391_20141026_222314_outLine +BABEL_OP3_404_77427_20141030_192713_inLine +BABEL_OP3_404_77427_20141030_192713_outLine +BABEL_OP3_404_77730_20141014_201059_inLine +BABEL_OP3_404_77730_20141014_201059_outLine +BABEL_OP3_404_77990_20141024_215822_inLine +BABEL_OP3_404_77990_20141024_215822_outLine +BABEL_OP3_404_78016_20141029_233059_inLine +BABEL_OP3_404_78016_20141029_233059_outLine +BABEL_OP3_404_78254_20141025_202742_inLine +BABEL_OP3_404_78254_20141025_202742_outLine +BABEL_OP3_404_78254_20141025_204922_inLine +BABEL_OP3_404_78254_20141025_204922_outLine +BABEL_OP3_404_78511_20141201_003606_inLine +BABEL_OP3_404_78511_20141201_003606_outLine +BABEL_OP3_404_78976_20141025_183704_inLine +BABEL_OP3_404_78976_20141025_183704_outLine +BABEL_OP3_404_79139_20141117_054733_inLine +BABEL_OP3_404_79139_20141117_054733_outLine +BABEL_OP3_404_79751_20141101_232250_inLine +BABEL_OP3_404_79751_20141101_232250_outLine +BABEL_OP3_404_80439_20141026_005410_inLine +BABEL_OP3_404_80439_20141026_005410_outLine +BABEL_OP3_404_81213_20141102_205052_inLine +BABEL_OP3_404_81213_20141102_205052_outLine +BABEL_OP3_404_81229_20141117_041745_inLine +BABEL_OP3_404_81229_20141117_041745_outLine +BABEL_OP3_404_81971_20141022_025641_inLine +BABEL_OP3_404_81971_20141022_025641_outLine +BABEL_OP3_404_82089_20141117_045302_inLine +BABEL_OP3_404_82089_20141117_045302_outLine +BABEL_OP3_404_82303_20150614_024236_inLine +BABEL_OP3_404_82303_20150614_024236_outLine +BABEL_OP3_404_82473_20141026_060037_inLine +BABEL_OP3_404_82473_20141026_060037_outLine +BABEL_OP3_404_82637_20141021_010105_inLine +BABEL_OP3_404_82637_20141021_010105_outLine +BABEL_OP3_404_82742_20141201_234306_inLine +BABEL_OP3_404_82742_20141201_234306_outLine +BABEL_OP3_404_83062_20150523_220236_inLine +BABEL_OP3_404_83062_20150523_220236_outLine +BABEL_OP3_404_83238_20141119_180953_inLine +BABEL_OP3_404_83238_20141119_180953_outLine +BABEL_OP3_404_83366_20141120_192208_inLine +BABEL_OP3_404_83366_20141120_192208_outLine +BABEL_OP3_404_83775_20141030_230742_inLine +BABEL_OP3_404_83775_20141030_230742_outLine +BABEL_OP3_404_83851_20141028_203735_inLine +BABEL_OP3_404_83851_20141028_203735_outLine +BABEL_OP3_404_83929_20141018_184023_inLine +BABEL_OP3_404_83929_20141018_184023_outLine +BABEL_OP3_404_84055_20150504_002015_inLine +BABEL_OP3_404_84055_20150504_002015_outLine +BABEL_OP3_404_84061_20141030_205021_inLine +BABEL_OP3_404_84061_20141030_205021_outLine +BABEL_OP3_404_84339_20150502_014143_inLine 
+BABEL_OP3_404_84339_20150502_014143_outLine +BABEL_OP3_404_85048_20141127_023704_inLine +BABEL_OP3_404_85048_20141127_023704_outLine +BABEL_OP3_404_85254_20150620_035606_inLine +BABEL_OP3_404_85254_20150620_035606_outLine +BABEL_OP3_404_85322_20141008_235518_inLine +BABEL_OP3_404_85322_20141008_235518_outLine +BABEL_OP3_404_85651_20141211_032650_inLine +BABEL_OP3_404_85651_20141211_032650_outLine +BABEL_OP3_404_86191_20141027_013544_inLine +BABEL_OP3_404_86191_20141027_013544_outLine +BABEL_OP3_404_86472_20141201_011325_inLine +BABEL_OP3_404_86472_20141201_011325_outLine +BABEL_OP3_404_86635_20141127_204158_inLine +BABEL_OP3_404_86635_20141127_204158_outLine +BABEL_OP3_404_86722_20141029_192140_inLine +BABEL_OP3_404_86722_20141029_192140_outLine +BABEL_OP3_404_86888_20141119_022459_inLine +BABEL_OP3_404_86888_20141119_022459_outLine +BABEL_OP3_404_87470_20141114_214639_inLine +BABEL_OP3_404_87470_20141114_214639_outLine +BABEL_OP3_404_87629_20141127_020403_inLine +BABEL_OP3_404_87629_20141127_020403_outLine +BABEL_OP3_404_88260_20141103_234824_inLine +BABEL_OP3_404_88260_20141103_234824_outLine +BABEL_OP3_404_88445_20141119_043713_inLine +BABEL_OP3_404_88445_20141119_043713_outLine +BABEL_OP3_404_88661_20141127_025208_inLine +BABEL_OP3_404_88661_20141127_025208_outLine +BABEL_OP3_404_88669_20141119_000147_inLine +BABEL_OP3_404_88669_20141119_000147_outLine +BABEL_OP3_404_88783_20141201_045305_inLine +BABEL_OP3_404_88783_20141201_045305_outLine +BABEL_OP3_404_89045_20141022_193202_inLine +BABEL_OP3_404_89045_20141022_193202_outLine +BABEL_OP3_404_89372_20141010_000950_inLine +BABEL_OP3_404_89372_20141010_000950_outLine +BABEL_OP3_404_89650_20150220_222402_inLine +BABEL_OP3_404_89650_20150220_222402_outLine +BABEL_OP3_404_89650_20150220_224606_inLine +BABEL_OP3_404_89650_20150220_224606_outLine +BABEL_OP3_404_89665_20141103_202723_inLine +BABEL_OP3_404_89665_20141103_202723_outLine +BABEL_OP3_404_90930_20150119_021352_inLine +BABEL_OP3_404_90930_20150119_021352_outLine +BABEL_OP3_404_91463_20141116_023036_inLine +BABEL_OP3_404_91463_20141116_023036_outLine +BABEL_OP3_404_91825_20141009_181224_inLine +BABEL_OP3_404_91825_20141009_181224_outLine +BABEL_OP3_404_91825_20141009_183843_inLine +BABEL_OP3_404_91825_20141009_183843_outLine +BABEL_OP3_404_91971_20150217_041455_inLine +BABEL_OP3_404_91971_20150217_041455_outLine +BABEL_OP3_404_92698_20141117_072302_inLine +BABEL_OP3_404_92698_20141117_072302_outLine +BABEL_OP3_404_92736_20141201_011442_inLine +BABEL_OP3_404_92736_20141201_011442_outLine +BABEL_OP3_404_94025_20141129_180207_inLine +BABEL_OP3_404_94025_20141129_180207_outLine +BABEL_OP3_404_94869_20141007_194254_inLine +BABEL_OP3_404_94869_20141007_194254_outLine +BABEL_OP3_404_95966_20141129_060246_inLine +BABEL_OP3_404_95966_20141129_060246_outLine +BABEL_OP3_404_96376_20150503_033706_inLine +BABEL_OP3_404_96376_20150503_033706_outLine +BABEL_OP3_404_96504_20141103_031329_inLine +BABEL_OP3_404_96504_20141103_031329_outLine +BABEL_OP3_404_97461_20141118_230730_inLine +BABEL_OP3_404_97461_20141118_230730_outLine +BABEL_OP3_404_97557_20141119_230718_inLine +BABEL_OP3_404_97557_20141119_230718_outLine +BABEL_OP3_404_97588_20141018_234016_inLine +BABEL_OP3_404_97588_20141018_234016_outLine +BABEL_OP3_404_97588_20141018_235425_inLine +BABEL_OP3_404_97588_20141018_235425_outLine +BABEL_OP3_404_97896_20141116_221329_inLine +BABEL_OP3_404_97896_20141116_221329_outLine +BABEL_OP3_404_97988_20141201_030306_inLine +BABEL_OP3_404_97988_20141201_030306_outLine 
+BABEL_OP3_404_98888_20141113_212715_inLine +BABEL_OP3_404_98888_20141113_212715_outLine +BABEL_OP3_404_99202_20141108_210814_inLine +BABEL_OP3_404_99202_20141108_210814_outLine +BABEL_OP3_404_99487_20141021_053024_inLine +BABEL_OP3_404_99487_20141021_053024_outLine +BABEL_OP3_404_99594_20141105_194545_inLine +BABEL_OP3_404_99594_20141105_194545_outLine +BABEL_OP3_404_99813_20141120_025129_inLine +BABEL_OP3_404_99813_20141120_025129_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list b/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list new file mode 100644 index 00000000000..8d6682cc789 --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list @@ -0,0 +1,535 @@ +BABEL_OP3_404_10058_20150526_034808_inLine +BABEL_OP3_404_10411_20150611_172027_inLine +BABEL_OP3_404_10411_20150611_172027_outLine +BABEL_OP3_404_10938_20141030_023413_inLine +BABEL_OP3_404_10938_20141030_023413_outLine +BABEL_OP3_404_11352_20150513_002642_inLine +BABEL_OP3_404_11352_20150513_002642_outLine +BABEL_OP3_404_11859_20150611_041737_inLine +BABEL_OP3_404_11859_20150611_041737_outLine +BABEL_OP3_404_12220_20141116_205911_inLine +BABEL_OP3_404_12220_20141116_205911_outLine +BABEL_OP3_404_12609_20150524_172934_inLine +BABEL_OP3_404_12609_20150524_172934_outLine +BABEL_OP3_404_13126_20150524_221540_inLine +BABEL_OP3_404_13126_20150524_221540_outLine +BABEL_OP3_404_14158_20141130_030130_inLine +BABEL_OP3_404_14158_20141130_030130_outLine +BABEL_OP3_404_15024_20141118_234824_inLine +BABEL_OP3_404_15024_20141118_234824_outLine +BABEL_OP3_404_15042_20150506_232829_inLine +BABEL_OP3_404_15042_20150506_232829_outLine +BABEL_OP3_404_15535_20141129_021659_inLine +BABEL_OP3_404_15535_20141129_021659_outLine +BABEL_OP3_404_15638_20141127_220502_outLine +BABEL_OP3_404_15902_20141020_173105_outLine +BABEL_OP3_404_16475_20141116_052010_outLine +BABEL_OP3_404_16601_20141201_041704_inLine +BABEL_OP3_404_16601_20141201_041704_outLine +BABEL_OP3_404_17320_20150524_213213_inLine +BABEL_OP3_404_17320_20150524_213213_outLine +BABEL_OP3_404_17420_20150503_201902_inLine +BABEL_OP3_404_17420_20150503_201902_outLine +BABEL_OP3_404_17420_20150527_025815_inLine +BABEL_OP3_404_17420_20150527_025815_outLine +BABEL_OP3_404_17420_20150527_034621_inLine +BABEL_OP3_404_17420_20150527_034621_outLine +BABEL_OP3_404_17520_20141113_032534_inLine +BABEL_OP3_404_17567_20141117_182919_inLine +BABEL_OP3_404_17567_20141117_182919_outLine +BABEL_OP3_404_17573_20141129_035040_inLine +BABEL_OP3_404_17573_20141129_035040_outLine +BABEL_OP3_404_17890_20141128_040046_inLine +BABEL_OP3_404_17890_20141128_040046_outLine +BABEL_OP3_404_17923_20141022_231429_outLine +BABEL_OP3_404_18118_20150503_165936_inLine +BABEL_OP3_404_18118_20150503_165936_outLine +BABEL_OP3_404_18291_20150611_062705_outLine +BABEL_OP3_404_18291_20150611_063700_outLine +BABEL_OP3_404_18766_20150610_064349_inLine +BABEL_OP3_404_19120_20150525_014657_inLine +BABEL_OP3_404_19120_20150525_014657_outLine +BABEL_OP3_404_19120_20150525_015635_inLine +BABEL_OP3_404_19120_20150525_015635_outLine +BABEL_OP3_404_19877_20150506_202237_outLine +BABEL_OP3_404_20454_20150218_171143_inLine +BABEL_OP3_404_20454_20150218_171143_outLine +BABEL_OP3_404_21159_20150615_021612_inLine +BABEL_OP3_404_21435_20150523_030702_inLine +BABEL_OP3_404_21435_20150523_030702_outLine +BABEL_OP3_404_21581_20141101_011021_inLine +BABEL_OP3_404_21581_20141101_011021_outLine +BABEL_OP3_404_21807_20141112_225225_outLine +BABEL_OP3_404_22591_20150217_220714_inLine 
+BABEL_OP3_404_24209_20150212_224614_inLine +BABEL_OP3_404_24239_20150517_203015_inLine +BABEL_OP3_404_24323_20141117_020615_outLine +BABEL_OP3_404_24501_20150522_030231_inLine +BABEL_OP3_404_24586_20150524_190657_inLine +BABEL_OP3_404_24586_20150524_190657_outLine +BABEL_OP3_404_24590_20141116_230233_inLine +BABEL_OP3_404_24590_20141116_230233_outLine +BABEL_OP3_404_25068_20150206_022730_outLine +BABEL_OP3_404_25085_20150611_040906_inLine +BABEL_OP3_404_25085_20150611_040906_outLine +BABEL_OP3_404_25412_20141120_031532_inLine +BABEL_OP3_404_25412_20141120_031532_outLine +BABEL_OP3_404_25496_20150613_034126_inLine +BABEL_OP3_404_25496_20150613_034126_outLine +BABEL_OP3_404_26398_20150527_032152_inLine +BABEL_OP3_404_26398_20150527_032152_outLine +BABEL_OP3_404_26478_20150617_004029_inLine +BABEL_OP3_404_26478_20150617_004029_outLine +BABEL_OP3_404_26836_20141102_024528_inLine +BABEL_OP3_404_26836_20141102_024528_outLine +BABEL_OP3_404_27203_20141119_185720_inLine +BABEL_OP3_404_27203_20141119_185720_outLine +BABEL_OP3_404_27203_20141119_191138_inLine +BABEL_OP3_404_27203_20141119_191138_outLine +BABEL_OP3_404_27590_20141128_051454_inLine +BABEL_OP3_404_28280_20150619_024509_inLine +BABEL_OP3_404_28280_20150619_024509_outLine +BABEL_OP3_404_28280_20150619_025848_inLine +BABEL_OP3_404_28280_20150619_025848_outLine +BABEL_OP3_404_28600_20141201_223206_inLine +BABEL_OP3_404_28600_20141201_223206_outLine +BABEL_OP3_404_28945_20141104_060349_outLine +BABEL_OP3_404_29076_20141109_215142_inLine +BABEL_OP3_404_29076_20141109_215142_outLine +BABEL_OP3_404_29230_20150611_051340_inLine +BABEL_OP3_404_29230_20150611_051340_outLine +BABEL_OP3_404_29439_20150524_201524_inLine +BABEL_OP3_404_29439_20150524_201524_outLine +BABEL_OP3_404_30497_20150525_194737_inLine +BABEL_OP3_404_30497_20150525_194737_outLine +BABEL_OP3_404_30645_20141019_220859_inLine +BABEL_OP3_404_30653_20150514_014515_inLine +BABEL_OP3_404_31267_20150615_011004_outLine +BABEL_OP3_404_31484_20141122_232804_inLine +BABEL_OP3_404_31484_20141122_232804_outLine +BABEL_OP3_404_31919_20150526_220911_inLine +BABEL_OP3_404_31919_20150526_220911_outLine +BABEL_OP3_404_32630_20150609_012137_inLine +BABEL_OP3_404_32630_20150609_012137_outLine +BABEL_OP3_404_32959_20141201_005331_inLine +BABEL_OP3_404_32959_20141201_005331_outLine +BABEL_OP3_404_32998_20141112_054111_inLine +BABEL_OP3_404_34328_20141119_054513_outLine +BABEL_OP3_404_34328_20141119_055432_outLine +BABEL_OP3_404_34811_20141109_001009_inLine +BABEL_OP3_404_34811_20141109_001009_outLine +BABEL_OP3_404_34899_20150611_060602_outLine +BABEL_OP3_404_35008_20141201_023042_inLine +BABEL_OP3_404_35008_20141201_023042_outLine +BABEL_OP3_404_35181_20150526_211416_inLine +BABEL_OP3_404_35181_20150526_211416_outLine +BABEL_OP3_404_35706_20150523_015900_inLine +BABEL_OP3_404_35706_20150523_015900_outLine +BABEL_OP3_404_35786_20150604_015518_inLine +BABEL_OP3_404_35786_20150604_015518_outLine +BABEL_OP3_404_36017_20150528_192934_inLine +BABEL_OP3_404_36017_20150528_192934_outLine +BABEL_OP3_404_36039_20150526_230125_inLine +BABEL_OP3_404_36039_20150526_230125_outLine +BABEL_OP3_404_36059_20150601_023254_inLine +BABEL_OP3_404_36059_20150601_023254_outLine +BABEL_OP3_404_36059_20150601_033346_inLine +BABEL_OP3_404_36059_20150601_033346_outLine +BABEL_OP3_404_36147_20150211_013803_outLine +BABEL_OP3_404_36219_20141104_012216_inLine +BABEL_OP3_404_36219_20141104_012216_outLine +BABEL_OP3_404_36642_20150610_161207_inLine +BABEL_OP3_404_36642_20150610_161207_outLine 
+BABEL_OP3_404_37290_20141115_050457_inLine +BABEL_OP3_404_37290_20141115_050457_outLine +BABEL_OP3_404_38125_20150526_233108_inLine +BABEL_OP3_404_38125_20150526_233108_outLine +BABEL_OP3_404_38323_20150615_021843_inLine +BABEL_OP3_404_38340_20141103_231545_inLine +BABEL_OP3_404_38340_20141103_231545_outLine +BABEL_OP3_404_38554_20141010_224451_inLine +BABEL_OP3_404_38554_20141010_224451_outLine +BABEL_OP3_404_38664_20141030_175135_inLine +BABEL_OP3_404_38664_20141030_175135_outLine +BABEL_OP3_404_38979_20150503_202406_outLine +BABEL_OP3_404_39099_20150511_053646_outLine +BABEL_OP3_404_39307_20141022_200554_inLine +BABEL_OP3_404_39307_20141022_201758_inLine +BABEL_OP3_404_39426_20150527_181901_outLine +BABEL_OP3_404_39744_20141023_002710_inLine +BABEL_OP3_404_39893_20150611_034149_inLine +BABEL_OP3_404_39920_20150503_205354_outLine +BABEL_OP3_404_41097_20141129_055801_inLine +BABEL_OP3_404_41097_20141129_055801_outLine +BABEL_OP3_404_41272_20150503_232941_inLine +BABEL_OP3_404_41334_20150617_041322_inLine +BABEL_OP3_404_41400_20150515_021408_inLine +BABEL_OP3_404_41692_20150604_005657_inLine +BABEL_OP3_404_41692_20150604_005657_outLine +BABEL_OP3_404_41745_20141114_235452_inLine +BABEL_OP3_404_41745_20141114_235452_outLine +BABEL_OP3_404_42155_20141127_055149_inLine +BABEL_OP3_404_42619_20141130_012456_outLine +BABEL_OP3_404_42834_20141125_004837_inLine +BABEL_OP3_404_42834_20141125_004837_outLine +BABEL_OP3_404_42883_20150604_035732_inLine +BABEL_OP3_404_42883_20150604_035732_outLine +BABEL_OP3_404_43388_20141114_212210_inLine +BABEL_OP3_404_43388_20141114_214120_inLine +BABEL_OP3_404_43588_20150517_233637_inLine +BABEL_OP3_404_43789_20141120_011327_outLine +BABEL_OP3_404_44309_20150525_022635_inLine +BABEL_OP3_404_44309_20150525_022635_outLine +BABEL_OP3_404_44478_20150512_225118_inLine +BABEL_OP3_404_45106_20141119_050859_inLine +BABEL_OP3_404_45106_20141119_050859_outLine +BABEL_OP3_404_45374_20150122_014830_outLine +BABEL_OP3_404_45374_20150122_015920_outLine +BABEL_OP3_404_45459_20150525_020410_inLine +BABEL_OP3_404_45459_20150525_020410_outLine +BABEL_OP3_404_45699_20150205_021829_inLine +BABEL_OP3_404_45851_20150514_155157_inLine +BABEL_OP3_404_45851_20150514_155157_outLine +BABEL_OP3_404_45908_20150515_004218_outLine +BABEL_OP3_404_46310_20141015_051100_inLine +BABEL_OP3_404_46310_20141015_051100_outLine +BABEL_OP3_404_46315_20141129_012912_inLine +BABEL_OP3_404_46315_20141129_012912_outLine +BABEL_OP3_404_46688_20141015_211329_inLine +BABEL_OP3_404_46688_20141015_211329_outLine +BABEL_OP3_404_46712_20141027_224004_inLine +BABEL_OP3_404_46712_20141027_224004_outLine +BABEL_OP3_404_46974_20141128_055136_inLine +BABEL_OP3_404_46974_20141128_055136_outLine +BABEL_OP3_404_47156_20150625_025324_inLine +BABEL_OP3_404_47156_20150625_025324_outLine +BABEL_OP3_404_47823_20141201_044425_inLine +BABEL_OP3_404_47823_20141201_044425_outLine +BABEL_OP3_404_48016_20150615_000741_inLine +BABEL_OP3_404_48016_20150615_000741_outLine +BABEL_OP3_404_48610_20141013_011505_inLine +BABEL_OP3_404_48610_20141013_012904_inLine +BABEL_OP3_404_48663_20150512_202837_inLine +BABEL_OP3_404_48663_20150512_202837_outLine +BABEL_OP3_404_49306_20150524_003356_inLine +BABEL_OP3_404_49306_20150524_003356_outLine +BABEL_OP3_404_49630_20141128_020114_inLine +BABEL_OP3_404_49630_20141128_020114_outLine +BABEL_OP3_404_49767_20150613_050113_inLine +BABEL_OP3_404_49767_20150613_050113_outLine +BABEL_OP3_404_49775_20141011_005306_inLine +BABEL_OP3_404_49775_20141011_005306_outLine 
+BABEL_OP3_404_49945_20150610_154709_inLine +BABEL_OP3_404_50601_20141127_032527_inLine +BABEL_OP3_404_50601_20141127_032527_outLine +BABEL_OP3_404_50779_20141115_012852_inLine +BABEL_OP3_404_50779_20141115_012852_outLine +BABEL_OP3_404_50810_20141007_234432_inLine +BABEL_OP3_404_50810_20141007_234432_outLine +BABEL_OP3_404_51414_20150604_001601_inLine +BABEL_OP3_404_51414_20150604_001601_outLine +BABEL_OP3_404_51484_20141202_000325_inLine +BABEL_OP3_404_51484_20141202_000325_outLine +BABEL_OP3_404_51701_20150620_010924_outLine +BABEL_OP3_404_52070_20150620_014422_outLine +BABEL_OP3_404_52070_20150620_020559_outLine +BABEL_OP3_404_52404_20141125_004855_inLine +BABEL_OP3_404_52404_20141125_004855_outLine +BABEL_OP3_404_53063_20141201_005237_inLine +BABEL_OP3_404_53063_20141201_005237_outLine +BABEL_OP3_404_53072_20150518_015132_inLine +BABEL_OP3_404_53415_20150503_225920_inLine +BABEL_OP3_404_53415_20150503_225920_outLine +BABEL_OP3_404_53492_20150525_055025_inLine +BABEL_OP3_404_53492_20150525_055025_outLine +BABEL_OP3_404_53665_20150526_004549_inLine +BABEL_OP3_404_53917_20150503_205456_outLine +BABEL_OP3_404_53957_20141201_051933_inLine +BABEL_OP3_404_54477_20141211_033627_inLine +BABEL_OP3_404_54477_20141211_033627_outLine +BABEL_OP3_404_55013_20150525_222257_inLine +BABEL_OP3_404_55013_20150525_222257_outLine +BABEL_OP3_404_55267_20141130_212756_inLine +BABEL_OP3_404_55349_20150523_031602_inLine +BABEL_OP3_404_55349_20150523_031602_outLine +BABEL_OP3_404_56019_20150502_020750_inLine +BABEL_OP3_404_56019_20150502_020750_outLine +BABEL_OP3_404_56076_20150516_164959_inLine +BABEL_OP3_404_56076_20150516_164959_outLine +BABEL_OP3_404_56331_20150526_020747_inLine +BABEL_OP3_404_56331_20150526_020747_outLine +BABEL_OP3_404_56743_20141114_223719_inLine +BABEL_OP3_404_56743_20141114_223719_outLine +BABEL_OP3_404_57065_20141201_002920_inLine +BABEL_OP3_404_57219_20150618_045613_inLine +BABEL_OP3_404_57219_20150618_045613_outLine +BABEL_OP3_404_57464_20150523_224617_inLine +BABEL_OP3_404_57542_20150526_233832_inLine +BABEL_OP3_404_57542_20150526_233832_outLine +BABEL_OP3_404_57542_20150526_235003_inLine +BABEL_OP3_404_57542_20150526_235003_outLine +BABEL_OP3_404_58006_20150526_024205_inLine +BABEL_OP3_404_58006_20150526_024205_outLine +BABEL_OP3_404_58026_20150615_004130_inLine +BABEL_OP3_404_58026_20150615_004130_outLine +BABEL_OP3_404_58915_20150611_034220_outLine +BABEL_OP3_404_59307_20150504_003405_inLine +BABEL_OP3_404_59307_20150504_003405_outLine +BABEL_OP3_404_59864_20150602_014458_inLine +BABEL_OP3_404_60299_20150611_040929_inLine +BABEL_OP3_404_60310_20141130_231532_inLine +BABEL_OP3_404_60310_20141130_231532_outLine +BABEL_OP3_404_60352_20141201_060712_inLine +BABEL_OP3_404_60352_20141201_060712_outLine +BABEL_OP3_404_60352_20141201_061821_inLine +BABEL_OP3_404_60352_20141201_061821_outLine +BABEL_OP3_404_60458_20150609_021527_inLine +BABEL_OP3_404_60458_20150609_021527_outLine +BABEL_OP3_404_60477_20150613_223056_inLine +BABEL_OP3_404_60477_20150613_224002_inLine +BABEL_OP3_404_60498_20150606_022221_inLine +BABEL_OP3_404_60498_20150606_022221_outLine +BABEL_OP3_404_60706_20141020_215729_inLine +BABEL_OP3_404_60706_20141020_215729_outLine +BABEL_OP3_404_61888_20150504_171019_inLine +BABEL_OP3_404_61971_20150525_020101_outLine +BABEL_OP3_404_62360_20150517_033230_inLine +BABEL_OP3_404_62360_20150517_033230_outLine +BABEL_OP3_404_62724_20141130_200827_inLine +BABEL_OP3_404_62724_20141130_200827_outLine +BABEL_OP3_404_62852_20141013_054854_outLine 
+BABEL_OP3_404_63425_20141126_054504_inLine +BABEL_OP3_404_63481_20141020_221014_outLine +BABEL_OP3_404_63481_20141020_224225_outLine +BABEL_OP3_404_63670_20141130_050318_inLine +BABEL_OP3_404_63670_20141130_050318_outLine +BABEL_OP3_404_63906_20150525_050310_inLine +BABEL_OP3_404_63906_20150525_050310_outLine +BABEL_OP3_404_63999_20150610_041309_inLine +BABEL_OP3_404_64014_20150503_032745_inLine +BABEL_OP3_404_64014_20150503_032745_outLine +BABEL_OP3_404_64722_20150514_034208_outLine +BABEL_OP3_404_64759_20141014_044027_inLine +BABEL_OP3_404_64759_20141014_045519_inLine +BABEL_OP3_404_64796_20141022_055826_inLine +BABEL_OP3_404_65561_20141124_060558_inLine +BABEL_OP3_404_65561_20141124_060558_outLine +BABEL_OP3_404_65640_20150528_211835_inLine +BABEL_OP3_404_65640_20150528_211835_outLine +BABEL_OP3_404_66967_20141008_202611_inLine +BABEL_OP3_404_66967_20141008_202611_outLine +BABEL_OP3_404_67152_20150503_201836_inLine +BABEL_OP3_404_67152_20150503_201836_outLine +BABEL_OP3_404_67304_20150211_054416_inLine +BABEL_OP3_404_67304_20150211_054416_outLine +BABEL_OP3_404_67552_20141126_011955_inLine +BABEL_OP3_404_67552_20141126_011955_outLine +BABEL_OP3_404_68306_20141126_180315_inLine +BABEL_OP3_404_68306_20141126_180315_outLine +BABEL_OP3_404_69096_20150512_165126_inLine +BABEL_OP3_404_69096_20150512_165126_outLine +BABEL_OP3_404_69153_20141130_221412_inLine +BABEL_OP3_404_69153_20141130_221412_outLine +BABEL_OP3_404_69153_20141130_222842_inLine +BABEL_OP3_404_69153_20141130_222842_outLine +BABEL_OP3_404_69474_20141128_051323_outLine +BABEL_OP3_404_69633_20141129_051648_inLine +BABEL_OP3_404_69633_20141129_051648_outLine +BABEL_OP3_404_69636_20141126_061322_inLine +BABEL_OP3_404_69636_20141126_061322_outLine +BABEL_OP3_404_69885_20150503_011226_inLine +BABEL_OP3_404_69885_20150503_011226_outLine +BABEL_OP3_404_69937_20150620_015912_inLine +BABEL_OP3_404_69964_20150524_015556_inLine +BABEL_OP3_404_69964_20150524_015556_outLine +BABEL_OP3_404_69982_20150625_035440_outLine +BABEL_OP3_404_70221_20141124_052004_inLine +BABEL_OP3_404_70221_20141124_052004_outLine +BABEL_OP3_404_70460_20150527_015340_inLine +BABEL_OP3_404_70460_20150527_015340_outLine +BABEL_OP3_404_70526_20150501_015444_inLine +BABEL_OP3_404_70526_20150501_015444_outLine +BABEL_OP3_404_70713_20150527_013058_inLine +BABEL_OP3_404_70713_20150527_013058_outLine +BABEL_OP3_404_71189_20150523_005918_inLine +BABEL_OP3_404_71189_20150523_005918_outLine +BABEL_OP3_404_71278_20150211_052730_inLine +BABEL_OP3_404_71278_20150211_052730_outLine +BABEL_OP3_404_71278_20150211_054040_inLine +BABEL_OP3_404_71278_20150211_054040_outLine +BABEL_OP3_404_71333_20141102_023503_inLine +BABEL_OP3_404_71333_20141102_023503_outLine +BABEL_OP3_404_71460_20150206_015309_outLine +BABEL_OP3_404_71559_20141210_220929_outLine +BABEL_OP3_404_71780_20141105_055543_inLine +BABEL_OP3_404_71780_20141105_055543_outLine +BABEL_OP3_404_72319_20150502_041426_inLine +BABEL_OP3_404_72319_20150502_041426_outLine +BABEL_OP3_404_72733_20150515_044419_inLine +BABEL_OP3_404_72733_20150515_044419_outLine +BABEL_OP3_404_73072_20141012_012029_inLine +BABEL_OP3_404_73072_20141012_012029_outLine +BABEL_OP3_404_73258_20141117_010123_inLine +BABEL_OP3_404_73258_20141117_010123_outLine +BABEL_OP3_404_73964_20150512_205010_inLine +BABEL_OP3_404_73964_20150512_205010_outLine +BABEL_OP3_404_74728_20150503_042547_inLine +BABEL_OP3_404_74728_20150503_042547_outLine +BABEL_OP3_404_75465_20141129_223330_outLine +BABEL_OP3_404_75975_20150127_051140_outLine 
+BABEL_OP3_404_76126_20141201_202238_inLine +BABEL_OP3_404_76126_20141201_202238_outLine +BABEL_OP3_404_76238_20141129_223455_inLine +BABEL_OP3_404_76238_20141129_223455_outLine +BABEL_OP3_404_76372_20150601_014341_inLine +BABEL_OP3_404_76372_20150601_014341_outLine +BABEL_OP3_404_76444_20141127_032124_inLine +BABEL_OP3_404_76444_20141127_032124_outLine +BABEL_OP3_404_76482_20150618_063131_outLine +BABEL_OP3_404_76683_20141110_191551_inLine +BABEL_OP3_404_76683_20141110_191551_outLine +BABEL_OP3_404_76837_20150124_222250_outLine +BABEL_OP3_404_76970_20150625_191722_inLine +BABEL_OP3_404_77146_20141019_060916_inLine +BABEL_OP3_404_77242_20150612_024655_inLine +BABEL_OP3_404_77567_20141021_021210_inLine +BABEL_OP3_404_77567_20141021_021210_outLine +BABEL_OP3_404_77803_20141020_030844_inLine +BABEL_OP3_404_77803_20141020_030844_outLine +BABEL_OP3_404_78454_20141115_043455_inLine +BABEL_OP3_404_78749_20150620_025728_inLine +BABEL_OP3_404_78749_20150620_025728_outLine +BABEL_OP3_404_79190_20141108_232204_inLine +BABEL_OP3_404_79190_20141108_232204_outLine +BABEL_OP3_404_79590_20141129_025808_outLine +BABEL_OP3_404_79820_20141104_045340_inLine +BABEL_OP3_404_79820_20141104_045340_outLine +BABEL_OP3_404_79858_20141015_200446_inLine +BABEL_OP3_404_79898_20150620_022648_inLine +BABEL_OP3_404_79898_20150620_022648_outLine +BABEL_OP3_404_79898_20150620_024014_inLine +BABEL_OP3_404_79898_20150620_024014_outLine +BABEL_OP3_404_80069_20150614_233606_inLine +BABEL_OP3_404_80069_20150614_233606_outLine +BABEL_OP3_404_80306_20141119_003833_inLine +BABEL_OP3_404_80306_20141119_003833_outLine +BABEL_OP3_404_80306_20141119_005121_inLine +BABEL_OP3_404_80306_20141119_005121_outLine +BABEL_OP3_404_80559_20141022_010255_inLine +BABEL_OP3_404_80655_20150525_221544_inLine +BABEL_OP3_404_80655_20150525_221544_outLine +BABEL_OP3_404_80897_20141119_233718_inLine +BABEL_OP3_404_80897_20141119_233718_outLine +BABEL_OP3_404_81149_20150525_003741_inLine +BABEL_OP3_404_81149_20150525_003741_outLine +BABEL_OP3_404_81427_20141030_015136_inLine +BABEL_OP3_404_81427_20141030_015136_outLine +BABEL_OP3_404_81854_20150610_060437_inLine +BABEL_OP3_404_82626_20150615_014517_inLine +BABEL_OP3_404_82863_20141119_044230_inLine +BABEL_OP3_404_82863_20141119_044230_outLine +BABEL_OP3_404_83651_20141102_170912_inLine +BABEL_OP3_404_83651_20141102_170912_outLine +BABEL_OP3_404_83771_20150604_012300_outLine +BABEL_OP3_404_83974_20150617_022055_inLine +BABEL_OP3_404_84125_20141018_023340_inLine +BABEL_OP3_404_84125_20141018_023340_outLine +BABEL_OP3_404_84458_20141130_053628_outLine +BABEL_OP3_404_84815_20141127_011952_inLine +BABEL_OP3_404_84815_20141127_013345_inLine +BABEL_OP3_404_85047_20141117_014630_inLine +BABEL_OP3_404_85047_20141117_014630_outLine +BABEL_OP3_404_85340_20141103_022707_inLine +BABEL_OP3_404_85340_20141103_022707_outLine +BABEL_OP3_404_86597_20150612_170328_inLine +BABEL_OP3_404_86597_20150612_170328_outLine +BABEL_OP3_404_87074_20141105_190107_outLine +BABEL_OP3_404_87777_20141127_040747_inLine +BABEL_OP3_404_87777_20141127_040747_outLine +BABEL_OP3_404_87871_20141201_023608_inLine +BABEL_OP3_404_87871_20141201_023608_outLine +BABEL_OP3_404_87921_20141201_023029_inLine +BABEL_OP3_404_87921_20141201_023029_outLine +BABEL_OP3_404_88873_20141028_190127_inLine +BABEL_OP3_404_88873_20141028_190127_outLine +BABEL_OP3_404_89330_20150616_002908_inLine +BABEL_OP3_404_89330_20150616_002908_outLine +BABEL_OP3_404_89943_20141105_211847_outLine +BABEL_OP3_404_90347_20141119_012016_inLine 
+BABEL_OP3_404_90347_20141119_012016_outLine +BABEL_OP3_404_90760_20150611_151739_inLine +BABEL_OP3_404_90760_20150611_151739_outLine +BABEL_OP3_404_90832_20150616_012728_inLine +BABEL_OP3_404_90832_20150616_012728_outLine +BABEL_OP3_404_91383_20150618_035815_inLine +BABEL_OP3_404_91475_20150614_034536_inLine +BABEL_OP3_404_91581_20141129_045608_inLine +BABEL_OP3_404_91581_20141129_045608_outLine +BABEL_OP3_404_91581_20141129_050730_inLine +BABEL_OP3_404_91581_20141129_050730_outLine +BABEL_OP3_404_91593_20150611_021825_inLine +BABEL_OP3_404_91593_20150611_021825_outLine +BABEL_OP3_404_91884_20150503_022858_inLine +BABEL_OP3_404_91884_20150503_022858_outLine +BABEL_OP3_404_91888_20150512_191012_inLine +BABEL_OP3_404_91888_20150512_191012_outLine +BABEL_OP3_404_91891_20141129_005825_inLine +BABEL_OP3_404_91891_20141129_005825_outLine +BABEL_OP3_404_91944_20141022_021002_inLine +BABEL_OP3_404_91977_20141122_230420_outLine +BABEL_OP3_404_92176_20141119_195614_inLine +BABEL_OP3_404_92176_20141119_195614_outLine +BABEL_OP3_404_92281_20150625_185123_inLine +BABEL_OP3_404_92757_20150525_200048_inLine +BABEL_OP3_404_92757_20150525_200048_outLine +BABEL_OP3_404_92792_20150503_182854_outLine +BABEL_OP3_404_92792_20150525_025523_outLine +BABEL_OP3_404_92942_20141120_022830_inLine +BABEL_OP3_404_92942_20141120_022830_outLine +BABEL_OP3_404_93007_20150615_051230_inLine +BABEL_OP3_404_93007_20150615_051230_outLine +BABEL_OP3_404_93858_20150611_043732_inLine +BABEL_OP3_404_94002_20141119_015307_inLine +BABEL_OP3_404_94002_20141119_015307_outLine +BABEL_OP3_404_94333_20141020_024439_outLine +BABEL_OP3_404_94487_20150518_005132_outLine +BABEL_OP3_404_95077_20141201_055702_outLine +BABEL_OP3_404_95269_20141105_221810_inLine +BABEL_OP3_404_95269_20141105_221810_outLine +BABEL_OP3_404_95338_20150610_211203_inLine +BABEL_OP3_404_95338_20150610_211203_outLine +BABEL_OP3_404_95399_20141119_001023_inLine +BABEL_OP3_404_95399_20141119_001023_outLine +BABEL_OP3_404_95583_20141019_010741_inLine +BABEL_OP3_404_95583_20141019_010741_outLine +BABEL_OP3_404_96059_20150524_042224_outLine +BABEL_OP3_404_96205_20141119_033053_inLine +BABEL_OP3_404_96205_20141119_033053_outLine +BABEL_OP3_404_96205_20141119_034909_inLine +BABEL_OP3_404_96205_20141119_034909_outLine +BABEL_OP3_404_96247_20150526_202623_outLine +BABEL_OP3_404_96690_20141117_053054_inLine +BABEL_OP3_404_96690_20141117_053054_outLine +BABEL_OP3_404_96808_20150609_034129_inLine +BABEL_OP3_404_97097_20150601_042649_outLine +BABEL_OP3_404_97136_20150528_011250_inLine +BABEL_OP3_404_97136_20150528_011250_outLine +BABEL_OP3_404_97911_20150613_195820_outLine +BABEL_OP3_404_98165_20141030_214051_inLine +BABEL_OP3_404_98165_20141030_214051_outLine +BABEL_OP3_404_98192_20150617_021906_outLine +BABEL_OP3_404_98489_20141102_002030_inLine +BABEL_OP3_404_98489_20141102_004054_inLine +BABEL_OP3_404_98678_20150528_021605_inLine +BABEL_OP3_404_98678_20150528_023029_inLine +BABEL_OP3_404_99289_20150521_220314_inLine +BABEL_OP3_404_99289_20150521_220314_outLine +BABEL_OP3_404_99289_20150521_222144_inLine +BABEL_OP3_404_99289_20150521_222144_outLine +BABEL_OP3_404_99718_20141019_051850_inLine +BABEL_OP3_404_99718_20141019_051850_outLine +BABEL_OP3_404_99718_20141019_053305_inLine +BABEL_OP3_404_99718_20141019_053305_outLine +BABEL_OP3_404_99732_20141130_232553_inLine +BABEL_OP3_404_99732_20141130_232553_outLine +BABEL_OP3_404_99920_20141022_052026_inLine diff --git a/egs/babel/s5d/conf/mfcc.conf b/egs/babel/s5d/conf/mfcc.conf new file mode 100644 index 00000000000..45280a4e3a0 
--- /dev/null
+++ b/egs/babel/s5d/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false   # only non-default option.
+--sample-frequency=8000 # Babel telephone data is sampled at 8kHz
diff --git a/egs/babel/s5d/conf/mfcc_hires.conf b/egs/babel/s5d/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d870ab04c38
--- /dev/null
+++ b/egs/babel/s5d/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated), which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--sample-frequency=8000 # Babel telephone data is sampled at 8kHz
+--num-mel-bins=40    # similar to Google's setup.
+--num-ceps=40        # there is no dimensionality reduction.
+--low-freq=40        # low cutoff frequency for mel bins
+--high-freq=-200     # high cutoff frequency, relative to Nyquist of 4000 (=3800)
diff --git a/egs/babel/s5d/conf/online_cmvn.conf b/egs/babel/s5d/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/babel/s5d/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/babel/s5d/conf/pitch.conf b/egs/babel/s5d/conf/pitch.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/plp.conf b/egs/babel/s5d/conf/plp.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/plp.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/slurm.bluecrab.conf b/egs/babel/s5d/conf/slurm.bluecrab.conf
new file mode 100644
index 00000000000..d0c5fd1f904
--- /dev/null
+++ b/egs/babel/s5d/conf/slurm.bluecrab.conf
@@ -0,0 +1,11 @@
+command sbatch --export=PATH --ntasks-per-node=1 --exclude=compute[0001-0014,0017,0021,0022,0038]
+option time=* --time=$0
+option mem=* --mem-per-cpu=$0
+option mem=0  # Do not add anything to qsub_opts
+option num_threads=* --cpus-per-task=$0 --ntasks-per-node=1
+option num_threads=1 --cpus-per-task=1 --ntasks-per-node=1  # Do not add anything to qsub_opts
+option max_jobs_run=*  # Do nothing
+default gpu=0
+# option gpu=0 -p scavenger --qos=scavenger
+option gpu=0 -p shared
+option gpu=* -p gpu --gres=gpu:$0 --cpus-per-task=6 --exclude=gpu[019,026] --time=4:0:0  # in reality, we probably should have --cpus-per-task=$((6*$0))
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
new file mode 100755
index 00000000000..60d0598f007
--- /dev/null
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This script will take the ali directory and create the corresponding RTTM file.
+# Example:
+# steps/align_sgmm2.sh --nj 20 --cmd "$decode_cmd" \
+#   --transform-dir exp/tri5/decode_dev2h.uem \
+#   data/dev2h.uem data/lang exp/sgmm5 exp/sgmm5/align_dev2h.uem
+# local/ali_to_rttm.sh data/dev2h data/lang exp/sgmm5/align_dev2h/
+
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+beam=10
+retry_beam=40
+boost_silence=1.0
+
+if [ -f path.sh ]; then . path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "This script takes an ali directory and creates the corresponding RTTM file"
+  echo ""
+  echo "Usage: align_text.sh <data-dir> <lang-dir> <ali-dir>"
+  echo " e.g.: align_text.sh data/heldout data/lang exp/heldout_ali"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) "
+
+  exit 1;
+fi
+
+set -e
+set -o pipefail
+set -u
+
+data=$1
+lang=$2
+dir=$3
+
+oov=`cat $lang/oov.txt`
+mkdir -p $dir/log
+
+echo "$0: writing alignments."
+wbegin=`grep "#1" $lang/phones.txt | head -1 | awk '{print $2}'`
+wend=`grep "#2" $lang/phones.txt | head -1 | awk '{print $2}'`
+
+if [ ! -f $lang/L_align.fst ]; then
+  echo "$0: generating $lang/L_align.fst"
+  local/make_L_align.sh data/local/tmp.lang/ $lang $lang 2>&1 | tee $dir/log/L_align.log
+fi
+
+$cmd $dir/log/align_to_words.log \
+  ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \
+  phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \
+  prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt
+
+echo "$0: done writing alignments."
+
+echo "$0: writing rttm."
+[ ! -x local/txt_to_rttm.pl ] && \
+  echo "Not creating rttm because local/txt_to_rttm.pl does not exist or is not executable." && exit 1;
+
+local/txt_to_rttm.pl --symtab=$lang/words.txt --segment=$data/segments $dir/align.txt $dir/rttm 2>$dir/log/rttm.log
+local/txt_to_rttm.pl --symtab=$lang/words.txt $dir/align.txt $dir/rttm.per-utt 2>$dir/log/rttm.per-utt.log
+echo "$0: done writing rttm."
+
+exit 0;
diff --git a/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
new file mode 100755
index 00000000000..a4c80cef345
--- /dev/null
+++ b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: annotated_kwlist_to_KWs.pl <kwlist.annot.xml|-> <KW-list|-> [category]
+ e.g.: annotated_kwlist_to_KWs.pl kwlist.annot.list keywords.list "NGram Order:2,3,4"
+
+This script reads an annotated kwlist xml file and writes a list of keywords, according
+to the given categories. The "category" is a "key:value" pair in the annotated kwlist xml
+file. For example:
+1. "NGram Order:2,3,4"
+2. "NGram Order:2"
+3. "NGram Order:-"
+where "NGram Order" is the category name. The first line means: print keywords that are
+bigrams, trigrams and 4-grams; the second line means: print keywords only for bigrams; the last
+line means: print keywords of all possible ngram orders.
+If no "category" is specified, the script will print out the possible categories.
+
+Allowed options:
+EOU
+
+GetOptions();
+
+@ARGV >= 2 || die $Usage;
+
+# Work out the input/output source
+my $kwlist_filename = shift @ARGV;
+my $kws_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kwlist_filename ne "-") {
+  open(KWLIST, "<$kwlist_filename") || die "Fail to open kwlist file: $kwlist_filename\n";
+  $source = "KWLIST";
+}
+
+# Process kwlist.annot.xml
+my %attr;
+my %attr_kws;
+my $kwid="";
+my $name="";
+my $value="";
+while (<$source>) {
+  chomp;
+  if (m/<kw kwid=/) {($kwid) = /kwid="(\S+)"/; next;}
+  if (m/<name>/) {($name) = /<name>(.*)<\/name>/; next;}
+  if (m/<value>/) {
+    ($value) = /<value>(.*)<\/value>/;
+    if (defined($attr{$name})) {
+      $attr{"$name"}->{"$value"} = 1;
+    } else {
+      $attr{"$name"} = {"$value", 1};
+    }
+    if (defined($attr_kws{"${name}_$value"})) {
+      $attr_kws{"${name}_$value"}->{"$kwid"} = 1;
+    } else {
+      $attr_kws{"${name}_$value"} = {"$kwid", 1};
+    }
+  }
+}
+
+my $output = "";
+if (@ARGV == 0) {
+  # If no category provided, print out the possible categories
+  $output .= "Possible categories are:\n\n";
+  foreach my $name (keys %attr) {
+    $output .= "$name:";
+    my $count = 0;
+    foreach my $value (keys %{$attr{$name}}) {
+      if ($value eq "") {$value = "\"\"";}
+      if ($count == 0) {
+        $output .= "$value";
+        $count ++; next;
+      }
+      if ($count == 6) {
+        $output .= ", ...";
+        last;
+      }
+      $output .= ",$value"; $count ++;
+    }
+    $output .= "\n";
+  }
+  print STDERR $output;
+  $output = "";
+} else {
+  my %keywords;
+  while (@ARGV > 0) {
+    my $category = shift @ARGV;
+    my @col = split(/:/, $category);
+    @col == 2 || die "Bad category \"$category\"\n";
+    $name = $col[0];
+    if ($col[1] eq "-") {
+      foreach my $value (keys %{$attr{$name}}) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    } else {
+      my @col1 = split(/,/, $col[1]);
+      foreach my $value (@col1) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    }
+  }
+  foreach my $kw (keys %keywords) {
+    $output .= "$kw\n";
+  }
+}
+
+if ($kwlist_filename ne "-") {close(KWLIST);}
+if ($kws_filename eq "-") { print $output;}
+else {
+  open(O, ">$kws_filename") || die "Fail to open file $kws_filename\n";
+  print O $output;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/apply_g2p.sh b/egs/babel/s5d/local/apply_g2p.sh
new file mode 100755
index 00000000000..385b1f3536e
--- /dev/null
+++ b/egs/babel/s5d/local/apply_g2p.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+iters=5
+stage=0
+encoding='utf-8'
+remove_tags=true
+only_words=true
+icu_transform="Any-Lower"
+var_counts=3  # Generate up to N pronunciation variants per word
+var_mass=0.9  # Generate only as many variants as needed to cover 90% of the prob mass
+cmd=run.pl
+nj=10  # Split the task into several parallel jobs, to speed things up
+model=
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <word-list> <g2p-model-dir> <output-dir>"
+  echo "... where <word-list> is a list of words whose pronunciation is to be generated"
+  echo "          <g2p-model-dir> is a directory used as a target during training of G2P"
+  echo "          <output-dir> is the directory where the output lexicon should be stored"
+  echo "e.g.: $0 oov_words exp/g2p exp/g2p/oov_lex"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>        # How many tasks should be spawned (to speed things up)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
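+  echo "  --var-counts <N> # Generate up to N pronunciation variants per word (default: 3)"
+  echo "  --var-mass <F>   # Generate only enough variants to cover this fraction of the prob mass (default: 0.9)"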
+  exit 1;
+fi
+
+wordlist=$1
+modeldir=$2
+output=$3
+
+
+mkdir -p $output/log
+
+model=$modeldir/g2p.model.final
+[ ! -f ${model:-} ] && echo "File $model not found in the directory $modeldir." && exit 1
+#[ ! -x $wordlist ] && echo "File $wordlist not found!" && exit 1
+
+cp $wordlist $output/wordlist.orig.txt
+
+if [ ! -z $icu_transform ] ; then
+  # we have to keep a correspondence map A -> transform(A)
+  paste \
+    <(cat $output/wordlist.orig.txt | uconv -f $encoding -t $encoding -x $icu_transform) \
+    $output/wordlist.orig.txt \
+    > $output/transform_map.txt
+  cut -f 1 $output/transform_map.txt | sort -u > $output/wordlist.txt
+else
+  cp $output/wordlist.orig.txt $output/wordlist.txt
+fi
+
+if ! g2p=`which g2p.py` ; then
+  echo "Sequitur was not found!"
+  echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh"
+  exit 1
+fi
+
+
+echo "Applying the G2P model to wordlist $wordlist"
+
+if [ $stage -le 0 ]; then
+  $cmd JOBS=1:$nj $output/log/apply.JOBS.log \
+    split -n l/JOBS/$nj $output/wordlist.txt \| \
+    g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \
+      --model $modeldir/g2p.model.final --apply - \
+    \> $output/output.JOBS
+fi
+cat $output/output.* > $output/output
+
+# Remap the words from the output file back to the original casing.
+# Conversion of some of them might have failed, so we have to be careful
+# and use the transform_map file we generated beforehand.
+# Also, because the sequitur output is not readily usable as a lexicon (it adds
+# one more column with the ordering of the pron. variants), convert it into the proper lexicon form.
+output_lex=$output/lexicon.lex
+if [ ! -z $icu_transform ] ; then
+  # also, the transform is generally N -> 1, i.e. we have to take
+  # extra care of words that might have been mapped into the same one
+  perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]";
+    while(<WORDS>) { chomp; @F=split;
+      if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; }
+      else { $MAP{$F[0]} = [$F[1]]; }
+    }
+    close(WORDS);
+    open(LEX, $ARGV[1]) or die "Could not open file $ARGV[1]";
+    while(<LEX>) {chomp; @F=split /\t/;
+      if ( $#F != 3 ) {
+        print STDERR "WARNING: Non-acceptable entry \"" . join(" ", @F) . "\" ($#F splits)\n";
+        next;
+      }
+      foreach $word (@{$MAP{$F[0]}} ) {
+        print "$word\t$F[2]\t$F[3]\n";
+      }
+    }
+    close(LEX);
+  ' \
+  $output/transform_map.txt $output/output | sort -u > $output_lex
+else
+  # Just convert it to a proper lexicon format
+  cut -f 1,3,4 $output/output > $output_lex
+fi
+
+# Some words might have been removed or skipped during the process,
+# let's check it and warn the user if so...
+nlex=`cut -f 1 $output_lex | sort -u | wc -l`
+nwlist=`cut -f 1 $output/wordlist.orig.txt | sort -u | wc -l`
+if [ $nlex -ne $nwlist ] ; then
+  echo "WARNING: Unable to generate pronunciations for all words."
+  echo "WARNING: Wordlist: $nwlist words"
+  echo "WARNING: Lexicon : $nlex words"
+  echo "WARNING: Diff example: "
+  diff <(cut -f 1 $output_lex | sort -u ) \
+       <(cut -f 1 $output/wordlist.orig.txt | sort -u ) || true
+fi
+exit 0
diff --git a/egs/babel/s5d/local/apply_map_tab_preserving.pl b/egs/babel/s5d/local/apply_map_tab_preserving.pl
new file mode 100755
index 00000000000..b57262f1930
--- /dev/null
+++ b/egs/babel/s5d/local/apply_map_tab_preserving.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+
+# This program is a bit like ./sym2int.pl in that it applies a map
+# to things in a file, but it's a bit more general in that it doesn't
+# assume the things being mapped to are single tokens; they could
+# be sequences of tokens. See the usage message.
+# This version preserves tabs.
+
+if (@ARGV > 0 && $ARGV[0] eq "-f") {
+  shift @ARGV;
+  $field_spec = shift @ARGV;
+  if ($field_spec =~ m/^\d+$/) {
+    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+  }
+  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+    if ($1 ne "") {
+      $field_begin = $1 - 1;  # Change to zero-based indexing.
+    }
+    if ($2 ne "") {
+      $field_end = $2 - 1;  # Change to zero-based indexing.
+    }
+  }
+  if (!defined $field_begin && !defined $field_end) {
+    die "Bad argument to -f option: $field_spec";
+  }
+}
+
+# Mapping is obligatory
+$permissive = 0;
+if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
+  shift @ARGV;
+  # Mapping is optional (missing key is printed to output)
+  $permissive = 1;
+}
+
+if(@ARGV != 1) {
+  print STDERR "Usage: apply_map_tab_preserving.pl [options] map <input >output\n" .
+    "options: [-f <field-range>]\n" .
+    "Applies the map 'map' to all input text, where each line of the map\n" .
+    "is interpreted as a map from the first field to the list of the other fields\n" .
+    "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
+    "range in the input to apply the map to.\n" .
+    "e.g.: echo A B | apply_map.pl a.txt\n" .
+    "where a.txt is:\n" .
+    "A a1 a2\n" .
+    "B b\n" .
+    "will produce:\n" .
+    "a1 a2 b\n";
+  exit(1);
+}
+
+($map) = @ARGV;
+open(M, "<$map") || die "Error opening map file $map: $!";
+
+while (<M>) {
+  @A = split(" ", $_);
+  @A >= 1 || die "apply_map_tab_preserving.pl: empty line.";
+  $i = shift @A;
+  $o = join(" ", @A);
+  $map{$i} = $o;
+}
+
+while(<STDIN>) {
+  @A = split("\t", $_);
+  $field_offset = 0;
+  for ($n = 0; $n < @A; $n++) {
+    @B = split(" ", $A[$n]);
+
+    for ($x = 0; $x < @B; $x++) {
+      $y = $x + $field_offset;
+      if ( (!defined $field_begin || $y >= $field_begin)
+        && (!defined $field_end || $y <= $field_end)) {
+        $b = $B[$x];
+        if (!defined $map{$b}) {
+          if (!$permissive) {
+            die "apply_map_tab_preserving.pl: undefined key $b\n";
+          } else {
+            print STDERR "apply_map_tab_preserving.pl: warning! missing key $b\n";
+          }
+        } else {
+          $B[$x] = $map{$b};
+        }
+      }
+    }
+    $field_offset += @B;
+    $A[$n] = join(" ", @B);
+  }
+  print join("\t", @A) . "\n";
+}
diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh
new file mode 100755
index 00000000000..887b393b459
--- /dev/null
+++ b/egs/babel/s5d/local/arpa2G.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# Simple utility script to convert the gzipped ARPA lm into a G.fst file
+
+
+oov_prob_file=
+unk_fraction=
+cleanup=true
+#end configuration section.
+
+
+
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <arpa-lm-file> <lang-dir> <dest-dir>"
+  echo "Options: --oov-prob-file <file>   # e.g. data/local/oov2prob"
+  echo "         # with this option, it will replace <unk> with OOVs in G.fst."
+  exit 1;
+fi
+
+set -e           #Exit on non-zero return code from any command
+set -o pipefail  #Exit if any of the commands in the pipeline
+                 #returns a non-zero return code
+
+lmfile=$1
+langdir=$2
+destdir=$3
+
+mkdir $destdir 2>/dev/null || true
+
+
+if [ ! -z "$oov_prob_file" ]; then
+  if [ ! -s "$oov_prob_file" ]; then
+    echo "$0: oov-prob file $oov_prob_file does not exist"
+    exit 1;
+  fi
+  if [ -z "$unk_fraction" ]; then
+    echo "--oov-prob option requires --unk-fraction option";
+    exit 1;
+  fi
+
+  min_prob=$(gunzip -c $lmfile | perl -e ' $minlogprob = 0.0;
+    while(<STDIN>) { if (m/\\(\d)-grams:/) { $order = $1; }
+      if ($order == 1) { @A = split;
+        if ($A[0] < $minlogprob && $A[0] != -99) { $minlogprob = $A[0]; }}} print $minlogprob')
+  echo "Minimum prob in LM file is $min_prob"
+
+  echo "$0: creating LM file with unk words, using $oov_prob_file, in $destdir/lm_tmp.gz"
+  gunzip -c $lmfile | \
+    perl -e ' ($oov_prob_file,$min_prob,$unk_fraction) = @ARGV; $ceilinged=0;
+      $min_prob < 0.0 || die "Bad min_prob";      # this is a log-prob
+      $unk_fraction > 0.0 || die "Bad unk_fraction";  # this is a prob
+      open(F, "<$oov_prob_file") || die "opening oov file";
+      while (<F>) { push @OOVS, $_; }
+      $num_oovs = @OOVS;
+      while(<STDIN>) {
+        if (m/^ngram 1=(\d+)/) { $n = $1 + $num_oovs; print "ngram 1=$n\n"; }
+        else { print; }  # print all lines unchanged except the one that says ngram 1=X.
+        if (m/^\\1-grams:$/) {
+          foreach $l (@OOVS) {
+            @A = split(" ", $l);
+            @A == 2 || die "bad line in oov2prob: $_;";
+            ($word, $prob) = @A;
+            $log10prob = (log($prob * $unk_fraction) / log(10.0));
+            if ($log10prob > $min_prob) { $log10prob = $min_prob; $ceilinged++;}
+            print "$log10prob $word\n";
+          }
+        }} print STDERR "Ceilinged $ceilinged unk-probs\n";' \
+      $oov_prob_file $min_prob $unk_fraction | \
+    ngram -unk -lm - -write-lm $destdir/lm_tmp.gz
+  lmfile=$destdir/lm_tmp.gz
+fi
+
+if [[ $lmfile == *.bz2 ]] ; then
+  decompress="bunzip2 -c $lmfile"
+elif [[ $lmfile == *.gz ]] ; then
+  decompress="gunzip -c $lmfile"
+else
+  decompress="cat $lmfile"
+fi
+
+$decompress | \
+  grep -v '<s> <s>' | grep -v '</s> <s>' | grep -v '</s> </s>' | \
+  arpa2fst - | \
+  fstprint | \
+  utils/eps2disambig.pl | \
+  utils/s2eps.pl | \
+  fstcompile --isymbols=$langdir/words.txt \
+    --osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
+  fstrmepsilon | fstarcsort --sort_type=ilabel > $destdir/G.fst || exit 1
+fstisstochastic $destdir/G.fst || true;
+
+if $cleanup; then
+  rm $destdir/lm_tmp.gz 2>/dev/null || true;
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/augment_original_stm.pl b/egs/babel/s5d/local/augment_original_stm.pl
new file mode 100755
index 00000000000..c5ad87fd286
--- /dev/null
+++ b/egs/babel/s5d/local/augment_original_stm.pl
@@ -0,0 +1,110 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+# Copyright 2012 Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0.
+
+#This script takes the original BABEL STM file (part of the IndusDB)
+#and replaces the "Aggregated" field with a correct speaker ID.
+#As a result, the scoring will be done on a per-speaker basis as well.
+#As the segmentation in the segments file generally does not correspond to
+#the segmentation of the original STM file, it combines the files
+#segments and utt2spk to work out the correct speaker ID for
+#the reference segment.
+#In case of overlap, it will either use the previous speaker or
+#print out an error message.
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+
+@ARGV == 2 || die "$0 <stm-file> <data-dir>\n";
+
+my $warn_count = 0;
+my $warn_max = 10;
+my $stm_file = shift @ARGV;
+my $data_dir = shift @ARGV;
+my %utt2spk;
+my %segments;
+
+open(F_u, "<$data_dir/utt2spk") || die "Could not open the file $data_dir/utt2spk\n";
+while(<F_u>) {
+  chop;
+  (my $utt, my $spk) = split;
+  $utt2spk{$utt} = $spk;
+}
+close(F_u);
+
+open(F_s, "<$data_dir/segments") || die "Could not open the file $data_dir/segments\n";
+while(<F_s>) {
+  chop;
+  (my $utt, my $file, my $seg_start, my $seg_end) = split;
+  push @{$segments{$file}}, [ $seg_start, $seg_end, $utt2spk{$utt}];
+}
+close(F_s);
+
+open(STM, "<$stm_file") || die "Could not open the STM file $stm_file";
+open(STMOUT, ">$data_dir/stm") || die "Could not open the output STM file $data_dir/stm";
+open(RECO, ">$data_dir/reco2file_and_channel") or die "Could not create the output file $data_dir/reco2file_and_channel";
+
+my $prev_filename = "";
+my @timestamps;
+my $i = 0;
+while(<STM>) {
+  chop;
+  (my $filename, my $line, my $aggregated, my $seg_start, my $seg_end, my $text) = split(/\s+/, $_, 6);
+  #print "$filename, $seg_start, $seg_end, $text\n";
+  $line="1";
+  if (( $prev_filename ne $filename ) && ( ";;$prev_filename" ne $filename)){
+    my $_filename = $filename;
+    $_filename =~ s/^;;//g;
+    next if not exists $segments{$_filename};
+    #print $filename, "\n";
+    $prev_filename = $_filename;
+    @timestamps = @{$segments{$_filename}};
+    #print Dumper(\@timestamps);
+    $i=0;
+    print RECO "$_filename $_filename $line\n";
+  }
+
+  my $max_i=@timestamps;
+  while ( ($i < $max_i ) && ($seg_start > @{$timestamps[$i]}[0] ) ) {
+    $i += 1;
+  }
+
+  if (($i >= $max_i ) && ($timestamps[$i-1][1] <= $seg_start) ){
+    #We are past the start of the last segment -> we assign the last speaker ID
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: the segment from the STM file starts after the last segment from the segments file ends\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i-1][0] $timestamps[$i-1][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #print "$i, $filename, $timestamps[$max_i - 1][2]\n";
+    print STMOUT "$filename $line $timestamps[$max_i - 1][2] $seg_start $seg_end $text\n";
+  } elsif ( $i == 0 ) {
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: The segment from the STM file starts before the first segment from the segments file\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i][0] $timestamps[$i][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #Even the first segment's start time was higher than the stm segment's start time.
+    #That means we do not really know to which speaker the stm segment belongs.
+    print STMOUT "$filename $line $timestamps[$i][2] $seg_start $seg_end $text\n";
+    #print "$i, $filename, $timestamps[$i][2]\n";
+  } else {
+    print STMOUT "$filename $line $timestamps[$i-1][2] $seg_start $seg_end $text\n";
STMOUT "$filename $line $timestamps[$i-1][2] $seg_start $seg_end $text\n"; + #print "$i, $filename, $timestamps[$i-1][2]\n"; + } +} + +close(STMOUT); +close(STM); +close(RECO); diff --git a/egs/babel/s5d/local/best_path_weights.sh b/egs/babel/s5d/local/best_path_weights.sh new file mode 100755 index 00000000000..52782ee3655 --- /dev/null +++ b/egs/babel/s5d/local/best_path_weights.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary +# and is used to get the best path. The posteriors from other decode +# directories are interpolated with the posteriors of the best path. +# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory +# with the same basename as the primary directory. +# This is typically used to get better posteriors for semisupervised training +# of DNN +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and +# the interpolated posteriors in weights.*.gz are placed in +# exp/combine_dnn_sgmm/decode_train_unt.seg + +set -e + +# begin configuration section. +cmd=run.pl +stage=-10 +#end configuration section. + +help_message="Usage: "$(basename $0)" [options] [:weight] [:weight] [[:weight] ... ] + E.g. "$(basename $0)" data/train_unt.seg data/lang exp/tri1/decode:0.5 exp/tri2/decode:0.25 exp/tri3/decode:0.25 exp/combine +Options: + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. +"; + +[ -f ./path.sh ] && . ./path.sh +. 
parse_options.sh || exit 1; + + +if [ $# -lt 4 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +mkdir -p $dir +mkdir -p $dir/log + +decode_dir=`echo ${decode_dirs[0]} | cut -d: -f1` +nj=`cat $decode_dir/num_jobs` + +out_decode=$dir/`basename $decode_dir` +mkdir -p $out_decode + +if [ $stage -lt -1 ]; then + mkdir -p $out_decode/log + $cmd JOB=1:$nj $out_decode/log/best_path.JOB.log \ + lattice-best-path --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \ + ark:/dev/null "ark:| gzip -c > $out_decode/best_path_ali.JOB.gz" || exit 1 +fi + +weights_sum=0.0 + +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + + weight=`echo $decode_dir | cut -d: -s -f2` + [ -z "$weight" ] && weight=1.0 + + if [ $i -eq 0 ]; then + file_list="\"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + else + file_list="$file_list \"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + fi + + weights_sum=`perl -e "print STDOUT $weights_sum + $weight"` +done + +inv_weights_sum=`perl -e "print STDOUT 1.0/$weights_sum"` + +for i in `seq 0 $[num_sys-1]`; do + if [ $stage -lt $i ]; then + decode_dir=`echo ${decode_dirs[$i]} | cut -d: -f1` + + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir + tree=`dirname $decode_dir`/tree # tree one level up from decode dir + + for f in $model $decode_dir/lat.1.gz $tree; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + if [ $i -eq 0 ]; then + nj=`cat $decode_dir/num_jobs` || exit 1; + cp $model $dir || exit 1 + cp $tree $dir || exit 1 + echo $nj > $out_decode/num_jobs + else + if [ $nj != `cat $decode_dir/num_jobs` ]; then + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + exit 1; + fi + fi + + $cmd JOB=1:$nj $dir/log/get_post.$i.JOB.log \ + lattice-to-post --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \ + post-to-pdf-post $model ark,s,cs:- ark:- \| \ + get-post-on-ali ark,s,cs:- "ark,s,cs:gunzip -c $out_decode/best_path_ali.JOB.gz | convert-ali $dir/final.mdl $model $tree ark,s,cs:- ark:- | ali-to-pdf $model ark,s,cs:- ark:- |" "ark:| gzip -c > $out_decode/weights.$i.JOB.gz" || exit 1 + fi +done + +if [ $stage -lt $num_sys ]; then + if [ "$num_sys" -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/move_post.JOB.log \ + mv $out_decode/weights.0.JOB.gz $out_decode/weights.JOB.gz || exit 1 + else + $cmd JOB=1:$nj $dir/log/interpolate_post.JOB.log \ + vector-sum $file_list \ + "ark:| vector-scale --scale=$inv_weights_sum ark:- ark:- | gzip -c > $out_decode/weights.JOB.gz" || exit 1 + fi +fi + +exit 0 diff --git a/egs/babel/s5d/local/best_scores.sh b/egs/babel/s5d/local/best_scores.sh new file mode 100755 index 00000000000..a3b2af187e1 --- /dev/null +++ b/egs/babel/s5d/local/best_scores.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -o nounset # Treat unset variables as an error + + +if [ ! -x results ] ; then + data=$(readlink -f ./local) + data=$(dirname $data) + mkdir -p $data/results + ln -s $data/results results +fi + +if [ ! 
-e ./RESULTS ] ; then
+  p=$(basename `readlink -f lang.conf`)
+  p=${p%%.*}
+  filename=results.${p}.${USER}.$(date --iso-8601=seconds)
+  echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename
+  ln -sf results/$filename RESULTS
+fi
+
+
+set -f
+export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` )
+set +f
+(
+  echo -e "#\n# STT Task performance (WER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`"
+  for f in "${mydirs[@]}"; do
+    find $f -name "*.sys" -not -name "*char*" | xargs grep Avg | utils/best_wer.sh
+  done | column -t
+) >> RESULTS
+
+(
+  ls exp/tri5/decode*dev10h*/score_*/*char*sys >/dev/null 2>&1 || exit 0
+  echo -e "#\n# STT Task performance (CER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`"
+  for f in "${mydirs[@]}"; do
+    find $f -name "*.sys" -name "*char*" | xargs grep Avg | utils/best_wer.sh
+  done | column -t
+) >> RESULTS
+
diff --git a/egs/babel/s5d/local/best_scores_kws.sh b/egs/babel/s5d/local/best_scores_kws.sh
new file mode 100755
index 00000000000..dcf4508d5e1
--- /dev/null
+++ b/egs/babel/s5d/local/best_scores_kws.sh
@@ -0,0 +1,179 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+set -o nounset    # Treat unset variables as an error
+
+
+if [ ! -x results ] ; then
+  data=$(readlink -f ./local)
+  data=$(dirname $data)
+  mkdir -p $data/results
+  ln -s $data/results results
+fi
+
+if [ ! -e ./RESULTS.kws ] ; then
+  p=$(basename `readlink -f lang.conf`)
+  p=${p%%.*}
+  filename=kws_results.${p}.${USER}.$(date --iso-8601=seconds)
+  echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename
+  ln -sf results/$filename RESULTS.kws
+fi
+
+
+set -f
+export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` )
+set +f
+export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` )
+(
+  #### Word search (converted lattices)
+  for kwset in "${kwsets[@]}"; do
+    echo -e "#\n# KWS Task performance (TWV), for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`"
+    (
+      for f in "${mydirs[@]}"; do
+        find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1
+      done | \
+      while IFS='' read -r line || [[ -n "$line" ]]; do
+        file=$(echo $line | sed 's/:.*//g' )
+        cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file)
+      done
+    ) | column -t | sort -k3,3g | \
+    (
+      while IFS='' read -r line || [[ -n "$line" ]]; do
+        echo $line
+        f=$(echo $line | rev | awk '{print $1}'| rev)
+        d=$(dirname $f)
+        echo -ne "\tOOV=0\t"
+        local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt
+        echo -ne "\tOOV=1\t"
+        local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt
+
+      done
+    )
+  done
+
+  #### Syllable search (converted word lattices)
+  export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" 
-ipath "*syllabs*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*syllabs*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + done + + + #### Phone search (converted word lattices) + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -ipath "*phones*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*phones*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.syll.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! 
-z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.phn.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! -z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi +) | tee RESULTS.kws diff --git a/egs/babel/s5d/local/build_edit_distance_fst.pl b/egs/babel/s5d/local/build_edit_distance_fst.pl new file mode 100755 index 00000000000..51c46667727 --- /dev/null +++ b/egs/babel/s5d/local/build_edit_distance_fst.pl @@ -0,0 +1,127 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. 
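+#
+# Quick illustration (hypothetical two-phone set {a, b}, default costs): the
+# output is an FST in OpenFst text format, "src dst ilabel olabel [cost]", e.g.
+#   1 1 a <eps> 1     (deletion of a)
+#   1 1 <eps> a 1     (insertion of a)
+#   1 1 a b 1         (substitution of a by b)
+#   1 1 a a 0         (match)
+#   1                 (final state, when --boundary-off is true)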
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: build_edit_distance_fst.pl [options] <phone-list> <fst-out>
+ Build an edit distance FST at the phone level.
+
+Allowed options:
+  --confusion-matrix    : Matrix for insertion, deletion and substitution. (string, default="")
+  --ins-cost            : Insertion cost                        (double, default=1 )
+  --del-cost            : Deletion cost                         (double, default=1 )
+  --subs-cost           : Substitution cost                     (double, default=1 )
+  --boundary-ins-cost   : Cost for insertions at word boundary  (double, default=0.1)
+  --boundary-off        : No insertions at word boundary        (boolean, default=true)
+EOU
+
+my $confusion_matrix = "";
+my $insertion_cost = 1;
+my $deletion_cost = 1;
+my $substitution_cost = 1;
+my $boundary_ins_cost = 0.1;
+my $boundary_off="true";
+GetOptions('confusion-matrix=s' => \$confusion_matrix,
+  'ins-cost=f' => \$insertion_cost,
+  'del-cost=f' => \$deletion_cost,
+  'subs-cost=f' => \$substitution_cost,
+  'boundary-ins-cost=f' => \$boundary_ins_cost,
+  'boundary-off=s' => \$boundary_off);
+
+@ARGV == 2 || die $Usage;
+
+$boundary_off eq "true" || $boundary_off eq "false" || die "$0: Bad value for option --boundary-off\n";
+
+# Work out the input and output parameters
+my $phone_in = shift @ARGV;
+my $fst_out = shift @ARGV;
+
+open(I, "<$phone_in") || die "$0: Fail to open phone list $phone_in\n";
+open(O, ">$fst_out") || die "$0: Fail to write FST $fst_out\n";
+
+# Read confusion matrix
+my %confusion;
+if ($confusion_matrix ne "") {
+  open(M, "<$confusion_matrix") || die "$0: Fail to open confusion matrix $confusion_matrix\n";
+  while (<M>) {
+    chomp;
+    my @col = split();
+    @col == 3 || die "$0: Bad line in confusion matrix \"$_\"\n";
+    $confusion{"$col[0]_$col[1]"} = $col[2];
+  }
+  close(M);
+}
+
+# Start processing
+my @phones;
+while (<I>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "$0: Bad number of columns in phone list \"$_\"\n";
+  if ($col[0] eq "<eps>") {next;}
+  push(@phones, $col[0]);
+}
+
+# Add insertions, deletions
+my $fst = "";
+foreach my $p (@phones) {
+  if ($confusion_matrix eq "") {
+    $fst .= "1 1 $p <eps> $deletion_cost\n";   # Deletions
+    $fst .= "1 1 <eps> $p $insertion_cost\n";  # Insertions
+    if ($boundary_off eq "false") {
+      $fst .= "0 0 <eps> $p $boundary_ins_cost\n";
+      $fst .= "0 1 <eps> $p $boundary_ins_cost\n";
+      $fst .= "2 2 <eps> $p $boundary_ins_cost\n";
+      $fst .= "1 2 <eps> $p $boundary_ins_cost\n";
+    }
+  } else {
+    my $key = "${p}_<eps>";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 $p <eps> $confusion{$key}\n";
+    }
+    $key = "<eps>_${p}";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 <eps> $p $confusion{$key}\n";
+      if ($boundary_off eq "false") {
+        $fst .= "0 0 <eps> $p $confusion{$key}\n";
+        $fst .= "0 1 <eps> $p $confusion{$key}\n";
+        $fst .= "2 2 <eps> $p $confusion{$key}\n";
+        $fst .= "1 2 <eps> $p $confusion{$key}\n";
+      }
+    }
+  }
+}
+foreach my $p1 (@phones) {
+  foreach my $p2 (@phones) {
+    if ($p1 eq $p2) {
+      $fst .= "1 1 $p1 $p2 0\n";
+    } else {
+      if ($confusion_matrix eq "") {
+        $fst .= "1 1 $p1 $p2 $substitution_cost\n";
+      } else {
+        my $key = "${p1}_${p2}";
+        if (defined($confusion{$key})) {
+          $fst .= "1 1 $p1 $p2 $confusion{$key}\n";
+        }
+      }
+    }
+  }
+}
+if ($boundary_off eq "false") {
+  $fst .= "0 1 <eps> <eps> 0\n";
+  $fst .= "1 2 <eps> <eps> 0\n";
+  $fst .= "2\n";
+} else {
+  $fst .= "1\n";
+}
+
+print O $fst;
+
+close(I);
+close(O);
diff --git a/egs/babel/s5d/local/chain/run_blstm.sh b/egs/babel/s5d/local/chain/run_blstm.sh
new file mode 100755
index 00000000000..f098604d04a
--- /dev/null
+++ b/egs/babel/s5d/local/chain/run_blstm.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+
+# by default, with cleanup:
+# local/chain/run_blstm.sh
+# %WER 46.8 | 19252 60586 | 57.6 28.5 13.8 4.5 46.8 31.7 | 
-0.643 | exp/chain_cleaned/blstm_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=-2 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
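+
+  # Brief notes on the train.py options below (these describe general 'chain'
+  # training behavior, not choices unique to this recipe):
+  #  --chain.xent-regularize 0.1     adds a cross-entropy output whose
+  #                                  objective regularizes the LF-MMI training;
+  #  --chain.leaky-hmm-coefficient   lets a small amount of probability mass
+  #                                  leak between HMM states, stabilizing the
+  #                                  denominator computation;
+  #  --egs.chunk-width 150           trains on chunks of 150 output frames.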
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab1.sh b/egs/babel/s5d/local/chain/run_blstm_bab1.sh new file mode 100755 index 00000000000..95c7e9f28aa --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab1.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.5 | 19252 60586 | 58.9 27.5 13.5 4.5 45.5 31.4 | -0.660 | exp/chain_cleaned/blstmbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab2.sh b/egs/babel/s5d/local/chain/run_blstm_bab2.sh new file mode 100755 index 00000000000..a6dd4cb9566 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab2.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 46.7 | 19252 60586 | 57.1 26.1 16.8 3.8 46.7 31.9 | -0.692 | exp/chain_cleaned/blstmbab2_sp_bi/decode_dev10h.pem/score_10/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. 
Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
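+
+  # (This bab2 variant keeps the baseline run_blstm.sh architecture; the main
+  # differences are in the schedule of the train.py call below: 6 epochs and
+  # --trainer.optimization.num-jobs-final 6, versus 4 epochs and 12 final
+  # jobs in the baseline.)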
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab3.sh b/egs/babel/s5d/local/chain/run_blstm_bab3.sh new file mode 100755 index 00000000000..52f085f8942 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab3.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.9 | 19252 60586 | 58.7 28.0 13.3 4.6 45.9 31.6 | -0.668 | exp/chain_cleaned/blstmbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab4.sh b/egs/babel/s5d/local/chain/run_blstm_bab4.sh new file mode 100755 index 00000000000..47704e80ae4 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab4.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. 
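+# (This bab4 variant keeps the baseline architecture; its main change is the
+# stronger cross-entropy regularizer, --chain.xent-regularize 0.25, in the
+# train.py call further below.)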
+blstm_affix=bab4 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
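+  # (--self-loop-scale 1.0 below is the standard choice when building graphs
+  # for 'chain' models, which are decoded with an acoustic scale of 1.0;
+  # this is general Kaldi practice rather than anything specific to bab4.)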
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab5.sh b/egs/babel/s5d/local/chain/run_blstm_bab5.sh new file mode 100755 index 00000000000..73c6a4089ed --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab5.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab5 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
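+
+  # (This bab5 variant combines the wider 256-dim recurrent/non-recurrent
+  # projections from the configs above with the stronger regularizer
+  # --chain.xent-regularize 0.25 used in the train.py call below.)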
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_xconfig.sh b/egs/babel/s5d/local/chain/run_blstm_xconfig.sh new file mode 100755 index 00000000000..27e1a571ad0 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_xconfig.sh @@ -0,0 +1,206 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 46.8 | 19252 60586 | 57.6 28.5 13.8 4.5 46.8 31.7 | -0.643 | exp/chain_cleaned/blstm_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=_xconfig #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+
+  #echo "$0: creating neural net configs";
+  #steps/nnet3/lstm/make_configs.py \
+  #  --self-repair-scale-nonlinearity 0.00001 \
+  #  --self-repair-scale-clipgradient 1.0 \
+  #  $dir/configs || exit 1;
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  label_delay=0
+  xent_regularize=0.1
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+  # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
+  lstmp-layer name=blstm1-forward input=lda cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  lstmp-layer name=blstm1-backward input=lda cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3
+  lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3
+  lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3
+  ## adding the layers for the chain branch
+  output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+  # adding the layers for the xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+
+fi
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage
+  fi
+  [ ! 
-d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh new file mode 100755 index 00000000000..696fd14b45f --- /dev/null +++ b/egs/babel/s5d/local/chain/run_ivector_common.sh @@ -0,0 +1,242 @@ +#!/bin/bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=30 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train_cleaned # you might set this to e.g. train. +gmm=tri5_cleaned # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. +langdir=data/langp/tri5_ali + +num_threads_ubm=12 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. +add_pitch=false + +. ./cmd.sh +. ./path.sh + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." 
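+ # (illustrative) i.e. either remove the stale features, e.g.:
+ #   rm -r data/${train_set}_sp_hires
+ # or skip their regeneration with something like:
+ #   local/chain/run_ivector_common.sh --stage 3 --train-set ${train_set}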
+ exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + utils/copy_data_dir.sh data/${train_set}_sp data/${train_set}_sp_hires + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage + fi + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l 4 option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT $temp_data_root/${train_set}_hires $langdir \ + $gmm_dir exp/nnet3${nnet3_affix}/tri5 +fi + + +if [ $stage -le 5 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + # we don't use the _comb data for this as there is no need for compatibility with + # the alignments, and using the non-combined data is more efficient for I/O + # (no messing about with piped commands). + num_utts_total=$(wc -l 11 option." 
+ exit 1 + fi + echo "$0: aligning with the perturbed, short-segment-combined low-resolution data" + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_sp_comb $langdir $gmm_dir $ali_dir +fi + + +exit 0; diff --git a/egs/babel/s5d/local/chain/run_tdnn.sh b/egs/babel/s5d/local/chain/run_tdnn.sh new file mode 100755 index 00000000000..2d9b6db75b7 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.0 | 19252 60586 | 58.0 28.0 14.0 5.0 47.0 31.6 | -0.540 | exp/chain_cleaned/tdnn_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
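+ # (illustrative note, assuming the standard nnet3 train.py semantics)
+ # the "effective" learning rates below are scaled by the current number
+ # of parallel jobs before being applied, so the rate each update sees is
+ # e.g.:
+ #   initial: 0.001  * 2 jobs  = 0.002
+ #   final:   0.0001 * 12 jobs = 0.0012
+ # with geometric interpolation over the training iterations in between.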
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab1.sh b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh new file mode 100755 index 00000000000..0fa4020977c --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 48.2 | 19252 60586 | 56.9 28.9 14.2 5.1 48.2 32.1 | -0.662 | exp/chain_cleaned/tdnnbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab2.sh b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh new file mode 100755 index 00000000000..ea9d5959c75 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.7 | 19252 60586 | 56.5 27.2 16.3 4.3 47.7 31.8 | -0.468 | exp/chain_cleaned/tdnnbab2_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. 
"a" or "b", in case we change the configuration. +tdnn_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab3.sh b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh new file mode 100755 index 00000000000..2973a2c9f02 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 46.7 | 19252 60586 | 57.4 26.4 16.2 4.0 46.7 31.6 | -0.469 | exp/chain_cleaned/tdnnbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
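+ # (illustrative note) common_egs_dir points at the egs dumped by the
+ # baseline tdnn run, so this variant skips egs generation and only
+ # retrains; to force a fresh egs dump you could invoke the script as:
+ #   local/chain/run_tdnn_bab3.sh --common-egs-dir ""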
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab4.sh b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh new file mode 100755 index 00000000000..bd2eba9cb8b --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.3 | 19252 60586 | 57.5 29.1 13.4 4.8 47.3 31.7 | -0.595 | exp/chain_cleaned/tdnnbab4_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab4 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 400 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh b/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..ec8366492d7 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh @@ -0,0 +1,227 @@ +#!/bin/bash + +# From egs/swbdrun_tdnn_lstm_1e.sh + +set -e -o pipefail -u + +# configs for 'chain' +stage=0 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. 
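+# (illustrative, mirroring the swbd recipe this file was adapted from)
+# with the defaults above the experiment directory ends up as e.g.:
+#   exp/chain/tdnn_lstm_1e_sp   # "_sp" appended since speed_perturb=true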
+decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $tree_dir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use
+ # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+ # 0.5 / args.xent_regularize is suitable as it means the xent
+ # final-layer learns at a rate independent of the regularization
+ # constant; and the 0.5 was tuned so as to make the relative progress
+ # similar in the xent and regular final layers.
+ output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+if [ $stage -le 13 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage
+ fi
+ [ ! -d $dir/egs ] && mkdir -p $dir/egs/
+ touch $dir/egs/.nodelete # keep egs around when that run dies.
+
+ # you could also try --trainer.num-chunk-per-minibatch 128,64 here; note
+ # that a commented-out option cannot sit inside the backslash-continued
+ # command below, since a comment line breaks the line continuation.
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.00005 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--frames-overlap-per-eg 0" \
+ --trainer.num-chunk-per-minibatch 128 \
+ --trainer.frames-per-iter 1500000 \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 2 \
+ --trainer.optimization.num-jobs-final 6 \
+ --trainer.optimization.shrink-value 0.99 \
+ --trainer.optimization.initial-effective-lrate 0.001 \
+ --trainer.optimization.final-effective-lrate 0.0001 \
+ --trainer.max-param-change 2.0 \
+ --cleanup.remove-egs $remove_egs \
+ --feat-dir data/${train_set}_hires \
+ --trainer.optimization.momentum 0.0 \
+ --trainer.deriv-truncate-margin 8 \
+ --egs.stage $get_egs_stage \
+ --egs.chunk-width $frames_per_chunk \
+ --egs.chunk-left-context $chunk_left_context \
+ --egs.chunk-right-context $chunk_right_context \
+ --egs.chunk-left-context-initial 0 \
+ --egs.chunk-right-context-final 0 \
+ --tree-dir $tree_dir \
+ --lat-dir $lat_dir \
+ --dir $dir
+fi
+
+if [ $stage -le 14 ]; then
+ # Note: it might appear that this $lang directory is mismatched, and it is as
+ # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+ # the lang directory.
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/check_models.sh b/egs/babel/s5d/local/check_models.sh
new file mode 100755
index 00000000000..88b3dacc94b
--- /dev/null
+++ b/egs/babel/s5d/local/check_models.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+
+check_model () {
+ model=$1
+ if [ -s $model ]; then echo $model
+ else
+ dir=`dirname $model`
+ latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'`
+ echo "*$model is not there, latest is: $latest_model"
+ fi
+}
+
+for model in exp/mono/final.mdl exp/tri{1,2,3}/final.mdl; do
+ check_model $model
+done
+
+if [ ! -f exp/tri4/final.mdl ]; then
+ echo "*exp/tri4/final.mdl is not there*"
+ exit 1
+fi
+
+if [ -f exp/tri4/trans.1 ]; then # This is LimitedLP.
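+ # (descriptive note) check_model above prints the model path when it
+ # exists and is non-empty; otherwise it reports the newest intermediate
+ # model, producing output like (example only):
+ #   *exp/sgmm5/final.alimdl is not there, latest is: exp/sgmm5/25.mdl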
+ models="exp/tri4/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri5_nnet/final.mdl" +else + models="exp/tri4/final.mdl exp/tri5/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri6_nnet/final.mdl" +fi +models="$models exp_BNF/tri5/final.mdl exp_BNF/tri6/final.alimdl exp_BNF/sgmm7/final.alimdl" + +for model in $models; do + check_model $model +done + + diff --git a/egs/babel/s5d/local/check_tools.sh b/egs/babel/s5d/local/check_tools.sh new file mode 100755 index 00000000000..2c96f8445d1 --- /dev/null +++ b/egs/babel/s5d/local/check_tools.sh @@ -0,0 +1,40 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh + +sph2pipe=`command -v sph2pipe 2>/dev/null` \ + || { echo >&2 "sph2pipe not found on PATH. Did you run make in the $KALDI_ROOT/tools directory?"; exit 1; } + +srilm=`command -v ngram 2>/dev/null` \ + || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh"; exit 1; } + +sox=`command -v sox 2>/dev/null` \ + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } + +# If sox is found on path, check if the version is correct +if [ ! -z "$sox" ]; then + sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` + if [[ ! $sox_version =~ v14.4.* ]]; then + echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." + exit 1 + fi +fi + +exit 0 + + diff --git a/egs/babel/s5d/local/check_wers.sh b/egs/babel/s5d/local/check_wers.sh new file mode 100755 index 00000000000..10e1a89ee3a --- /dev/null +++ b/egs/babel/s5d/local/check_wers.sh @@ -0,0 +1,50 @@ +#!/bin/bash + + + +check_wer () { + dir=$1 + if [ -d $dir ]; then + seen_dir=false + for ddir in $dir/decode*; do + if [ -d $ddir ]; then + seen_dir=true + printf " % -40s " $ddir + line=`grep Sum $ddir/score_*/*.sys 2>/dev/null | $char_command | utils/best_wer.sh` + if [ -z "$line" ]; then echo "------" + else echo $line | cut -c 1-65; fi + fi + done + ! 
$seen_dir && echo "$dir ********** no decode dirs" + fi + +} + +final=false +char_command="grep -v char" + +for n in `seq 10`; do + if [ "$1" == "--final" ]; then + final=true + shift + fi + if [ "$1" == "--char" ]; then + char_command="grep char" + shift + fi +done + +if [ $# != 0 ]; then + echo "Usage: local/check_wers.sh [--final] [--char]" + exit 1; +fi + +if $final; then + for dir in exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/sgmm7 exp_BNF/sgmm7_mmi_b0.1 exp/combine*; do + check_wer $dir + done +else + for dir in exp/tri{2,3,4,5} exp/sgmm5 exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/* exp/combine_*; do + check_wer $dir + done +fi diff --git a/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh new file mode 100755 index 00000000000..f320cfa19cd --- /dev/null +++ b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh @@ -0,0 +1,124 @@ +#!/bin/bash -e + +# Creating a UEM decoding setup with CMU segmentation from Florian (Feb 15, 2013). +dummy_text=true +text= +filelist= +#end of configuration + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ] ; then + echo "$0: Converts the CMU segmentation database file into a kaldi data directory for UEM decoding" + echo "" + echo "cmu_ume2kaldi_dir.sh " + echo "example: cmu_ume2kaldi_dir.sh db-tag-eval-utt.dat /export/babel/data/106-tagalog/audio data/eval.uem" + echo "Was called with: $*" + exit 1; +fi + +database=$1 +audiopath=$2 +datadir=$3 + +echo $0 $@ +mkdir -p $datadir +# 1. Create the segments file: +[ ! -f $database ] && echo "Database file $1 does not exist!" && exit 1; + +echo "Converting `basename $database` to kaldi directory $datadir " +cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \ + perl -ne '@K = split; + $utteranceID = @K[0]; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*@K[2])); + printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments + +if [ ! -z $filelist ] ; then + mv $datadir/segments $datadir/segments.full + grep -F -f $filelist $datadir/segments.full > $datadir/segments + + l=`grep -v -F -f $filelist $datadir/segments.full | cut -f 2 -d ' ' | sort -u | wc -l` + echo "Because of using filelist, $l files omitted" +fi + + + # 2. Create the utt2spk file: + +echo "Creating the $datadir/utt2spk file" +cut -f1 -d' ' $datadir/segments | \ + perl -ne 'chomp; m:([^_]+_[AB]).*:; print "$_ $1\n";' | \ + sort > $datadir/utt2spk + + # 3. Create the spk2utt file: + +echo "Creating the $datadir/spk2utt file" +perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";} + END{foreach $spk (sort keys %utt) { + printf("%s%s\n", $spk, $utt{$spk}); + } + }' < $datadir/utt2spk | sort > $datadir/spk2utt + +# 4. Create the wav.scp file: +sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` +if [ $? -ne 0 ] ; then + echo "Could not find sph2pipe binary. Add it to PATH" + exit 1; +fi +sox=`which sox` +if [ $? -ne 0 ] ; then + echo "Could not find sox binary. 
Add it to PATH" + exit 1; +fi + +echo "Creating the $datadir/wav.scp file" +( + set -o pipefail + for file in `cut -f 2 -d ' ' $datadir/segments` ; do + if [ -f $audiopath/audio/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiopath/audio/$file.sph |" + elif [ -f $audiopath/audio/$file.wav ] ; then + echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + exit 1 + fi + done | sort -u > $datadir/wav.scp + if [ $? -ne 0 ] ; then + echo "Error producing the wav.scp file" + exit 1 + fi +) || exit 1 + +l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` +echo "wav.scp contains $l1 files" +if [ ! -z $filelist ] ; then + l2=`wc -l $filelist | cut -f 1 -d ' '` + echo "filelist `basename $filelist` contains $l2 files" + + if [ "$l1" -ne "$l2" ] ; then + echo "WARNING: Not all files from the specified fileset made their way into wav.scp" + fi +fi + +# 5. Create the text file: +echo "Creating the $datadir/text file" +if [ ! -z $text ] ; then + cp $text $datadir/text || echo "Could not copy the source text file \"$text\" " && exit 1 +elif $dummy_text ; then + cut -f1 -d' ' $datadir/segments | \ + sed -e 's/$/ IGNORE_TIME_SEGMENT_IN_SCORING/' | \ + sort > $datadir/text +fi + +# 6. reco2file_and_channel +echo "Creating the $datadir/reco2file_and_channel file" +(for f in $( cut -f 1 -d ' ' $datadir/wav.scp ) ; do echo $f $f "1"; done) > $datadir/reco2file_and_channel +echo "Everything done" + + + diff --git a/egs/babel/s5d/local/count_to_logprob.pl b/egs/babel/s5d/local/count_to_logprob.pl new file mode 100755 index 00000000000..7d779321810 --- /dev/null +++ b/egs/babel/s5d/local/count_to_logprob.pl @@ -0,0 +1,94 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; + +my $Usage = < + This script takes in the confusion phone pair counts and converts + the counts into negated log probabilities. 
The counts should be in + the following format: + p1 p2 count1 // For substitution + p3 count2 // For deletion + p4 count3 // For insertion + +Allowed options: + --cutoff : Minimal count to be considered (int , default=1) +EOU + +my $cutoff = 1; +GetOptions('cutoff=i' => \$cutoff); + +@ARGV == 2 || die $Usage; + +# Workout the input and output parameters +my $cm_in = shift @ARGV; +my $cm_out = shift @ARGV; + +open(I, "<$cm_in") || die "$0: Fail to open keywords file $cm_in\n"; +open(O, ">$cm_out") || die "$0: Fail to write confusion matrix $cm_out\n"; + +# Collect counts +my %ins; +my %del; +my %subs; +my %phone_count; +my $ins_count = 0; +my $del_count = 0; +while () { + chomp; + my @col = split(); + @col == 3 || die "$0: Bad line in confusion matrix file: $_\n"; + my ($p1, $p2, $count) = ($col[0], $col[1], $col[2]); + $count >= $cutoff || next; + if ($p1 eq "" && $p2 ne "") { + $ins{$p2} = $count; + $ins_count += $count; + } elsif ($p1 ne "" && $p2 eq "") { + $del{$p1} = $count; + $del_count += $count; + } elsif ($p1 ne "" && $p2 ne "") { + $p1 ne $p2 || next; # Skip same phone convert + $subs{"${p1}_$p2"} = $count; + if (defined($phone_count{$p1})) { + $phone_count{$p1} += $count; + } else { + $phone_count{$p1} = $count; + } + } +} + +# Compute negated log probability +foreach my $key (keys %ins) { + $ins{$key} = -log($ins{$key}/$ins_count); +} +foreach my $key (keys %del) { + $del{$key} = -log($del{$key}/$del_count); +} +foreach my $key (keys %subs) { + my @col = split(/_/, $key); + $subs{$key} = -log($subs{$key}/$phone_count{$col[0]}); +} + +# print results +my $output = ""; +foreach my $key (keys %ins) { + $output .= " $key $ins{$key}\n"; +} +foreach my $key (keys %del) { + $output .= "$key $del{$key}\n"; +} +foreach my $key (keys %subs) { + my @col = split(/_/, $key); + $output .= "$col[0] $col[1] $subs{$key}\n"; +} + +print O $output; + +close(I); +close(O); diff --git a/egs/babel/s5d/local/create_shadow_dataset.sh b/egs/babel/s5d/local/create_shadow_dataset.sh new file mode 100755 index 00000000000..49467ed28c1 --- /dev/null +++ b/egs/babel/s5d/local/create_shadow_dataset.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University +# Apache 2.0. + +stage=0 + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +[ -f /export/babel/data/software/env.sh ] && . /export/babel/data/software/env.sh + +. utils/parse_options.sh + +if [ $# -ne 3 ]; then + echo "Usage: create_shadow_dataset.sh " + exit 1 +fi + +dest=$1 +src1=$2 +src2=$3 + +mkdir -p $dest/kws + +if [ $stage -le 0 ] ; then + utils/combine_data.sh $dest $src1 $src2 || exit 1 +fi + +if [ $stage -le 1 ] ; then + #zkombinovat ecf + echo "Combining ECF files..." 
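+ # (descriptive sketch of the inline perl below) it merges the two
+ # ecf.xml files: the excerpt entries are concatenated, duplicate
+ # audio_filename attributes produce a warning, and the per-excerpt
+ # durations are summed into the new source_signal_duration.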
+ perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + + my $src1 = XMLin($ARGV[0]); + my $src2 = XMLin($ARGV[1]); + my $tgt={}; + my %filename_hash; + + my $expected_duration=0.0; + my $duration=0.0; + + if ( $src1->{language} ne $src2->{language} ) { + die "ECF languages differ in the source ecf.xml files" + } + $expected_duration=$src1->{source_signal_duration} + $src2->{source_signal_duration}; + + $tgt->{source_signal_duration} = $expected_duration; + $tgt->{language}=$src1->{language}; + $tgt->{version}="Generated automatically by the shadow_set.sh script"; + $tgt->{excerpt}= []; + + #print Dumper(\$src1); + foreach my $excerpt ( @{$src1->{excerpt}} ) { + push @{$tgt->{excerpt}}, $excerpt; + if ( exists $filename_hash{$excerpt->{audio_filename}} ) { + print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n" + } else { + $duration += $excerpt->{dur} ; + $filename_hash{$excerpt->{audio_filename}} = $excerpt; + } + } + foreach my $excerpt ( @{$src2->{excerpt}} ) { + push @{$tgt->{excerpt}}, $excerpt; + if ( exists $filename_hash{$excerpt->{audio_filename}} ) { + print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n" + } else { + $duration += $excerpt->{dur} ; + $filename_hash{$excerpt->{audio_filename}} = $excerpt; + } + } + $tgt->{source_signal_duration} = $duration; + + my $tgtxml = XMLout($tgt, RootName=>"ecf"); + print $tgtxml; + ' $src1/kws/ecf.xml $src2/kws/ecf.xml > $dest/kws/ecf.xml +fi + +if [ $stage -le 2 ] ; then + #zkombinovat kwlist + echo "Combining the KWLIST files" + perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + my $src1 = XMLin($ARGV[0], ForceArray => 1); + my $src2 = XMLin($ARGV[1], ForceArray => 1); + my $tgt={}; + my %kwid_hash; + + if ( $src1->{compareNormalize} ne $src2->{compareNormalize} ) { + die "KWLIST compareNormalize attributes differ in the source kwlist.xml files"; + } + if ( $src1->{language} ne $src2->{language} ) { + die "KWLIST languages differ in the source kwlist.xml files"; + } + + $tgt->{ecf_filename} = ""; + $tgt->{language}=$src1->{language}; + $tgt->{compareNormalize}=$src1->{compareNormalize}; + $tgt->{encoding}=$src1->{encoding}; + $tgt->{version}="1"; + $tgt->{kw}= []; + + + foreach my $kw ( @{$src1->{kw}} ) { + $kw->{kwid} = $kw->{kwid} . "-A"; + if ( exists $kwid_hash{$kw->{kwid}} ) { + print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n"; + } else { + $kwid_hash{$kw->{kwid}} = $kw; + } + push @{$tgt->{kw}}, $kw; + } + foreach my $kw ( @{$src2->{kw}} ) { + $kw->{kwid} = $kw->{kwid} . 
"-B"; + if ( exists $kwid_hash{$kw->{kwid}} ) { + print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n"; + } else { + $kwid_hash{$kw->{kwid}} = $kw; + } + push @{$tgt->{kw}}, $kw; + } + + my $tgtxml = XMLout($tgt, RootName=>"kwlist", KeyAttr=>""); + print $tgtxml; + ' $src1/kws/kwlist.xml $src2/kws/kwlist.xml > $dest/kws/kwlist.xml || exit 1 +fi + +if [ $stage -le 3 ] ; then + echo "Making KWLIST maps" + perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + my $src1 = XMLin($ARGV[0], ForceArray => 1); + open TGT_DEV, ">", $ARGV[1] or die $!; + open TGT_TST, ">", $ARGV[2] or die $!; + + foreach my $kw ( @{$src1->{kw}} ) { + if ( $kw->{kwid} =~ "KW.+-A\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_DEV "$old_kw\t$new_kw\n"; + } elsif ( $kw->{kwid} =~ "KW.+-B\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_TST "$old_kw\t$new_kw\n"; + } else { + die "Unsupported or unknown KW ID: $kw->{kwid}\n"; + } + } + ' $dest/kws/kwlist.xml $dest/kws/kws_map.dev.txt $dest/kws/kws_map.test.txt || exit 1 +fi + +exit 0 + diff --git a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl b/egs/babel/s5d/local/cstr_ndx2flist.pl similarity index 99% rename from egs/sprakbanken/s5/local/cstr_ndx2flist.pl rename to egs/babel/s5d/local/cstr_ndx2flist.pl index d19db421a9f..79daa1a99db 100755 --- a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl +++ b/egs/babel/s5d/local/cstr_ndx2flist.pl @@ -16,7 +16,7 @@ # limitations under the License. # This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. +# for the way the WSJ data is structured on the Edinburgh systems. # - Arnab Ghoshal, 12/1/12 # This program takes as its standard input an .ndx file from the WSJ corpus that looks @@ -25,7 +25,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. 
-#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 diff --git a/egs/babel/s5d/local/ctm2segments.pl b/egs/babel/s5d/local/ctm2segments.pl new file mode 100755 index 00000000000..55a8bd84fc8 --- /dev/null +++ b/egs/babel/s5d/local/ctm2segments.pl @@ -0,0 +1,159 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; + +my $cf_needed = 0.9; +my $cf_needed_upper = 1; +my $extend_segments = 0.0 ; + +my $Usage = < + +Allowed options: + --min-cf : Minimum CF to include the word (float, default = 0.9) + --max-cf : Maximum CF to include the word (float, default = 1.0) + --extend-segments : Add this delta to the boundaries of the segments (float, default = 0.0) +EOU + +GetOptions('min-cf=f' => \$cf_needed, + 'max-cf=f' => \$cf_needed_upper, + 'extend-segments=f' => \$extend_segments, + ); + + +# Get parameters +my $filein = shift @ARGV; +my $dirout = shift @ARGV; + + +my @segments; +my @utterances; +my @text; + +my $words = ""; +my $seg_end = -1; +my $seg_start = -1; +my $filename; + +my $total_seconds=0; +my $extracted_seconds=0; +open(FILEIN, $filein); +while (my $line= ) { + chop $line; + my @entries = split(/ /, $line); + die "Cannot parse line \"$line\"" if scalar @entries != 6; + + ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries; + + $total_seconds += $end * 1.0; + + if ($conf >= $cf_needed ) { + if ( $words ne "" ) { + #print "Extend segment\n"; + $words .= " $word"; + $seg_end = $beg * 1.0 + $end*1.0; + } else { + #start a new segment + #print "Start segment\n"; + $seg_start = $beg; + $seg_end = $beg * 1.0 + $end*1.0; + $words = $word; + } + } else { + #flush the segment + if ( $words ) { + my @filename_parts = split(/_/, $filename); + my $channel="C"; + if ($filename_parts[6] eq "inLine" ) { + $channel="A"; + } elsif ($filename_parts[6] eq "outLine" ) { + $channel="B"; + } + + $extracted_seconds+= ($seg_end - $seg_start); + $seg_start -= $extend_segments; + $seg_end += $extend_segments; + + my $spk_id=$filename_parts[3] . "_" . $channel; + my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); + my $last_part = sprintf("%06d", $seg_start * 100); + $utt_id .= "_" . $last_part; + #print $utt_id . " $beg \n"; + + #14350_A_20121123_042710_001337 + + #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 + my $segment = "$utt_id $filename $seg_start $seg_end"; + #14350_A_20121123_042710_001337 14350_A + my $utt2spk = "$utt_id $spk_id"; + #10901_A_20121128_230024_000227 hayi Lovemore + my $text = "$utt_id $words"; + push @segments, $segment; + push @utterances, $utt2spk; + push @text, $text; + $words = ""; + } + + } +} +if ( $words ) { + #print "Flush.\n"; + my @filename_parts = split(/_/, $filename); + my $channel="C"; + if ($filename_parts[6] eq "inLine" ) { + $channel="A"; + } elsif ($filename_parts[6] eq "outLine" ) { + $channel="B"; + } + + $extracted_seconds+= ($seg_end - $seg_start); + $seg_start -= $extend_segments; + $seg_end += $extend_segments; + + my $spk_id=$filename_parts[3] . "_" . $channel; + my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); + my $last_part = sprintf("%06d", $seg_start * 100); + $utt_id .= "_" . $last_part; + #print $utt_id . 
" $beg \n";
+
+ #14350_A_20121123_042710_001337
+
+ #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265
+ my $segment = "$utt_id $filename $seg_start $seg_end";
+ #14350_A_20121123_042710_001337 14350_A
+ my $utt2spk = "$utt_id $spk_id";
+ #10901_A_20121128_230024_000227 hayi Lovemore
+ my $text = "$utt_id $words";
+ push @segments, $segment;
+ push @utterances, $utt2spk;
+ push @text, $text;
+ $words = "";
+}
+
+open(SEGMENTS, "> $dirout/segments");
+foreach my $line (@segments) {
+ print SEGMENTS "$line\n";
+}
+close(SEGMENTS);
+
+open(TEXT, "> $dirout/text");
+foreach my $line (@text) {
+ print TEXT "$line\n";
+}
+close(TEXT);
+
+open(UTT, "> $dirout/utt2spk");
+foreach my $line (@utterances) {
+ print UTT "$line\n";
+}
+close(UTT);
+
+my $total_hours=sprintf("%.2f", $total_seconds/3600);
+my $extracted_hours=sprintf("%.2f", $extracted_seconds/3600);
+my $s_ex_secs=sprintf("%d", $extracted_seconds);
+
+print "Fragments extracted: $s_ex_secs seconds ($extracted_hours hours) out of $total_hours hours\n";
+
diff --git a/egs/babel/s5d/local/datasets/basic_kws.sh b/egs/babel/s5d/local/datasets/basic_kws.sh
new file mode 100644
index 00000000000..cff34eba69c
--- /dev/null
+++ b/egs/babel/s5d/local/datasets/basic_kws.sh
@@ -0,0 +1,28 @@
+#This script is not really supposed to be run directly
+#Instead, it should be sourced from the decoding script
+#It makes many assumptions about the existence of certain environment
+#variables, as well as a certain directory structure.
+
+if [ "${dataset_kind}" == "supervised" ] ; then
+ mandatory_variables="my_ecf_file my_kwlists my_rttm_file"
+ optional_variables="my_subset_ecf"
+else
+ mandatory_variables="my_ecf_file my_kwlists"
+ optional_variables="my_subset_ecf"
+fi
+
+check_variables_are_set
+
+if [ ! -f ${dataset_dir}/kws/.done ] ; then
+ kws_flags=( --use-icu true )
+ if [ "${dataset_kind}" == "supervised" ] || [ ! -z "$my_rttm_file" ] ; then
+ kws_flags+=(--rttm-file $my_rttm_file )
+ fi
+ if $my_subset_ecf ; then
+ kws_flags+=(--subset-ecf $my_data_list)
+ fi
+ local/kws_setup.sh --case_insensitive $case_insensitive \
+ "${kws_flags[@]}" "${icu_opt[@]}" \
+ $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1
+ touch ${dataset_dir}/kws/.done
+fi
diff --git a/egs/babel/s5d/local/datasets/extra_kws.sh b/egs/babel/s5d/local/datasets/extra_kws.sh
new file mode 100644
index 00000000000..d00eab1b06f
--- /dev/null
+++ b/egs/babel/s5d/local/datasets/extra_kws.sh
@@ -0,0 +1,137 @@
+#This script is not really supposed to be run directly
+#Instead, it should be sourced from the decoding script
+#It makes many assumptions about the existence of certain environment
+#variables, as well as a certain directory structure.
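+#
+# (illustrative usage, based on how these dataset fragments are sourced
+# from the decoding scripts in this recipe; the values are examples only)
+#   dataset_kind=supervised
+#   dataset_dir=data/dev10h.pem
+#   . ./local/datasets/extra_kws.sh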
+ +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlists my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlists" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +function register_extraid { + local dataset_dir=$1 + local extraid=$2 + echo "Registering $extraid" + echo $extraid >> $dataset_dir/extra_kws_tasks; + sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks +} + +function setup_oov_search { + local phone_cutoff=0 + + local g2p_nbest=10 + local g2p_mass=0.95 + + + local data_dir=$1 + local source_dir=$2 + local extraid=$3 + + local kwsdatadir=$data_dir/${extraid}_kws + + mkdir -p $kwsdatadir + + for file in $source_dir/rttm ; do + [ -f $file ] && cp -f $file $kwsdatadir + done + + for file in $source_dir/utter_* $source_dir/kwlist*.xml $source_dir/ecf.xml ; do + cp -f $file $kwsdatadir + done + + kwlist=$source_dir/kwlist_outvocab.xml + #Get the KW list + paste \ + <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \ + <(cat $kwlist | grep -o -P "(?<=).*(?=)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \ + >$kwsdatadir/keywords.txt + cut -f 2 $kwsdatadir/keywords.txt | \ + sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt + + + #Generate the confusion matrix + #NB, this has to be done only once, as it is training corpora dependent, + #instead of search collection dependent + if [ ! -f exp/conf_matrix/.done ] ; then + local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \ + exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1 + touch exp/conf_matrix/.done + fi + confusion=exp/conf_matrix/confusions.txt + + if [ ! -f exp/g2p/.done ] ; then + if [ -f data/.extlex ]; then + local/train_g2p.sh data/local/lexicon_orig.txt exp/g2p || return 1; + else + local/train_g2p.sh data/local/lexicon.txt exp/g2p || return 1; + fi + touch exp/g2p/.done + fi + local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \ + --var-counts $g2p_nbest --var-mass $g2p_mass \ + $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1 + L2_lex=$kwsdatadir/g2p/lexicon.lex + + if [ -z "$L1_lex" ] ; then + L1_lex=data/local/lexiconp.txt + fi + + local/kws_data_prep_proxy.sh \ + --cmd "$decode_cmd" --nj $my_nj \ + --case-insensitive true \ + --confusion-matrix $confusion \ + --phone-cutoff $phone_cutoff \ + --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \ + --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \ + $lang $data_dir $L1_lex $L2_lex $kwsdatadir + +} + + +kws_flags=( --use-icu true ) +if [ "${dataset_kind}" == "supervised" ] || [ ! -z "$my_rttm_file" ]; then + #The presence of the file had been already verified, so just + #add the correct switches + kws_flags+=(--rttm-file $my_rttm_file ) +fi +if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) +fi + +if [ ${#my_kwlists[@]} -ne 0 ] ; then + + touch $dataset_dir/extra_kws_tasks + + for extraid in "${!my_kwlists[@]}" ; do + #The next line will help us in running only one. We don't really + #know in which directory the KWS setup will reside in, so we will + #place the .done file directly into the data directory + [ -f $dataset_dir/.done.kws.$extraid ] && continue; + kwlist=${my_kwlists[$extraid]} + + local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \ + "${kws_flags[@]}" "${icu_opt[@]}" \ + $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1 + + #Register the dataset for default running... 
+ #We can do it without any problem here -- the kws_stt_tasks will not + #run it, unless called with --run-extra-tasks true switch + register_extraid $dataset_dir $extraid + touch $dataset_dir/.done.kws.$extraid + done + for extraid in "${!my_kwlists[@]}" ; do + #The next line will help us in running only one. We don't really + #know in which directory the KWS setup will reside in, so we will + #place the .done file directly into the data directory + [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; + setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov || exit 1 + register_extraid $dataset_dir ${extraid}_oov + touch $dataset_dir/.done.kws.${extraid}_oov + done +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_pem.sh b/egs/babel/s5d/local/datasets/supervised_pem.sh new file mode 100644 index 00000000000..e131fae40fa --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_pem.sh @@ -0,0 +1,35 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ "${dataset_type}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="my_stm_file " +fi + +check_variables_are_set + + +if [[ ! -f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/prepare_acoustic_training_data.pl --fragmentMarkers \-\*\~ \ + $my_data_dir ${dataset_dir} > ${dataset_dir}/skipped_utts.log || exit 1 +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_seg.sh b/egs/babel/s5d/local/datasets/supervised_seg.sh new file mode 100644 index 00000000000..45cc7f28593 --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_seg.sh @@ -0,0 +1,90 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ ${dataset_type} != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +segmentation_opts="--isolated-resegmentation \ + --min-inter-utt-silence-length 1.0 \ + --silence-proportion 0.05 " + +workdir=exp/make_seg/${dataset_id} +unseg_dir=$workdir +mkdir -p $unseg_dir +# 4. Create the wav.scp file: +sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` +if [ $? -ne 0 ] ; then + echo "Could not find sph2pipe binary. Add it to PATH" + exit 1; +fi +sox=`which sox` +if [ $? 
-ne 0 ] ; then + echo "Could not find sox binary. Add it to PATH" + exit 1; +fi + +echo "Creating the $unseg_dir/wav.scp file" +audiodir=$my_data_dir/audio +for file in `cat $my_data_list | sort -u` ; do + if [ -f $audiodir/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiodir/$file.sph |" + elif [ -f $audiodir/$file.wav ] ; then + echo "$file $sox $audiodir/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiodir/$file.(sph|wav) does not exist!" >&2 + exit 1 + fi +done | sort -u > $unseg_dir/wav.scp + +l1=`cat $unseg_dir/wav.scp | wc -l ` +l2=`cat $my_data_list | wc -l ` +if [ "$l1" -ne "$l2" ] ; then + echo "wav.scp number of files: $l1" + echo "filelist number of files: $l2" + echo "Not all files from the list $my_data_list found their way into wav.scp" + exit 1 +fi + +echo "Creating the $unseg_dir/reco2file_and_channel file" +cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel +cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk +utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt + +make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 + +local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ + --noise_oov false --segmentation_opts "$segmentation_opts" \ + $unseg_dir data/lang exp/tri4b_seg \ + $workdir $dataset_dir || exit 1 + +num_hours=`cat ${dataset_dir}/segments | \ + awk '{secs+= $4-$3;} END{print(secs/3600);}'` + +echo "Number of hours of the newly segmented data: $num_hours" + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentmarkers \-\*\~ ${dataset_dir} + fi +else + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_uem.sh b/egs/babel/s5d/local/datasets/supervised_uem.sh new file mode 100644 index 00000000000..5ac1e003d5d --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_uem.sh @@ -0,0 +1,36 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +eval my_data_cmudb=\$${dataset_type}_data_cmudb + +if [ "${dataset_kind}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +if [[ ! 
-f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_cmudb" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/cmu_uem2kaldi_dir.sh --filelist $my_data_list \ + $my_data_cmudb $my_data_dir ${dataset_dir} +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi diff --git a/egs/babel/s5d/local/datasets/unsupervised_seg.sh b/egs/babel/s5d/local/datasets/unsupervised_seg.sh new file mode 120000 index 00000000000..9e2e12b5bad --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_seg.sh @@ -0,0 +1 @@ +supervised_seg.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/unsupervised_uem.sh b/egs/babel/s5d/local/datasets/unsupervised_uem.sh new file mode 120000 index 00000000000..81440969d5c --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_uem.sh @@ -0,0 +1 @@ +supervised_uem.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/vocab_kws.sh b/egs/babel/s5d/local/datasets/vocab_kws.sh new file mode 100644 index 00000000000..d161fc77b67 --- /dev/null +++ b/egs/babel/s5d/local/datasets/vocab_kws.sh @@ -0,0 +1,51 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlist_file" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +if [ "$dataset_kind" == "shadow" ]; then + true #we do not support multiple kw lists for shadow set system + +elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then + #a This will work for both supervised and unsupervised dataset kinds + kws_flags=() + if [ "$dataset_kind" == "supervised" ] || [ ! -z "$my_rttm_file" ] ; then + kws_flags+=(--rttm-file $my_rttm_file ) + fi + if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) + fi + + #We just could come with some bogus naming scheme, + #but as long as the audio files can tell the iarpa lang id, we will use that + langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` + + #NB: we assume the default KWS search is already done and will "borrow" + #the rttm and ecf files. + #We could easily generate the ecf file, but the RTTM assumes the decoding + #had been already done. That could be done + #Ideally, these files should be generated here! 
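+  #For illustration, with a made-up language id of 206 the process
+  #substitution below feeds kws_setup.sh a keyword list with lines like
+  #  KWID206-FULLVOCAB-00042 <some-word>
+  #i.e. one entry per non-special word in $lang/words.txt, with the
+  #%05d field holding the word's integer id.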
+ + local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \ + --extraid fullvocab $my_ecf_file \ + <(cat $lang/words.txt | \ + grep -v "^<" | grep -v "^#" | \ + awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \ + $lang ${dataset_dir} || exit 1 + + echo fullvocab >> $dataset_dir/extra_kws_tasks; + sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks + touch $dataset_dir/.done.kws.fullvocab +fi + + diff --git a/egs/babel/s5d/local/decode_helper.sh b/egs/babel/s5d/local/decode_helper.sh new file mode 100755 index 00000000000..d2bed774c68 --- /dev/null +++ b/egs/babel/s5d/local/decode_helper.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +. ./cmd.sh + +TYPE=$1 +LANGDIR=$2 +MODELDIR=$3 +DEVDIR=$4 +TRANSFORMDIR=$5 + +echo "$@" + +if [ "$1" == "SI" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + steps/decode.sh --nj 20 --cmd "$decode_cmd" \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 +elif [ "$1" == "FMLLR" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 +elif [ "$1" == "SGMM" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + + steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; + + steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ + $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; + +fi + + diff --git a/egs/babel/s5d/local/eval_kw_subsets.sh b/egs/babel/s5d/local/eval_kw_subsets.sh new file mode 100755 index 00000000000..8a67225da52 --- /dev/null +++ b/egs/babel/s5d/local/eval_kw_subsets.sh @@ -0,0 +1,4 @@ +KWSEval -e ecf.xml -r rttm -t keyword_outvocab.xml -s kwslist.xml -c -o -b -d -f ./kws/outvocab +KWSEval -e ecf.xml -r rttm -t keyword_invocab.xml -s kwslist.xml -c -o -b -d -f ./kws/invocab +KWSEval -e ecf.xml -r rttm -t kws.xml -s kwslist.xml -c -o -b -d -f ./kws/fullvocab + diff --git a/egs/babel/s5d/local/extend_lexicon.sh b/egs/babel/s5d/local/extend_lexicon.sh new file mode 100755 index 00000000000..41b244f110b --- /dev/null +++ b/egs/babel/s5d/local/extend_lexicon.sh @@ -0,0 +1,561 @@ +#!/bin/bash + +# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal) +# 2014 Guoguo Chen +# 2015 MIT Lincoln Labs (author: Fred Richardson) +# Apache 2.0. + +# This script takes an input lexicon (e.g. lexicon.txt) and generates likely +# out of vocabulary words from it, with their associated spellings. It outputs +# two files: lexiconp.txt (this is the lexicon format that has pronunciation +# probabilities; the words in the original lexicon have probability one), and +# oov2prob, which says how the OOV mass is distributed among the new OOV words +# in the lexicon. + +# It assumes that the syllables in pronunciations in the input lexicon.txt are +# separated by tabs, as is normal for the BABEL setup; the syllable boundaries +# are necessary for the method that this script uses. + +# We use SRILM to train an lm (lm.gz) by treating the sequence of syllables in a +# pronunciation like the sequence of words in a sentence; we use a 3-gram +# Kneser-Ney smoothed model, as this seemed to work best. We then generate +# "sentences" (really, pronunciations) from this LM using the "ngram" command +# from SRILM with the "-gen" option. 
We do this in parallel, and also use SRILM +# to compute the probabilities of these "sentences". Then the "--num-prons" +# most likely generated pronunciations are selected (by default: one million). + +# Next, we use the g2p tool from "Sequitur" to learn a mapping from +# pronuciations of words to their spellings. This is the opposite of the normal +# direction of prediction, so we refer to the models as "p2g". To do this, we +# give g2p a reversed version of the input lexicon, so while the input lexicon +# might have entries like +# Hi h ay +# the reversed lexicon would have entries like +# hay H i +# We were concerned that depending on the way the phones are represented as +# letters, there might be a lot of ambiguity introduced when we get rid of the +# spaces (e.g. does "hay" come from h+ay, or h+a+y?), and that this might hurt +# the accuracy of the g2p prediction. We did not want to introduce a separator +# because we felt that this would make the mapping harder for g2p to learn. +# Instead we mapped the phones to unique letters; this is what the "phone_map" +# file is about. Furthermore, in BABEL we have the concept of tags on the +# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. +# As far as Kaldi is concerned, ay_3 is a single phone. To avoid the number of +# letters blowing up too much, we make these tags separate letters when generating +# phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to +# X. To avoid ambiguity being introduced, we ensure that the alphabets for the +# phones and the tags are distinct (and in general, we allow multiple tags, with +# the tags in different positions having distinct alphabets). + +# Once we have our g2p models trained (and the g2p training is the most time +# consuming aspect of this script), we apply g2p to all of our generated +# pronunciations to give us likely spelling variants. The number of +# alternatives is controlled by the options --var-mass (default: 0.8, meaning we +# generate 0.8 of the entire probability mass), and --var-counts (default: 3, +# meaning we generate at most 3 alternative spellings per pronunciation). We +# take the probabilities of the OOVs (as assigned by the syllable-level LM) and +# multiply them by the spelling probabilities assigned by g2p, to give us the +# probability of the (pronunciation, word) pair. From these pairs we strip out +# those with words (spellings) that were in the original lexicon, and those with +# pronunciations shorter than a specified minimum --min-phones (default: 3). We +# then limit the total number of pairs to --num-prons (default: one million) and +# scale us the probabilities of the pairs pairs so that they sum to one overall. + +# We format this information as two pieces: a lexicon with probabilities +# (lexiconp.txt) and a file that gives us the probability of each OOV word +# (oov2prob). The probabilities in lexiconp.txt are normalized so that the most +# probable pronunciation of each word is 1; the probabilities in oov2prob are +# normalized such that if we multiply by the pronunciation probability in +# lexiconp.txt, we would get the probability we assigned to that (pronunciation, +# word) pair. + +# These outputs are used as follows: lexiconp.txt will be used by +# utils/prepare_lang.sh to generate L.fst and L_disambig.fst in the lang/ +# directory, so the lexicon FSTs and words.txt will include the generated OOVs. +# oov2prob will be used when generating the grammar transducer G.fst by +# local/arpa2G.sh. 
For example, if you call arpa2G.sh with the options +# --oov-prob-file some/dir/oov2prob --unk-fraction 0.33, it will put all the OOVs +# listed in some/dir/oov2prob as if they were unigrams in G.fst, with probability +# equal to 0.33 times the probability listed in oov2prob. However, that script +# will not allow the unigram probability of any OOV word to be more probable than +# the least probable word which was originally in the ARPA file (not counting , +# which generally has probability -99); this is applied as a ceiling on the +# unknown-word probabilities. Note: the --unk-fraction should probably be +# similar to the OOV rate in that language. Calculating the OOV rate on some +# dev data is one reasonable way to set this; see the commands at the very +# bottom of this file for an example of how we can compute the OOV rate. +# (Arguably, one should give an even higher fraction than this, because given the +# unigram state, the probability of seeing an unknown word is higher). +# It might seem appropriate to use as "unk-fraction" the probability of +# the unknown word ( or ) in the LM itself. However, this depends +# how the LM was estimated; I think in the BABEL setup, appears as +# an actual word in the transcripts, and the probability that the LM assigns +# to it seems to be lower than appropriate. + +stage=-5 +g2p_iters=5 +num_prons=1000000 # number of prons to generate. +num_sent_gen=12000000 # number of sents to generate. this should + # exceed num_prons by a factor of at least + # several. +nj=40 # number of jobs to use for generation. +encoding='utf-8' # option for g2p; leave this as it is. +# the following two options are used in g2p generation. +var_counts=3 #Generate up to N variants in g2p +var_mass=0.8 #Generate enough variants to produce 80 % of the prob mass +min_phones=3 # minimum number of phones we allow in generated words + # (very short generated words could contribute to graph blowup, + # and might hurt the decoding accuracy also). +skip_done=false # if true, allows us to skip over done g2p stages. +cmd=run.pl +cleanup=true + +echo "$0 $@" # Print the command line for logging + +. utils/parse_options.sh +. path.sh + +if [ $# -ne 2 ] && [ $# -ne 3 ]; then + echo "$0: usage: extend_lexicon.sh [options] [dev_text]" + echo " e.g.: $0 data/local/lexicon_orig.txt data/local/extend/" + echo "Will create in the files lexiconp.txt and oov2prob" + echo "where lexiconp.txt is an extended lexicon with pronunciation" + echo "probabilities, and oov2prob has lines which divide" + echo "the OOV probability mass among the introduced OOV words." + echo "Important options:" + echo " --cmd # how to run jobs, default run.pl" + echo " --num-prons # how many prons to generate, default 1000000" + exit 1; +fi + + +input_lexicon=$1 +toplevel_dir=$2 # e.g. data/local/extend +dev_text= +if [ $# -eq 3 ]; then + dev_text=$3 +fi + +dir=$2/tmp # most of our work happens in this "tmp" directory. + +mkdir -p $dir + +if [ ! -s $input_lexicon ]; then + echo "$0: expected input lexicon $input_lexicon to exist"; +fi + +cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of what we started with. + +loc=`which ngram-count`; +if [ -z $loc ]; then + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 +fi + + +if ! which g2p.py >&/dev/null; then + if [ ! -d $KALDI_ROOT/tools/sequitur ]; then + echo "Sequitur was not found !" 
+ echo "Go to $KALDI/tools and execute extras/install_sequitur.sh" + else + echo "Problems running sequitur. Check that your path.sh is putting it on the path." + echo "e.g. that it is sourcing KALDI_ROOT/tools/env.sh and that that env.sh file exists" + fi + exit 1; +fi + +if ! which g2p.py >/dev/null ; then + exit 1 +fi + + +if [ $stage -le -5 ]; then + # Map the phones to a more unambiguous representation so that when we + # concatenate the letters of them, we won't lose information. This will + # also make g2p's life easier because each phone goes to a single letter, + # which g2p will treat as a single symbol (remember, g2p is designed + # to produce graphemes, so the tokens it produces are letters). + + cat $toplevel_dir/input_lexicon.txt | \ + awk '{for(n=2;n<=NF;n++) seen[$n]=1;} END{for (key in seen) print key;}' >$dir/phonelist + + cat $dir/phonelist | perl -e ' @ids = ("a".."z", "A".."Z", "0".."9", ":", "=", "?", "@", "[", "]", "^", "+", "\$", "%", "&", "#", "*", "!", "(", ")", "{", "}" ); + @map = (); while(<>) { + chomp; $output = "$_ "; + @col = split("_"); + # Loop over different positions. + for ($p = 0; $p < @col; $p++) { + # New position that has not been assigned a hash. + if (@map <= $p) { push(@map, {}); } + # Assign map for each position. + if (!defined($map[$p]->{$col[$p]})) { + if (@ids == 0) { # We have used all the ids... die here. + die "Used up all the un-mapped ids, cannot continue\n"; + } + $map[$p]->{$col[$p]} = shift @ids; + } + $output .= "$map[$p]->{$col[$p]}"; + } + print "$output\n"; }' > $dir/phone_map + cat $dir/phone_map | awk '{print $2, $1}' > $dir/phone_map.reverse + + cat $toplevel_dir/input_lexicon.txt | \ + local/apply_map_tab_preserving.pl -f 2- $dir/phone_map > $dir/lexicon_in.txt +fi + + +if [ $stage -le -4 ]; then + cat $dir/lexicon_in.txt | perl -ane 'if (! 
m/^\<\S+\>\s/) { print; } ' > $dir/lexicon_in_nosil.txt + + cat $dir/lexicon_in.txt | perl -ane 's/^(\S+\s+)/${1}1.0\t/;print;' > $dir/lexiconp_in.txt +fi + + + + +if [ $stage -le -3 ]; then + # Each syllable will be given a "word" representation; we join the phones using comma "," + perl -e 'while() { s/^\S+\s*//; s/ /,/g; print }' <$dir/lexicon_in_nosil.txt >$dir/syllable_text.txt + + echo "$0: using SRILM to train syllable LM" + + ngram-count -lm $dir/3gram.me.gz -maxent -maxent-convert-to-arpa -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $dir/syllable_text.txt -sort + rm $dir/lm.gz 2>/dev/null + ln -s 3gram.me.gz $dir/lm.gz +fi + + +ngram=$(which ngram) + +if [ $stage -le -2 ]; then + mkdir -p $dir/log + echo "$0: generating words from the syllable LM" + + per_job_num_sent_gen=$[$num_sent_gen/$nj] + + $cmd JOB=1:$nj $dir/log/gen.JOB.log \ + $ngram -lm $dir/lm.gz -gen $per_job_num_sent_gen -seed JOB \| \ + sort -u \> $dir/sents.JOB || exit 1; +fi + +if [ $stage -le -1 ]; then + echo "$0: computing probs for the generated sentences" + rm $dir/probs.* 2>/dev/null + + echo '#!/usr/bin/perl +while(1) { + $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); } + $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; + $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl + chmod +x $dir/temp.pl + + $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \ + $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1; + + if $cleanup; then + rm $dir/sents.*; + fi + sort -m -gr $dir/probs.* > $dir/probs.all + uniq $dir/probs.all | head -n $num_prons > $dir/probs || true + if $cleanup; then + rm $dir/probs.*; + fi + + mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}') + + echo "$0: total probability mass in generated words is $mass" + echo " this should ideally be close to 1 (although we lose a little due to the" + echo " empty sentence). You can get closer by increasing --num-sent-gen and/or" + echo " --nj" + + nl=$(cat $dir/probs | wc -l) + if [ $nl -lt $num_prons ]; then + echo "$0: Number of generated lines $nl is less than number of requested words $num_prons:" + echo " please run with larger --nj, currently $nj " + exit 1; + fi +fi + + +# Next we train a reverse g2p, which is really p2g. Suppose a line in the lexicon is +# sugar s uh g ax r +# The basic idea is that we'd transform it to the following in reverse_lex.sh +# suhgaxr s u g a r +# We may lose a little information by doing this, though, because the segmentation +# into phonemes may be ambiguous. So we create a mapping from the original phonemes +# and tags to letters of the alphabet. Note: tags are things like s_3 for a phone: here +# s is the phone and _3 is the tag. + + +if [ $stage -le 0 ]; then + cat $dir/lexicon_in_nosil.txt | perl -ane ' + use Encode qw(decode encode); + @A = split; $w = shift @A; + $w = Encode::decode("'$encoding'", $w); + $w = join(" ", split("", $w)); + $w = Encode::encode("'$encoding'", $w); + print join("", @A) . "\t" . $w . "\n";' > $dir/lexicon_reverse.txt + + echo "$0: Training the G2P model (iter 0)" + if ! $skip_done || [ ! 
-f $dir/p2g.model.0 ]; then + $cmd $dir/log/g2p.0.log \ + g2p.py -S --encoding $encoding --train $dir/lexicon_reverse.txt --devel 5% --write-model $dir/p2g.model.0 || exit 1; + else + echo "$0: $dir/p2g.model.0 already exists: skipping it since --skip-done is true" + fi +fi + +for i in `seq 0 $(($g2p_iters-2))`; do + if [ $stage -le $[i+1] ]; then + if ! $skip_done || [ ! -f $dir/p2g.model.$[$i+1] ]; then + echo "$0: Training the G2P model (iter $[$i + 1] )" + $cmd $dir/log/g2p.$[$i+1].log \ + g2p.py -S --encoding $encoding --model $dir/p2g.model.$i --ramp-up \ + --train $dir/lexicon_reverse.txt --devel 5% \ + --write-model $dir/p2g.model.$(($i+1)) + else + ii=$[$i+1]; + echo "$0: $dir/p2g.model.$ii already exists: skipping it since --skip-done is true" + fi + fi + rm -f $dir/p2g.model.final + ln -s p2g.model.$(($i+1)) $dir/p2g.model.final +done + + + +if [ $stage -le $g2p_iters ]; then + # get the word-list to apply g2p to; each one is just a sequence + # of phones, formed by appending the syllables in the "generated sentences" + # (really generated syllable-sequences) in $dir/probs, and removing the + # separator. + + cat $dir/probs | head -n $num_prons | awk '{$1=""; print $0}' | \ + sed "s/,//g;s/ //g;" | sort | uniq > $dir/fake_word_list.txt + + echo "$0: Applying the G2P model to wordlist $wordlist" + + $cmd JOB=1:$nj $dir/log/apply_p2g.JOB.log \ + split -n l/JOB/$nj $dir/fake_word_list.txt \| \ + g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \ + --model $dir/p2g.model.final --apply - \ + \> $dir/p2g_output.JOB || exit 1; + perl -wlne 'use strict; + our %P; + my ($prn,$num,$prb,$spl)=m/^(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/; + my $tok=$prn."=".$spl; + $P{$tok} = [ $num, $prb ] unless (defined($P{$tok}) && $P{$tok}[1] < $prb); + END { + map{ my ($prn,$spl)=m/^(.*)=(.*)$/; + my ($num, $prb) = @{$P{$tok}}; + print join("\t",$prn,$num,$prb,$spl) + } sort keys %P + }' $dir/p2g_output.* > $dir/p2g_output + rm $dir/p2g_output.* +fi + +if [ $stage -le $[$g2p_iters+1] ]; then + + # the NF >= 4 is about pruning out any empty spellings, that would + # produce an empty word. + # pron2spelling contains lines like ak>a 0.957937 aka + cat $dir/p2g_output | \ + awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \ + sort | uniq > $dir/pron2spelling + + # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt + # (this also contains the excluded words like ). + cat $dir/pron2spelling | \ + perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while() { @A=split; $seen_word{$A[0]}=1; } + while() { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded + # $dir/pron2spelling.excluded contains lines like + #ab syllable1 syllable2 ... + # e.g. 
+
+  # Kuku 0.000002642 k>&u k>&u
+
+  cat $dir/probs | \
+    perl -e ' while(<>){ @A = split; $prob = shift @A; $pron=join("", @A);
+      $pron =~ tr/,//d;  print "$pron $_"; } '> $dir/probs.with_pron
+  # $dir/probs.with_pron contains lines like the following:
+  # ak>a -2.43244 a &k>&a
+  # This is so we can get the pronunciation in the same form that we put it in, for
+  # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded
+
+  perl -e ' ($p2s, $probs_with_pron) = @ARGV;
+    open(P2S, "<$p2s" || die); open(PROBS, "<$probs_with_pron")||die;
+    while (<P2S>) {
+      @A = split;
+      ($pron,$pronprob,$spelling) = @A;
+      if (!defined $prons{$pron}) { $prons{$pron} = [ ]; } # new anonymous array
+      $ref = $prons{$pron};
+      push @$ref, "$pronprob $spelling";
+    }
+    $log10 = log(10.0);
+    while (<PROBS>) {
+      @A = split;
+      $pron = shift @A; # pron in same format as used by p2g model.
+      $logprob = shift @A;
+      $syllable_pron = join(" ", @A); # pron separated by syllable
+      $p = exp($logprob * $log10);
+      $ref = $prons{$pron};
+      if (defined $ref) {
+        foreach $str (@$ref) {
+          @B = split(" ", $str);
+          ($pronprob,$spelling) = @B;
+          $pair_prob = $p * $pronprob;
+          print "$spelling $pair_prob $syllable_pron\n";
+        }
+      }
+    } ' $dir/pron2spelling.excluded $dir/probs.with_pron > $dir/lexicon.oov.raw
+
+  # $dir/lexicon.oov.raw contains lines like:
+  # ukuzi 0.000342399163717093 u &k>&u &z&i
+
+  mass=$(cat $dir/lexicon.oov.raw | awk '{x+=$2;} END{print x}')
+  echo "$0: Total probability mass of unseen words (before removing prons"
+  echo "  shorter than $min_phones phones) is $mass"
+
+  # the next stage does 3 things: (1) it converts the pronunciations to be
+  # tab-separated lists of syllables and removes the separator ","; (2) it limits us
+  # to prons containing at least $min_phones phones; and (3) it limits to the
+  # most likely $num_prons pairs of (spelling, pron)
+  perl -e ' while (<>) {
+     @A = split;
+     $spelling = shift @A;
+     $prob = shift @A;
+     for ($n = 0; $n < @A; $n++) { # replace separator in syllable with space.
+       $A[$n] =~ tr/,/ /d; # replace the separator with space.
+     }
+     $final_pron = join("\t", @A);
+     print "$spelling\t$prob\t$final_pron\n";
+   } ' <$dir/lexicon.oov.raw | sort -k2,2 -gr | \
+    awk -v min=$min_phones '{if(NF>=min+2){print;}}' | head -n $num_prons >$dir/lexicon.oov
+
+
+  mass=$(cat $dir/lexicon.oov | awk '{x+=$2;} END{print x}')
+  echo "$0: Total probability mass of unseen words (after removing prons"
+  echo "  shorter than $min_phones phones) is $mass."
+
+
+  # $dir/lexicon.oov contains lines like the following:
+  # ngisa 0.00340513074018366 N g i s a
+  # where the multiple-spaces are actually tabs.
+
+  # Now renormalize the probability to sum to one, and decompose $dir/lexicon.oov
+  # into two pieces: a lexicon $dir/lexiconp_oov.txt, which contains the
+  # probabilities of different spellings of words (with the most likely one at
+  # 1.0), and $dir/oov2prob which contains the probabilities of the words
+  # (we'll use it later to adjust the LM).
+
+  # the uniq here shouldn't be needed, actually. [relates to a bug in a previous
+  # step that is now fixed.] This script relies on the fact that lexicon.oov
+  # is sorted in reverse order of probability.
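+  # A made-up numeric illustration of the decomposition below: if
+  # lexicon.oov held just two prons of "naba" with probs 0.02 and 0.01
+  # (total mass 0.03), oov2prob would get "naba 0.666667" (= 0.02/0.03)
+  # and lexiconp_oov.txt would get pronprobs 1.0 and 0.5; multiplying
+  # the two back together recovers each pair's share of the OOV mass.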
+  cat $dir/lexicon.oov | awk -v mass=$mass 'BEGIN{OFS=FS="\t";} {$2 = $2/mass; print;}' | uniq | \
+    perl -e ' ($lexiconp,$words_probs) = @ARGV;
+     open(L, "|sort -u >$lexiconp") || die "opening lexicon $lexiconp";
+     open(W, "|sort -u >$words_probs") || die "opening probs file $words_probs";
+     while (<STDIN>) {
+       @A = split("\t", $_);
+       $word = shift @A; $prob = shift @A; $pron = join("\t", @A);
+       if (!defined $maxprob{$word}) { # max prob is always the first.
+         $maxprob{$word} = $prob;
+         print W "$word $prob\n";
+       }
+       $pronprob = $prob / $maxprob{$word};
+       $pronprob <= 1 || die "bad pronprob $pronprob\n";
+       print L "$word\t$pronprob\t$pron";
+     } close(L); close(W); # wait for sort to finish. ' \
+    $dir/lexiconp_oov.txt $dir/oov2prob
+
+  # lexiconp_oov.txt contains lines like:
+  #leyanga 0.96471840417664 l 3 j_" a_" N a
+  #leyanga 1 l 3 j_" a_" N g a
+
+  # oov2prob looks like this:
+  #-Uni 8.77716315938887e-07
+  #Adlule 9.62418179264897e-08
+  #Afuna 2.23048402109824e-06
+fi
+
+if [ $stage -le $[$g2p_iters+2] ]; then
+  # put it into the output directory $toplevel_dir, e.g. data/local/
+  cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \
+    local/apply_map_tab_preserving.pl -f 3- $dir/phone_map.reverse | sort -u > $toplevel_dir/lexiconp.txt
+  cp $dir/oov2prob $toplevel_dir/oov2prob
+fi
+
+# Finally, if $dev_text is not empty, print out the OOV rate. We assume $dev_text is
+# in the following format:
+# 14350_A_20121123_042710_001717 yebo yini
+# where "14350_A_20121123_042710_001717" is the utterance id and "yebo yini" is
+# the actual words.
+if [ ! -z $dev_text ]; then
+  # Original token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt > $toplevel_dir/original_oov_rates
+
+  # New token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates
+
+  # Original type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt >> $toplevel_dir/original_oov_rates
+
+  # New type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt >> $toplevel_dir/new_oov_rates
+fi
+
+exit 0;
+
+###BELOW HERE IS JUST COMMENTS ###########
+
+#cat 
/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \ +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \ + perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(){ @A=split; $seen{$A[0]}=1;} + while() { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} + $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x +done +# OOV rate measured on the words in the FullLP lexicon. +#Seen 13675 out of 60613 tokens; OOV rate is 77.44 +#Seen 26936 out of 60613 tokens; OOV rate is 55.56 + +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \ + perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(){ @A=split; $seen{$A[0]}=1;} + while() { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} + $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x +done +# zulu limitedlp, dev10h: +# With the million-word lexicon we more than halve the per-token OOV rate of dev10h. +#Seen 44680 out of 66891 tokens; OOV rate is 33.20 +#Seen 57095 out of 66891 tokens; OOV rate is 14.64 diff --git a/egs/babel/s5d/local/extract_oov_words.pl b/egs/babel/s5d/local/extract_oov_words.pl new file mode 100755 index 00000000000..08f8f5d1436 --- /dev/null +++ b/egs/babel/s5d/local/extract_oov_words.pl @@ -0,0 +1,70 @@ +#!/usr/bin/env perl +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. + +use Data::Dumper; +$Data::Dumper::Indent = 1; + +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; + +$ignore_oov = 0; +$ignore_first_field = 0; +for($x = 0; $x < 2; $x++) { + if ($ARGV[0] eq "-f") { + shift @ARGV; + $field_spec = shift @ARGV; + if ($field_spec =~ m/^\d+$/) { + $field_begin = $field_spec - 1; $field_end = $field_spec - 1; + } + if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) + if ($1 ne "") { + $field_begin = $1 - 1; # Change to zero-based indexing. + } + if ($2 ne "") { + $field_end = $2 - 1; # Change to zero-based indexing. + } + } + if (!defined $field_begin && !defined $field_end) { + die "Bad argument to -f option: $field_spec"; + } + } +} + +$symtab = shift @ARGV; +if (!defined $symtab) { + print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . + "options: [--map-oov ] [-f ]\n" . + "note: can look like 4-5, or 4-, or 5-, or 1.\n"; +} + + +open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab"; +while() { + @A = split(" ", $_); + @A == 2 || die "bad line in symbol table file: $_"; + + if ( not defined( $sym2int{$A[0]} ) ) { + $sym2int{$A[0]} = []; + } + push @{ $sym2int{$A[0]} }, $A[1] + 0; +} + + +$lines=0; +while (<>) { + @A = split(" ", $_); + @B = (); + for ($n = 0; $n < @A; $n++) { + if ( (!defined $field_begin || $n >= $field_begin) + && (!defined $field_end || $n <= $field_end)) { + $a = $A[$n]; + $i = $sym2int{$a}; + if (!defined ($i)) { + print $a . 
"\n"; + } + } + } +} + + diff --git a/egs/babel/s5d/local/filter_keywords.pl b/egs/babel/s5d/local/filter_keywords.pl new file mode 100755 index 00000000000..a724ad77f1a --- /dev/null +++ b/egs/babel/s5d/local/filter_keywords.pl @@ -0,0 +1,68 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Encode; + +my $Usage = < + +EOU + +if(@ARGV != 3) { + die $Usage; +} + +# Get parameters +my $dictin = shift @ARGV; +my $filein = shift @ARGV; +my $fileout = shift @ARGV; + +# Open dictionary +if (!open(D, "<$dictin")) {print "Fail to open dictionary: $dictin\n"; exit 1;} + +# Get input source +my $source = ""; +if ($filein eq "-") { + $source = "STDIN"; +} else { + if (!open(I, "<$filein")) {print "Fail to open input file: $filein\n"; exit 1;} + $source = "I"; +} + +# Open output fst list +my $sourceout = ""; +if ($fileout ne "-") { + if (!open(O, ">$fileout")) {print "Fail to open output file: $fileout\n"; exit 1;} + $sourceout = "O"; +} + +# Read in the dictionary +my %dict = (); +while () { + chomp; + my @col = split(" ", $_); + my $word = shift @col; + my $original_w = $word; + $word =~ tr/a-z/A-Z/; + $dict{$word} = $original_w; +} + +# Process the queries +my $word; +while (<$source>) { + chomp; + my @col = split(" ", $_); + foreach $word (@col) { + if (defined($dict{$word})) { + eval "print $sourceout \"$dict{$word} \""; + } else { + eval "print $sourceout \"$word \""; + } + } + eval "print $sourceout \"\n\""; +} + +close(D); +if ($filein ne "-") {close(I);} +if ($fileout ne "-") {close(O);} diff --git a/egs/babel/s5d/local/filter_kwslist.pl b/egs/babel/s5d/local/filter_kwslist.pl new file mode 100755 index 00000000000..7c57b62517a --- /dev/null +++ b/egs/babel/s5d/local/filter_kwslist.pl @@ -0,0 +1,55 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. +# +use strict; +use warnings; +use Getopt::Long; +use XML::Simple; + +my $data = XMLin(\*STDIN); +my $duptime= $ARGV[0]; + +#print Dumper($data); + +# Filters duplicate keywords that have the same keyword and about the same time. +# Relies on the fact that its input is sorted from largest to smallest score. + +foreach my $kwentry (@{$data->{detected_kwlist}}) { + #print "$kwentry->{kwid}\n"; + my $prev_time; + my $prev_file; + + if(ref($kwentry->{kw}) eq 'ARRAY'){ + my @arr = @{$kwentry->{kw}}; + my @newarray = (); + + push @newarray, $arr[0]; + #$arr[0]->{tbeg} . "\n"; + for (my $i = 1; $i < scalar(@arr); $i +=1) { + + my $found = 0; + foreach my $kw (@newarray) { + if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && + ( $arr[$i]->{channel} == $kw->{channel}) && + ( $arr[$i]->{file} eq $kw->{file}) ) { + + $found = 1; + + #print $arr[$i]->{tbeg} . "\n"; + } + } + if ( $found == 0 ) { + push @newarray, $arr[$i]; + } + } + + $kwentry->{kw} = \@newarray; + }else{ + #print $kwentry->{kw}->{tbeg} . "\n"; + } +# print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; +} +my $xml = XMLout($data, RootName => "kwslist", NoSort=>1); +print $xml; diff --git a/egs/babel/s5d/local/fix_kwslist.pl b/egs/babel/s5d/local/fix_kwslist.pl new file mode 100755 index 00000000000..33c6dc30e82 --- /dev/null +++ b/egs/babel/s5d/local/fix_kwslist.pl @@ -0,0 +1,89 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal) +# Apache 2.0. 
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::Simple;
+use Data::Dumper;
+use File::Basename;
+
+sub mysort {
+  if ($a->{kwid} =~ m/[0-9]+$/ and $b->{kwid} =~ m/[0-9]+$/) {
+    ($a->{kwid} =~ /([0-9]*)$/)[0] <=> ($b->{kwid} =~ /([0-9]*)$/)[0]
+  } else {
+    $a->{kwid} cmp $b->{kwid};
+  }
+}
+
+my $Usage = <<EOU;
+Usage: fix_kwslist.pl [options] <kwlist.xml> <kwslist.xml> <fixed_kwslist.xml|->
+ e.g.: fix_kwslist.pl --kwlist-filename=kwlist.xml kwlist.xml kwslist.xml fixed_kwslist.xml
+
+Allowed options:
+  --kwlist-filename : Kwlist filename with version info (string, default = "")
+
+EOU
+
+my $kwlist_filename="";
+GetOptions('kwlist-filename=s' => \$kwlist_filename);
+
+if (@ARGV != 3) {
+  die $Usage;
+}
+
+# Work out the input/output source
+my $kwlist_in = shift @ARGV;
+my $kwslist_in = shift @ARGV;
+my $fixed_kwslist_out = shift @ARGV;
+
+my $KW = XMLin($kwlist_in);
+my $KWS = XMLin($kwslist_in);
+
+# Extract keywords from kwlist.xml
+my %kwlist;
+my $language = $KW->{language};
+foreach my $kwentry (@{$KW->{kw}}) {
+  $kwlist{$kwentry->{kwid}} = 1;
+}
+
+# Now work on the kwslist
+$KWS->{language} = $language;
+if ($kwlist_filename ne "") {
+  $KWS->{kwlist_filename} = basename($kwlist_filename);
+} elsif ($KWS->{kwlist_filename} eq "") {
+  $KWS->{kwlist_filename} = basename($kwlist_in);
+}
+foreach my $kwentry (@{$KWS->{detected_kwlist}}) {
+  if (defined($kwlist{$kwentry->{kwid}})) {
+    delete $kwlist{$kwentry->{kwid}};
+  }
+}
+
+# Empty entries...
+foreach my $kw (keys %kwlist) {
+  my %empty;
+  my @tmp = [];
+  $empty{search_time} = 1;
+  $empty{kwid} = $kw;
+  $empty{oov_count} = 0;
+  push(@{$KWS->{detected_kwlist}}, \%empty);
+}
+
+my @sorted = sort mysort @{$KWS->{detected_kwlist}};
+$KWS->{detected_kwlist} = \@sorted;
+
+my $xml = XMLout($KWS, RootName => "kwslist", NoSort=>0);
+if ($fixed_kwslist_out eq "-") {
+  print $xml;
+} else {
+  if (!open(O, ">$fixed_kwslist_out")) {
+    print "Fail to open output file: $fixed_kwslist_out\n";
+    exit 1;
+  }
+  print O $xml;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/generate_confusion_matrix.sh b/egs/babel/s5d/local/generate_confusion_matrix.sh
new file mode 100755
index 00000000000..fb602cf0957
--- /dev/null
+++ b/egs/babel/s5d/local/generate_confusion_matrix.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+acwt=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "Usage $0 [options] <data-dir> <model-dir> <ali-dir> <lat-dir> <work-dir>"
+  echo " e.g.: local/prepare_confusions.sh --nj 32 exp/sgmm5/graph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>      # number of parallel jobs"
+  echo "  --cmd <cmd>    # Command to run in parallel with"
+  echo "  --acwt <acwt>  # Acoustic model weight. Value will be used for 1-best path decoding of the lattices"
+  echo ""
+  echo "Please note that the output confusion matrix will be phoneme-based"
+  echo "and all the phone contexts (singleton, intra, begin, end) or phoneme"
+  echo "tags (such as tone or stress) will be collapsed into a single monophone"
+  echo ""
+  echo "The output format is line oriented."
+  echo "Each line can have one of these four formats (A, B being different phones, special symbol <eps>):"
+  echo "  A A count      #Number of hits, i.e. 
correctly determined phones" + echo " A B count #Number of substitutions of A with B " + echo " A count #Number of deletions" + echo " A count #Number of insertions" + exit 1; +fi + +set -u +set -e +set -o pipefail + +data=$1; shift +modeldir=$1; shift +alidir=$1; shift +latdir=$1; shift +wdir=$1; shift + +model=$modeldir/final.mdl +[ ! -f $model ] && echo "File $model does not exist!" && exit 1 +phones=$data/phones.txt +[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1 + +! ali_nj=`cat $alidir/num_jobs` && echo "Could not open the file $alidir/num_jobs" && exit 1 +! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1 +if [ $ali_nj -ne $lat_nj ] ; then + echo "Alignments num_jobs and lattices num_jobs mismatch!" + exit 1 +fi +[ ! $nj -le $ali_nj ] && echo "Number of jobs is too high (max is $ali_nj)." && nj=$ali_nj + +mkdir -p $wdir/log + +cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\ + sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' | sed 's/_[^ ]*//g' > $wdir/phones.txt + +echo "Converting alignments to phone sequences..." +$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \ + align-text\ + ark:\<\( \ + ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\ + int2sym.pl -f 2- $wdir/phones.txt - \) \ + ark:\<\( \ + lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ + lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \ + int2sym.pl -f 2- $wdir/phones.txt - \) \ + ark:$wdir/confusions.JOB.txt || exit 1 + +confusion_files="" +for i in `seq 1 $nj` ; do + confusion_files="$confusion_files $wdir/confusions.$i.txt" +done + +echo "Converting statistics..." +cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g' | \ + sed 's/ *$//g' | sed 's/^ *//g' | sort | uniq -c | \ + grep -v -E '|||SIL' | \ + perl -ane ' + die unless scalar @F == 3; + print "$F[1] $F[2] $F[0]\n"; + ' > $wdir/confusions.txt + +exit 0 +#-echo "Converting alignments to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \ +#- ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt +#- +#-echo "Converting lattices to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \ +#- lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ +#- lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt + diff --git a/egs/sprakbanken/s5/local/generate_example_kws.sh b/egs/babel/s5d/local/generate_example_kws.sh similarity index 99% rename from egs/sprakbanken/s5/local/generate_example_kws.sh rename to egs/babel/s5d/local/generate_example_kws.sh index 2c849438192..e90752926b3 100755 --- a/egs/sprakbanken/s5/local/generate_example_kws.sh +++ b/egs/babel/s5d/local/generate_example_kws.sh @@ -71,7 +71,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 20; $current = 0; $min_count = 4; @@ -88,7 +88,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 10; $current = 0; $min_count = 3; diff --git a/egs/babel/s5d/local/generate_phoneme_transcription.sh b/egs/babel/s5d/local/generate_phoneme_transcription.sh new file mode 100755 index 00000000000..4ef0e556277 --- /dev/null +++ b/egs/babel/s5d/local/generate_phoneme_transcription.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 + +# Begin configuration section. 
+nj=4 +cmd=run.pl +acwt=0.1 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. utils/parse_options.sh || exit 1; + +echo "$#" +if [ $# != 4 ]; then + echo "Usage $0 [options] " + echo " e.g.: local/prepare_confusions.sh --nj 32 exp/sgmm5/graph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix" + echo "" + echo "main options (for others, see top of script file)" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --acwt # Acoustic model weight. Value will be used for 1-best path decoding of the lattices" + echo "" + echo "Please note that the output confusion matrix will be phoneme-based" + echo "and all the phone contexts (singleton, intra, begin, end) or phoneme" + echo "tags (such as tone or stress) will be collapsed into a single monophone" + echo "" + echo "The output format is line oriented." + echo "Each line can have one of these four formats (A, B being different phones, special symbol" + echo " A A count #Number of hits, i.e. correctly determined phones" + echo " A B count #Number of substitutions of A with B " + echo " A count #Number of deletions" + echo " A count #Number of insertions" + exit 1; +fi + +set -u +set -e +set -o pipefail + +data=$1; shift +modeldir=$1; shift +latdir=$1; shift +wdir=$1; shift + +model=$modeldir/final.mdl +[ ! -f $model ] && echo "File $model does not exist!" && exit 1 +phones=$data/phones.txt +[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1 + +! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1 +[ ! $nj -le $lat_nj ] && echo "Number of jobs is too high (max is $lat_nj)." && nj=$lat_nj + +mkdir -p $wdir/log + +cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\ + sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phone_map + +echo "Converting alignments to phone sequences..." +$cmd JOB=1:$nj $wdir/log/phones.JOB.log \ + lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \|\ + lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \|\ + int2sym.pl -f 2- $wdir/phone_map - \> $wdir/phones.JOB.txt || exit 1 + +confusion_files="" +for i in `seq 1 $nj` ; do + confusion_files="$confusion_files $wdir/phones.$i.txt" +done + +echo "Converting statistics..." +cat $confusion_files | sort > $wdir/phones.txt + +exit 0 +#-echo "Converting alignments to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \ +#- ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt +#- +#-echo "Converting lattices to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \ +#- lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ +#- lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt + diff --git a/egs/babel/s5d/local/generate_proxy_keywords.sh b/egs/babel/s5d/local/generate_proxy_keywords.sh new file mode 100755 index 00000000000..584f7d7902e --- /dev/null +++ b/egs/babel/s5d/local/generate_proxy_keywords.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +# Copyright 2012-2014 Guoguo Chen +# Apache 2.0. + +# Begin configuration section. 
+nj=8 +cmd=run.pl +beam=-1 # Beam for proxy FST, -1 means no prune +phone_beam=-1 # Beam for KxL2xE FST, -1 means no prune +nbest=-1 # Use top n best proxy keywords in proxy FST, -1 means all + # proxies +phone_nbest=50 # Use top n best phone sequences in KxL2xE, -1 means all + # phone sequences +confusion_matrix= # If supplied, using corresponding E transducer +count_cutoff=1 # Minimal count to be considered in the confusion matrix; + # will ignore phone pairs that have count less than this. +pron_probs=false # If true, then lexicon looks like: + # Word Prob Phone1 Phone2... +# End configuration section. + +[ -f ./path.sh ] && . ./path.sh; # source the path. +echo "$0 " "$@" +. parse_options.sh || exit 1; + +if [ $# -ne 1 ]; then + echo "Generate proxy keywords for IV/OOV keywords. Phone confusions will be" + echo "used when generating the proxies if the confusion matrix is supplied." + echo "If you are going to use the confusion matrix, please use the following" + echo "format for the file \$confusion_matrix:" + echo " p1 p2 count1 // For substitution" + echo " p3 count2 // For deletion" + echo " p4 count3 // For insertion" + echo "" + echo "Proxies keywords are generated using:" + echo "K x L2 x E x L1'" + echo "where K is a keyword FST, L2 is a lexicon that contains pronunciations" + echo "of keywords in K, E is an edit distance FST that contains the phone" + echo "confusions and L1 is the original lexicon." + echo "" + echo "The script assumes that L1.lex, L2.lex, words.txt and keywords.txt have" + echo "been prepared and stored in the directory ." + echo "" + echo "Usage: local/generate_example_kws.sh " + echo " e.g.: local/generate_example_kws.sh data/dev10h/kws_proxy/" + exit 1; +fi + +set -e +set -o pipefail + +kwsdatadir=$1 + +# Checks some files. +for f in $kwsdatadir/L1.lex $kwsdatadir/L2.lex \ + $kwsdatadir/words.txt $kwsdatadir/keywords.txt; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1 +done + +# Gets phone symbols +phone_start=2 +if $pron_probs; then + phone_start=3 +fi + +pron_probs_param=""; +if $pron_probs; then + pron_probs_param="--pron-probs"; +fi + +cat $kwsdatadir/L1.lex | \ + perl -e ' + while ( $line = ) { + chomp $line; + ($word, $pron) = split " ", $line, 2; + $pron = join(" ", split(" ", $pron)); + push @{$LEX{$pron}}, $word; + } + + open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n"; + open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n"; + foreach $pron (keys %LEX) { + $head = $LEX{$pron}->[0]; + print L1 "$head $pron\n"; + foreach $alt (@{$LEX{$pron}}) { + print MAP "0 0 $alt $head\n"; + } + } + print MAP "0\n"; + close(L1); + close(MAP); +' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt + +fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \ + fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst + +ndisambig=`utils/add_lex_disambig.pl \ + $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex` +ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST. 
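+# For example, if $ndisambig ended up as 2, disambig.txt would contain
+# the three symbols #0, #1 and #2, one per line.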
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt
+
+cat $kwsdatadir/L2.lex $kwsdatadir/L1.lex |\
+  awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\
+  sort -u | sed '1i\<eps>' |\
+  cat - $kwsdatadir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \
+  > $kwsdatadir/phones.txt
+
+# Compiles lexicon into FST
+cat $kwsdatadir/L2.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+  --osymbols=$kwsdatadir/words.txt - |\
+  fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst
+
+phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'`
+word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'`
+phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\
+  awk '{print $2}' | tr "\n" " "`
+word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\
+  awk '{print $2}' | tr "\n" " "`
+cat $kwsdatadir/L1_disambig.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+  --osymbols=$kwsdatadir/words.txt - |\
+  fstaddselfloops "echo $phone_disambig_symbol |" \
+  "echo $word_disambig_symbol |" |\
+  fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\
+  fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\
+  fstarcsort --sort_type=ilabel > $kwsdatadir/L1.fst
+
+# Compiles E.fst
+confusion_matrix_param=""
+if [ ! -z $confusion_matrix ]; then
+  echo "$0: Using confusion matrix, normalizing"
+  local/count_to_logprob.pl --cutoff $count_cutoff \
+    $confusion_matrix $kwsdatadir/confusion.txt
+  confusion_matrix_param="--confusion-matrix $kwsdatadir/confusion.txt"
+fi
+cat $kwsdatadir/phones.txt |\
+  grep -v -E "<.*>" | grep -v "SIL" | awk '{print $1;}' |\
+  local/build_edit_distance_fst.pl --boundary-off=true \
+  $confusion_matrix_param - - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+  --osymbols=$kwsdatadir/phones.txt - $kwsdatadir/E.fst
+
+# Pre-composes L2 and E, for the sake of efficiency
+fstcompose $kwsdatadir/L2.fst $kwsdatadir/E.fst |\
+  fstarcsort --sort_type=ilabel > $kwsdatadir/L2xE.fst
+
+keywords=$kwsdatadir/keywords.int
+# Prepares for parallelization
+cat $kwsdatadir/keywords.txt |\
+  utils/sym2int.pl -f 2- $kwsdatadir/words.txt | sort -R > $keywords
+
+nof_keywords=`cat $keywords|wc -l`
+if [ $nj -gt $nof_keywords ]; then
+  nj=$nof_keywords
+  echo "$0: Requested more jobs than keywords; using $nj jobs instead"
+fi
+
+# Generates the proxy keywords
+mkdir -p $kwsdatadir/split/log
+$cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \
+  split -n r/JOB/$nj $keywords \| \
+  generate-proxy-keywords --verbose=1 \
+  --proxy-beam=$beam --proxy-nbest=$nbest \
+  --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+  $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts
+
+proxy_fsts=""
+for j in `seq 1 $nj`; do
+  proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts"
+done
+cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \
+  fsts-project ark:- ark:$kwsdatadir/keywords.fsts
diff --git a/egs/babel/s5d/local/kaldi_dir2uem.py b/egs/babel/s5d/local/kaldi_dir2uem.py
new file mode 100755
index 00000000000..26b4ec1aaba
--- /dev/null
+++ b/egs/babel/s5d/local/kaldi_dir2uem.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+import argparse, sys
+from argparse import ArgumentParser
+import re
+
+def main():
+    parser = ArgumentParser(description='Convert kaldi data directory to uem dat files',
+                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--verbose', type=int, \
+                        dest='verbose', default=0, \
+                        help='Give higher verbose for more logging')
+    parser.add_argument('--get-text', action='store_true', \
+                        help='Get text in dat file')
+    parser.add_argument('--prefix', type=str, \
+                        help='Add db file name as db-<prefix>-{utt/spk}.dat')
+    parser.add_argument('kaldi_dir', \
+                        help='Kaldi data directory')
+    parser.add_argument('output_dir', \
+                        help='Directory to store uem dat files')
+    parser.usage=':'.join(parser.format_usage().split(':')[1:]) \
+        + 'e.g. :  %(prog)s --prefix 203-lao-v0 data/dev10h.seg CMU_db'
+    options = parser.parse_args()
+
+    if options.get_text:
+        try:
+            text_file = open(options.kaldi_dir+'/text', 'r')
+        except IOError as e:
+            repr(e)
+            sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/text'))
+            sys.exit(1)
+
+    try:
+        segments_file = open(options.kaldi_dir+'/segments', 'r')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/segments'))
+        sys.exit(1)
+
+    try:
+        scp_file = open(options.kaldi_dir+'/wav.scp', 'r')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/wav.scp'))
+        sys.exit(1)
+
+    reco2file_map = {}
+    for line in scp_file.readlines():
+        splits = line.strip().split()
+        m = re.search(r".*/(?P<file_name>[0-9A-Za-z_]*\.(sph|wav)).*", line)
+        if not m:
+            sys.stderr.write("%s does not contain a valid speech file (.wav or .sph)\n" % line.strip())
+            sys.exit(1)
+        reco2file_map[splits[0]] = m.group('file_name')
+    # End for
+
+    spk2utt_map = {}
+
+    if options.prefix is None:
+        prefix = options.kaldi_dir.split('/')[-1].split('.')[0]
+    else:
+        prefix = options.prefix
+
+    try:
+        utt_dat = open(options.output_dir+'/db-'+prefix+'-utt.dat', 'w')
+        spk_dat = open(options.output_dir+'/db-'+prefix+'-spk.dat', 'w')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: Could not write dat files in %s\n" % (sys.argv[0], options.output_dir))
+        sys.exit(1)
+
+    for line in segments_file.readlines():
+        utt_id, file_id, start, end = line.strip().split()
+
+        if (options.get_text):
+            splits = text_file.readline().split()
+            while splits[0] < utt_id:
+                splits = text_file.readline().split()
+            text = ' '.join(splits[1:])
+        else:
+            text = ""
+
+        utt_dat.write("{UTTID %s} {UTT %s} {SPK %s} {FROM %s} {TO %s} {TEXT %s}\n" % (utt_id, utt_id, file_id, start, end, text))
+        spk2utt_map.setdefault(file_id, [])
+        spk2utt_map[file_id].append(utt_id)
+
+    for spk, utts in spk2utt_map.items():
+        try:
+            spk_dat.write("{SEGS %s} {ADC %s} {CONV %s.wav} {CHANNEL 1} {DUR }\n" % (' '.join(utts), reco2file_map[spk], spk))
+        except KeyError as e:
+            repr(e)
+            sys.stderr.write("%s: Error in getting file for %s\n" % (sys.argv[0], spk))
+            sys.exit(1)
+    # End for
+
+    segments_file.close()
+    utt_dat.close()
+    spk_dat.close()
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/babel/s5d/local/kwords2indices.pl b/egs/babel/s5d/local/kwords2indices.pl
new file mode 100755
index 00000000000..5d5f0a3ad45
--- /dev/null
+++ b/egs/babel/s5d/local/kwords2indices.pl
@@ -0,0 +1,123 @@
+#!/usr/bin/env perl
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
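+#
+# This is a variant of utils/sym2int.pl for symbol tables in which one symbol
+# may map to several integer ids (for example a case-folded words.txt where
+# several original words collapse onto one lower-cased form). For every input
+# line it prints one output line per combination of the possible ids, generated
+# by the permute() helper below; words not found in the table are either mapped
+# to the --map-oov symbol or cause the script to die.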
+
+use Data::Dumper;
+$Data::Dumper::Indent = 1;
+
+binmode STDOUT, ":utf8";
+binmode STDIN, ":utf8";
+
+sub permute {
+
+  my $last = pop @_;
+
+  unless(@_) {
+    return map([$_], @$last);
+  }
+
+  return map {
+    my $left = $_;
+    map([@$left, $_], @$last)
+  }
+  permute(@_);
+}
+
+$oov_count=0;
+$num_warning = 0;
+$max_warning = 20;  # print at most this many OOV-replacement warnings
+
+$ignore_oov = 0;
+$ignore_first_field = 0;
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "--map-oov") {
+    shift @ARGV; $map_oov = shift @ARGV;
+  }
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1;  # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1;    # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+  exit 1;
+}
+open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+
+  if ( not defined( $sym2int{$A[0]} ) ) {
+    $sym2int{$A[0]} = [];
+  }
+  push @{ $sym2int{$A[0]} }, $A[1] + 0;
+}
+#print Dumper(\%sym2int);
+
+if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up
+  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
+  $map_oov = $sym2int{$map_oov};
+}
+
+$lines=0;
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  $lines = $lines + 1;
+  $undefined_words = 0;
+  for ($n = 1; $n < @A; $n++) {
+    $a = $A[$n];
+    $i = $sym2int{$a};
+    if (!defined ($i)) {
+      if (defined $map_oov) {
+        if ($num_warning++ < $max_warning) {
+          print STDERR "sym2int.pl: replacing $a with $map_oov\n";
+          if ($num_warning == $max_warning) {
+            print STDERR "sym2int.pl: not warning for OOVs any more times\n";
+          }
+        }
+        $i = [ $map_oov ];
+      } else {
+        $pos = $n+1;
+        die "sym2int.pl: undefined symbol $a (in position $pos)\n";
+      }
+      $undefined_words = $undefined_words + 1;
+    }
+    $a = $i;
+    push @B, $a;
+  }
+  #if ( defined $sym2int{$A[$n]} ) {
+  #  push @B, $sym2int{$A[$n]};
+  #} else {
+  #  push @B, [0];
+  #}
+  if ($undefined_words > 0) {
+    $oov_count = $oov_count + 1;
+  }
+  @C = permute @B;
+  #print Dumper(\@B);
+  #print Dumper(\@C);
+  foreach $phrase ( @C ) {
+    print "$A[0] ";
+    print join(" ", @{$phrase});
+    print "\n";
+  }
+}
+
+print STDERR "Remapped/ignored $oov_count phrases...\n";
+
diff --git a/egs/babel/s5d/local/kws_combine.sh b/egs/babel/s5d/local/kws_combine.sh
new file mode 100755
index 00000000000..8934faf7d30
--- /dev/null
+++ b/egs/babel/s5d/local/kws_combine.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Script for system combination of the KWS posting lists
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+# Ntrue-scale
+ntrue_scale=1.1
+min_lmw=8
+max_lmw=12
+extraid=
+skip_scoring=false
+optimize_weights=false
+#end of configuration section
+
+help_message="Usage: $(basename $0) [options] <data-dir> <lang-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dirN>[:lmwt-bias] ... ] <out-dir>
+E.g.: $(basename $0) data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+datadir=$1
+lang=$2
+odir=${@: -1}  # last argument to the script
+shift 2;
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument which is odir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+
+if [ -z "$extraid" ] ; then
+  kwsdatadir=$datadir/kws
+  kwsoutputdir="$odir/kws"
+else
+  kwsdatadir=$datadir/${extraid}_kws
+  kwsoutputdir="$odir/${extraid}_kws"
+fi
+
+for f in $kwsdatadir/ecf.xml $kwsdatadir/kwlist.xml ; do
+  [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+done
+ecf=$kwsdatadir/ecf.xml
+kwlist=$kwsdatadir/kwlist.xml
+
+# Duration
+duration=`head -1 $ecf |\
+  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+  perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'`
+
+mkdir -p $kwsoutputdir/log
+
+total_sum=0
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight.
+  [ -z "$offset" ] && offset=1
+  total_sum=$(($total_sum+$offset))
+done
+
+systems=""
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight.
+  decode_dir=`echo $decode_dir | cut -d: -f1`
+  [ -z "$offset" ] && offset=1
+
+  weight=$(perl -e "print ($offset/$total_sum);")
+  if [ -f $decode_dir ] ; then
+    systems+="$weight $decode_dir "
+  else
+    kwsfile=$decode_dir/kwslist.unnormalized.xml
+    [ ! -f ${kwsfile} ] && echo "The file ${kwsfile} does not exist!" && exit 1
+    systems+="$weight ${kwsfile} "
+  fi
+done
+
+echo $systems
+
+# Combination of the weighted sum and power rule
+$cmd PWR=1:9 $kwsoutputdir/log/combine_kws.PWR.log \
+  mkdir -p ${kwsoutputdir}_PWR '&&' \
+  local/naive_comb.pl --method=2 --power=0.PWR \
+  $systems ${kwsoutputdir}_PWR/kwslist.unnormalized.xml || exit 1
+
+$cmd PWR=1:9 $kwsoutputdir/log/postprocess_kws.PWR.log \
+  utils/kwslist_post_process.pl --duration=${duration} --digits=3 \
+  --normalize=true --Ntrue-scale=${ntrue_scale} \
+  ${kwsoutputdir}_PWR/kwslist.unnormalized.xml \
+  ${kwsoutputdir}_PWR/kwslist.xml || exit 1
+
+echo "Scoring..."
+if ! $skip_scoring ; then
+$cmd PWR=1:9 $kwsoutputdir/log/score_kws.PWR.log \
+  local/kws_score.sh --extraid "${extraid}" $datadir ${kwsoutputdir}_PWR || exit 1
+fi
+
+
diff --git a/egs/babel/s5d/local/kws_data_prep.sh b/egs/babel/s5d/local/kws_data_prep.sh
new file mode 100755
index 00000000000..3882c99ce6d
--- /dev/null
+++ b/egs/babel/s5d/local/kws_data_prep.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+# Begin configuration section.
+case_insensitive=true
+use_icu=true
+icu_transform="Any-Lower"
+silence_word=  # Optional silence word to insert (once) between words of the transcript.
+# End configuration section.
+
+echo $0 "$@"
+
+help_message="
+ Usage: local/kws_data_prep.sh <lang-dir> <data-dir> <kws-data-dir>
+ e.g.: local/kws_data_prep.sh data/lang/ data/eval/ data/kws/
+ Input is in <kws-data-dir>: kwlist.xml, ecf.xml (rttm file not needed).
+ Output is in <kws-data-dir>: keywords.txt, keywords_all.int, kwlist_invocab.xml,
+   kwlist_outvocab.xml, keywords.fsts
+ Note: the most important output is keywords.fsts
+ allowed switches:
+  --case-insensitive (true|false)  # Should the search be case-insensitive or not?
+                                   # Please note that case sensitivity depends
+                                   # on the shell locale!
+  --use-icu (true|false)           # Use the ICU uconv binary to normalize casing
+  --icu-transform <transform>      # When using ICU, use this transliteration
+
+"
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 3 ]; then
+  printf "FATAL: invalid number of arguments.\n\n"
+  printf "$help_message\n"
+  exit 1;
+fi
+
+set -u
+set -e
+set -o pipefail
+
+langdir=$1;
+datadir=$2;
+kwsdatadir=$3;
+keywords=$kwsdatadir/kwlist.xml
+
+
+mkdir -p $kwsdatadir;
+
+cat $keywords | perl -e '
+  #binmode STDIN, ":utf8";
+  binmode STDOUT, ":utf8";
+
+  use XML::Simple;
+  use Data::Dumper;
+
+  my $data = XMLin(\*STDIN);
+
+  #print Dumper($data->{kw});
+  foreach $kwentry (@{$data->{kw}}) {
+    #print Dumper($kwentry);
+    print "$kwentry->{kwid}\t$kwentry->{kwtext}\n";
+  }
+' > $kwsdatadir/keywords.txt
+
+
+# Map the keywords to integers; note that we remove the keywords that
+# are not in our $langdir/words.txt, as we won't find them anyway...
+#cat $kwsdatadir/keywords.txt | babel/filter_keywords.pl $langdir/words.txt - - | \
+#  sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \
+if $case_insensitive && ! $use_icu ; then
+  echo "$0: Running case insensitive processing"
+  cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt
+  [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \
+    echo "$0: Warning, multiple words in dictionary differ only in case: "
+
+
+  cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \
+    sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int
+elif $case_insensitive && $use_icu ; then
+  echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")"
+  cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt
+  [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \
+    echo "$0: Warning, multiple words in dictionary differ only in case: "
+
+  paste <(cut -f 1 $kwsdatadir/keywords.txt ) \
+        <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\
+    local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int
+else
+  cp $langdir/words.txt $kwsdatadir/words.txt
+  cat $kwsdatadir/keywords.txt | \
+    sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int
+fi
+
+(cat $kwsdatadir/keywords_all.int | \
+  grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int ) || true
+
+(cut -f 1 -d ' ' $kwsdatadir/keywords.int | \
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml) || true
+
+(cat $kwsdatadir/keywords_all.int | \
+  egrep " 0 | 0$" | cut -f 1 -d ' ' | \
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml) || true
+
+
+# Compile keywords into FSTs
+if [ -s $kwsdatadir/keywords.int ]; then
+  if [ -z $silence_word ]; then
+    transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts
+  else
+    silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'`
+    [ -z $silence_int ] && \
+      echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1;
+    transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \
+      awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \
+      > $kwsdatadir/keywords.fsts
+  fi
+else
+  echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword"
+  echo "WARNING: was found in the dictionary. That might be OK -- or not."
+  touch $kwsdatadir/keywords.fsts
+fi
+
+# Create utterance id for each utterance
+cat $datadir/segments | \
+  awk '{print $1}' | \
+  sort | uniq | perl -e '
+  $idx=1;
+  while(<>) {
+    chomp;
+    print "$_ $idx\n";
+    $idx++;
+  }' > $kwsdatadir/utter_id
+
+# Map utterances to the names that will appear in the rttm file. You have
+# to modify the commands below according to your rttm file.
+cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map;
+
+echo "$0: Kws data preparation succeeded"
diff --git a/egs/babel/s5d/local/kws_data_prep_proxy.sh b/egs/babel/s5d/local/kws_data_prep_proxy.sh
new file mode 100755
index 00000000000..04cc59b6499
--- /dev/null
+++ b/egs/babel/s5d/local/kws_data_prep_proxy.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+
+# Copyright 2014  Guoguo Chen
+# Apache 2.0.
+
+# Begin configuration section.
+nj=8
+cmd=run.pl
+beam=-1                # Beam for proxy FST, -1 means no pruning
+phone_beam=-1          # Beam for KxL2xE FST, -1 means no pruning
+nbest=-1               # Use top n best proxy keywords in proxy FST, -1 means all
+                       # proxies
+phone_nbest=50         # Use top n best phone sequences in KxL2xE, -1 means all
+                       # phone sequences
+phone_cutoff=5         # We don't generate proxy keywords for OOV keywords that
+                       # have fewer phones than the specified cutoff, as they may
+                       # introduce a lot of false alarms
+max_phone_cutoff=9990  # We don't generate proxy keywords for OOV keywords that
+                       # have more phones than this. This can be used when
+                       # we need to use different parameters for keywords of
+                       # different lengths.
+confusion_matrix=      # If supplied, use the corresponding E transducer
+count_cutoff=1         # Minimal count to be considered in the confusion matrix;
+                       # will ignore phone pairs that have count less than this.
+pron_probs=false       # If true, then lexicon looks like:
+                       # Word Prob Phone1 Phone2...
+case_insensitive=true
+icu_transform="Any-Lower"
+proxy_set=             # List of keywords to generate proxies for, one KWID per
+                       # line. If empty, then by default generate proxies for all
+                       # OOV keywords.
+# End configuration section.
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+echo $0 "$@"
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: local/kws_data_prep_proxy.sh \\"
+  echo "  <lang-dir> <data-dir> <l1-lexicon> <l2-lexicon> <kws-data-dir>"
+  echo " e.g.: local/kws_data_prep_proxy.sh data/lang/ data/dev10h/ \\"
+  echo "  data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/"
+  echo "allowed options:"
+  echo "  --case-insensitive (true|false)  # Be case-insensitive or not"
+  echo "  --icu-transform <transform>      # Transliteration for upper/lower case"
+  echo "                                   # mapping"
+  echo "  --proxy-set <keyword-list>       # Keyword set for generating proxies"
+  exit 1
+fi
+
+set -e
+set -o pipefail
+
+langdir=$1
+datadir=$2
+l1_lexicon=$3
+l2_lexicon=$4
+kwsdatadir=$5
+
+# Checks some files.
+for f in $langdir/words.txt $kwsdatadir/kwlist.xml $l1_lexicon $l2_lexicon; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+keywords=$kwsdatadir/kwlist.xml
+mkdir -p $kwsdatadir/tmp/
+
+cat $keywords | perl -e '
+  #binmode STDIN, ":utf8";
+  binmode STDOUT, ":utf8";
+
+  use XML::Simple;
+  use Data::Dumper;
+
+  my $data = XMLin(\*STDIN);
+
+  #print Dumper($data->{kw});
+  foreach $kwentry (@{$data->{kw}}) {
+    #print Dumper($kwentry);
+    print "$kwentry->{kwid}\t$kwentry->{kwtext}\n";
+  }' > $kwsdatadir/raw_keywords_all.txt
+
+# Takes care of upper/lower case.
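+# Keywords and lexicons are folded through ICU's uconv with the configured
+# transliteration (default "Any-Lower"), so that matching is effectively
+# case-insensitive even for non-ASCII scripts; for instance
+#   echo QUICK | uconv -f utf8 -t utf8 -x "Any-Lower"
+# prints "quick". ICU is used rather than tr(1) so that the folding does not
+# depend on the shell locale.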
+cp $langdir/words.txt $kwsdatadir/words.txt
+cat $l1_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L1.tmp.lex
+if $case_insensitive; then
+  echo "$0: Running case insensitive processing"
+  echo "$0: Using ICU with transform \"$icu_transform\""
+
+  # Processing words.txt
+  cat $kwsdatadir/words.txt |\
+    uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.norm.txt
+
+  # Processing lexicon
+  cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 1 |\
+    uconv -f utf8 -t utf8 -x "${icu_transform}" |\
+    paste -d ' ' - <(cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 2-) \
+    > $kwsdatadir/tmp/L2.tmp.lex
+
+  paste <(cut -f 1 $kwsdatadir/raw_keywords_all.txt) \
+        <(cut -f 2 $kwsdatadir/raw_keywords_all.txt |\
+          uconv -f utf8 -t utf8 -x "${icu_transform}") \
+    > $kwsdatadir/keywords_all.txt
+  cat $kwsdatadir/keywords_all.txt |\
+    local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.norm.txt \
+    > $kwsdatadir/keywords_all.int
+else
+  cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex
+  cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt
+
+  cat $kwsdatadir/keywords_all.txt | \
+    sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \
+    > $kwsdatadir/keywords_all.int
+fi
+
+# Writes some scoring related files.
+cat $kwsdatadir/keywords_all.int |\
+  (grep -E -v " 0 | 0$" || true) | cut -f 1 -d ' ' |\
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml
+
+cat $kwsdatadir/keywords_all.int |\
+  (grep -E " 0 | 0$" || true) | cut -f 1 -d ' ' |\
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml
+
+# Selects a set to generate proxies for. By default, generate proxies for OOV
+# keywords.
+if [ -z $proxy_set ]; then
+  cat $kwsdatadir/keywords_all.int |\
+    (grep -E " 0 | 0$" || true) | awk '{print $1;}' | sort -u \
+    > $kwsdatadir/keywords_proxy.list
+else
+  cp $proxy_set $kwsdatadir/keywords_proxy.list
+fi
+cat $kwsdatadir/keywords_all.txt |\
+  grep -f $kwsdatadir/keywords_proxy.list > $kwsdatadir/keywords_proxy.txt
+cat $kwsdatadir/keywords_proxy.txt |\
+  cut -f 2- | awk '{for(x=1;x<=NF;x++) {print $x;}}' |\
+  sort -u > $kwsdatadir/keywords_proxy_words.list
+
+# Maps the original phone set to a "reduced" phone set. We limit L2 to only cover
+# the words that are actually used in keywords_proxy.txt for efficiency purposes.
+# Besides, if L1 and L2 contain the same words, we use the pronunciation from
+# L1 since it is the lexicon used for the LVCSR training.
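+# The sed expressions below strip position markers (_B/_E/_I/_S), stress/tone
+# markers (_% and _") and numeric tags (_0, _1, ...) from each phone; as a
+# hypothetical example, the tagged pronunciation "k_B ae_I t_E" reduces to the
+# plain monophone string "k ae t", so proxies are generated over monophones.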
+cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\
+                   sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex
+cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\
+                   sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' | perl -e '
+  ($lex1, $words) = @ARGV;
+  open(L, "<$lex1") || die "Fail to open $lex1.\n";
+  open(W, "<$words") || die "Fail to open $words.\n";
+  while (<L>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      push(@{$lex1{$w}}, $w_p);
+    } else {
+      $lex1{$w} = [$w_p];
+    }
+  }
+  close(L);
+  while (<STDIN>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      next;
+    }
+    if (defined($lex2{$w})) {
+      push(@{$lex2{$w}}, $w_p);
+    } else {
+      $lex2{$w} = [$w_p];
+    }
+  }
+  %lex = (%lex1, %lex2);
+  while (<W>) {
+    chomp;
+    if (defined($lex{$_})) {
+      foreach $x (@{$lex{$_}}) {
+        print "$x\n";
+      }
+    }
+  }
+  close(W);
+  ' $kwsdatadir/tmp/L1.lex $kwsdatadir/keywords_proxy_words.list \
+  > $kwsdatadir/tmp/L2.lex
+rm -f $kwsdatadir/tmp/L1.tmp.lex $kwsdatadir/tmp/L2.tmp.lex
+
+# Creates words.txt that covers all the words in L1.lex and L2.lex. We append
+# new words to the original word symbol table.
+max_id=`cat $kwsdatadir/words.txt | awk '{print $2}' | sort -n | tail -1`;
+cat $kwsdatadir/keywords_proxy.txt |\
+  awk '{for(i=2; i <= NF; i++) {print $i;}}' |\
+  cat - <(cat $kwsdatadir/tmp/L2.lex | awk '{print $1;}') |\
+  cat - <(cat $kwsdatadir/tmp/L1.lex | awk '{print $1;}') |\
+  sort -u | \
+  (grep -F -v -x -f <(cat $kwsdatadir/words.txt | awk '{print $1;}') || true)|\
+  awk 'BEGIN{x='$max_id'+1}{print $0"\t"x; x++;}' |\
+  cat $kwsdatadir/words.txt - > $kwsdatadir/tmp/words.txt
+
+# Creates the keyword list that we need to generate proxies for.
+cat $kwsdatadir/keywords_proxy.txt | perl -e '
+  open(W, "<'$kwsdatadir/tmp/L2.lex'") ||
+    die "Fail to open L2 lexicon: '$kwsdatadir/tmp/L2.lex'\n";
+  my %lexicon;
+  while (<W>) {
+    chomp;
+    my @col = split();
+    @col >= 2 || die "'$0': Bad line in lexicon: $_\n";
+    if ('$pron_probs' eq "false") {
+      $lexicon{$col[0]} = scalar(@col)-1;
+    } else {
+      $lexicon{$col[0]} = scalar(@col)-2;
+    }
+  }
+  while (<>) {
+    chomp;
+    my $line = $_;
+    my @col = split();
+    @col >= 2 || die "Bad line in keywords file: $_\n";
+    my $len = 0;
+    for (my $i = 1; $i < scalar(@col); $i ++) {
+      if (defined($lexicon{$col[$i]})) {
+        $len += $lexicon{$col[$i]};
+      } else {
+        print STDERR "'$0': No pronunciation found for word: $col[$i]\n";
+      }
+    }
+    if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){
+      print "$line\n";
+    } elsif ($len > '$max_phone_cutoff'){
+      print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n";
+    } else {
+      print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n";
+    }
+  }' > $kwsdatadir/tmp/keywords.txt
+
+# Creates proxy keywords.
+local/generate_proxy_keywords.sh \
+  --cmd "$cmd" --nj "$nj" --beam "$beam" --nbest "$nbest" \
+  --phone-beam $phone_beam --phone-nbest $phone_nbest \
+  --confusion-matrix "$confusion_matrix" --count-cutoff "$count_cutoff" \
+  --pron-probs "$pron_probs" $kwsdatadir/tmp/
+cp $kwsdatadir/tmp/keywords.fsts $kwsdatadir
+
+# Creates utterance id for each utterance.
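+# The resulting utter_id file maps each utterance id to a 1-based integer
+# index, one entry per line; with hypothetical ids it would look like:
+#   BABEL_OP1_000_12345_inLine_0001 1
+#   BABEL_OP1_000_12345_inLine_0002 2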
+cat $datadir/segments | \
+  awk '{print $1}' | \
+  sort | uniq | perl -e '
+  $idx=1;
+  while(<>) {
+    chomp;
+    print "$_ $idx\n";
+    $idx++;
+  }' > $kwsdatadir/utter_id
+
+# Map utterances to the names that will appear in the rttm file. You have
+# to modify the commands below according to your rttm file.
+cat $datadir/segments | awk '{print $1" "$2}' |\
+  sort | uniq > $kwsdatadir/utter_map;
+
+echo "$0: Kws data preparation succeeded"
diff --git a/egs/babel/s5d/local/kws_gen_oracle_lattices.sh b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh
new file mode 100755
index 00000000000..b73112b191d
--- /dev/null
+++ b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+# Begin configuration section.
+cmd=run.pl
+duptime=0.5
+model=final.mdl
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <lang-dir> <data-dir> <decode-dir>"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs."
+  echo ""
+  exit 1;
+fi
+
+lang=$1;
+data=$2;
+decodedir=$3;
+
+
+kwsdatadir=$data/kws
+oracledir=$decodedir/kws_oracle
+mkdir -p $oracledir/log
+
+for filename in $lang/words.txt $decodedir/num_jobs \
+                $data/text $decodedir/lat.1.gz \
+                $decodedir/../$model ; do
+  if [[ ! -f $filename ]] ; then
+    echo "FATAL: File $filename does not exist!"
+    exit 1;
+  fi
+done
+
+nj=`cat $decodedir/num_jobs`
+
+(cd $decodedir; ln -s ../$model final.mdl )
+(cd $oracledir; echo "$nj" > num_jobs )
+
+$cmd LAT=1:$nj $oracledir/log/lat.LAT.log \
+  cat $data/text \| \
+  sed 's/- / /g' \| \
+  sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \
+  lattice-oracle --word-symbol-table=$lang/words.txt \
+  --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \
+  "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra;
+
diff --git a/egs/babel/s5d/local/kws_oracle.sh b/egs/babel/s5d/local/kws_oracle.sh
new file mode 100755
index 00000000000..c7aa661664f
--- /dev/null
+++ b/egs/babel/s5d/local/kws_oracle.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
+#                 2013  Johns Hopkins University
+# Apache 2.0.
+
+. ./path.sh
+. ./cmd.sh
+
+# Begin configuration section.
+cmd=run.pl
+acwt=0.09091  # Acoustic weight -- should not be necessary for oracle lattices
+duptime=0.6   # Max time difference within which occurrences of the same KW will be seen as duplicates
+text=         # an alternative reference text to use; when not specified, <data-dir>/text will be used
+model=        # acoustic model to use
+extraid=      # kws setup extra ID (when the kws task was set up using kws_setup.sh --extraid)
+stage=0       # to resume the computation from a different stage
+# End configuration section.
+
+set -e
+set -o pipefail
+
+echo "$0 $@"  # Print the command line for logging
+
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <lang-dir> <data-dir> <decode-dir>"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs."
+  echo "  --text <text-file>       # The alternative text file in the format SEGMENT W1 W2 W3...;"
+  echo "                           # the default text file is taken from <data-dir>/text"
+  echo ""
+  exit 1;
+fi
+
+lang=$1;
+data=$2;
+decodedir=$3;
+
+if [ -z $text ] ; then
+  text=$data/text
+fi
+
+if [ -z "$model" ]; then # if --model was not specified on the command line...
+  srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory.
+  model=$srcdir/final.mdl;
+fi
+
+if [ -z $extraid ] ; then # the same logic as with kws_setup.sh
+  kwsdatadir=$data/kws
+else
+  kwsdatadir=$data/${extraid}_kws
+fi
+
+nj=`cat $decodedir/num_jobs`;
+
+oracledir=$decodedir/kws_oracle
+mkdir -p $oracledir
+mkdir -p $oracledir/log
+
+if [ $stage -le 0 ] ; then
+  echo "$nj" > $oracledir/num_jobs
+  $cmd LAT=1:$nj $oracledir/log/oracle_lat.LAT.log \
+    cat $text \| \
+    sed 's/- / /g' \| \
+    sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \
+    lattice-oracle --word-symbol-table=$lang/words.txt \
+    --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \
+    "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra;
+fi
+
+if [ $stage -le 1 ] ; then
+  steps/make_index.sh --cmd "$cmd" --acwt $acwt --model $model \
+    $kwsdatadir $lang $oracledir $oracledir
+fi
+
+if [ $stage -le 2 ] ; then
+  steps/search_index.sh --cmd "$cmd" $kwsdatadir $oracledir
+fi
+
+if [ $stage -le 3 ]; then
+
+  # TODO: this stage should probably be moved into a single script file
+  # and used across all the kw search scripts
+  duration=`head -1 $kwsdatadir/ecf.xml |\
+    grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+    grep -o -E "[0-9]*[\.]*[0-9]*" |\
+    perl -e 'while(<>) {print $_/2;}'`
+
+
+  cat $oracledir/result.* | \
+    utils/write_kwslist.pl --flen=0.01 --duration=$duration \
+    --segments=$data/segments --normalize=true --duptime=$duptime \
+    --map-utter=$kwsdatadir/utter_map --remove-dup=true \
+    - $oracledir/kwslist_orig.xml
+
+  # This does not do much -- it just adds empty entries for keywords for which
+  # not even one occurrence was found
+  local/fix_kwslist.pl $kwsdatadir/kwlist.xml $oracledir/kwslist_orig.xml $oracledir/kwslist.xml
+fi
+
+
+if [ $stage -le 4 ]; then
+
+  # As there is missing functionality in F4DE for scoring
+  # subsets of the original keyword set, let's keep this commented out.
+  # Alternatively, TODO: write a filter_kwslist.pl script
+  # that will produce a kwslist on the basis of a given kwlist.xml subset
+
+  local/kws_score_f4de.sh `dirname $kwsdatadir` $oracledir
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_outvocab.xml \
+  #-  --f4de-prefix outvocab `dirname $kwsdatadir` $oracledir || exit 1
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_invocab.xml \
+  #-  --f4de-prefix invocab `dirname $kwsdatadir` $oracledir || exit 1
+
+  echo "======================================================="
+  (
+  echo -n "ATWV-full "
+  grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13
+  )
+
+  #-(
+  #-echo -n "ATWV-invocab "
+  #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the invocab results"
+
+  #-(
+  #-echo -n "ATWV-outvocab "
+  #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the outvocab results"
+
+  echo "======================================================="
+fi
diff --git a/egs/babel/s5d/local/kws_oracle_threshold.pl b/egs/babel/s5d/local/kws_oracle_threshold.pl
new file mode 100755
index 00000000000..e1dc153767e
--- /dev/null
+++ b/egs/babel/s5d/local/kws_oracle_threshold.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+sub KeywordSort {
+  if ($a->[0] ne $b->[0]) {
+    $b->[0] <=> $a->[0];
+  } else {
+    $b->[1] <=> $a->[1];
+  }
+}
+
+my $Usage = <<EOU;
+Usage: kws_oracle_threshold.pl [options] <alignment.csv>
+ e.g.: kws_oracle_threshold.pl alignment.csv
+
+Allowed options:
+  --beta      : Beta value when computing ATWV              (float, default = 999.9)
+  --duration  : Duration of all audio, you must set this    (float, default = 999.9)
+
+EOU
+
+my $beta = 999.9;
+my $duration = 999.9;
+GetOptions(
+  'beta=f' => \$beta,
+  'duration=f' => \$duration);
+
+@ARGV == 1 || die $Usage;
+
+# Works out the input/output source.
+my $alignment_in = shift @ARGV;
+
+# Hash alignment file. For each instance we store a 3-dimensional vector:
+# [score, ref, res]
+# where "score" is the confidence of that instance, "ref" equals 0 means there's
+# no reference at that place and 1 means there's a corresponding reference, "res"
+# 0 means the instance is not considered when scoring, 1 means it's a false
+# alarm and 2 means it's a true hit.
+open(A, "<$alignment_in") || die "$0: Fail to open alignment file: $alignment_in\n";
+my %Ntrue;
+my %keywords;
+my %alignment;
+my $lattice_miss = 0;
+my $lattice_ref = 0;
+my %keywords_lattice_miss;
+my %keywords_lattice_ref;
+while (<A>) {
+  chomp;
+  my @col = split(',');
+  @col == 12 || die "$0: Bad number of columns in $alignment_in: $_\n";
+
+  # First line of the csv file.
+  if ($col[11] eq "alignment") {next;}
+
+  # Instances that do not have corresponding references.
+  if ($col[11] eq "CORR!DET" || $col[11] eq "FA") {
+    if (!defined($alignment{$col[3]})) {
+      $alignment{$col[3]} = [];
+    }
+    my $ref = 0;
+    my $res = 0;
+    if ($col[11] eq "FA") {
+      $res = 1;
+    }
+    push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]);
+    next;
+  }
+
+  # Instances that have corresponding references.
+  if ($col[11] eq "CORR" || $col[11] eq "MISS") {
+    if (!defined($alignment{$col[3]})) {
+      $alignment{$col[3]} = [];
+      $Ntrue{$col[3]} = 0;
+      $keywords_lattice_miss{$col[3]} = 0;
+      $keywords_lattice_ref{$col[3]} = 0;
+    }
+    my $ref = 1;
+    my $res = 0;
+    if ($col[10] ne "") {
+      if ($col[11] eq "CORR") {
+        $res = 2;
+      }
+      push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]);
+    }
+    $Ntrue{$col[3]} += 1;
+    $keywords{$col[3]} = 1;
+
+    # The following is for lattice recall and STWV.
+    $lattice_ref ++;
+    $keywords_lattice_ref{$col[3]} ++;
+    if ($col[11] eq "MISS" && $col[10] eq "") {
+      $lattice_miss ++;
+      $keywords_lattice_miss{$col[3]} ++;
+    }
+    next;
+  }
+}
+close(A);
+
+# Works out the oracle ATWV by sweeping the threshold.
+my $atwv = 0.0;
+my $otwv = 0.0;
+my %mtwv_sweep;
+foreach my $kwid (keys %keywords) {
+  # Sort the instances by confidence score.
+  my @instances = sort KeywordSort @{$alignment{$kwid}};
+  my $local_otwv = 0.0;
+  my $max_local_otwv = 0.0;
+  my $local_atwv = 0.0;
+  my $active_otwv_threshold = "";
+  foreach my $instance (@instances) {
+    my @ins = @{$instance};
+    my $gain = 1.0 / $Ntrue{$kwid};
+    my $cost = $beta / ($duration - $Ntrue{$kwid});
+    # OTWV.
+    if ($local_otwv > $max_local_otwv &&
+        $active_otwv_threshold ne "" && $active_otwv_threshold != $ins[0]) {
+      $max_local_otwv = $local_otwv;
+    }
+    if ($ins[1] == 1) {
+      $local_otwv += $gain;
+    } else {
+      $local_otwv -= $cost;
+    }
+    $active_otwv_threshold = $ins[0];
+    if ($active_otwv_threshold == 1.0) {
+      # If score = 1.0, we always accept the instance as YES.
+      $max_local_otwv = $local_otwv;
+    }
+
+    # ATWV.
+    if ($ins[2] == 1) {
+      $local_atwv -= $cost;
+    } elsif ($ins[2] == 2) {
+      $local_atwv += $gain;
+    }
+
+    # MTWV.
+    for (my $threshold = 0.000; $threshold <= $ins[0]; $threshold += 0.001) {
+      if ($ins[1] == 1) {
+        $mtwv_sweep{$threshold} += $gain;
+      } else {
+        $mtwv_sweep{$threshold} -= $cost;
+      }
+    }
+  }
+  if ($local_otwv > $max_local_otwv) {
+    $max_local_otwv = $local_otwv;
+  }
+  $atwv += $local_atwv;
+  $otwv += $max_local_otwv;
+}
+
+# Works out the MTWV.
+my $mtwv = 0.0;
+my $mtwv_threshold = 0.0;
+for my $threshold (keys %mtwv_sweep) {
+  if ($mtwv_sweep{$threshold} > $mtwv) {
+    $mtwv = $mtwv_sweep{$threshold};
+    $mtwv_threshold = $threshold;
+  }
+}
+
+# Works out the STWV.
+my $stwv = 0.0;
+for my $kw (keys %keywords_lattice_miss) {
+  $stwv += $keywords_lattice_miss{$kw} / $keywords_lattice_ref{$kw};
+}
+$stwv = 1 - $stwv / scalar(keys %keywords);
+
+$atwv /= scalar(keys %keywords);
+$atwv = sprintf("%.4f", $atwv);
+$otwv /= scalar(keys %keywords);
+$otwv = sprintf("%.4f", $otwv);
+$mtwv /= scalar(keys %keywords);
+$mtwv = sprintf("%.4f", $mtwv);
+my $lattice_recall = 1 - $lattice_miss / $lattice_ref;
+$lattice_recall = sprintf("%.4f", $lattice_recall);
+$stwv = sprintf("%.4f", $stwv);
+print "ATWV = $atwv\n";
+print "OTWV = $otwv\n";
+print "STWV = $stwv\n";
+print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n";
+print "Lattice Recall = $lattice_recall\n";
diff --git a/egs/babel/s5d/local/kws_score.sh b/egs/babel/s5d/local/kws_score.sh
new file mode 120000
index 00000000000..9b896c530a7
--- /dev/null
+++ b/egs/babel/s5d/local/kws_score.sh
@@ -0,0 +1 @@
+kws_score_f4de.sh
\ No newline at end of file
diff --git a/egs/babel/s5d/local/kws_score_f4de.sh b/egs/babel/s5d/local/kws_score_f4de.sh
new file mode 100755
index 00000000000..4f79e1925a9
--- /dev/null
+++ b/egs/babel/s5d/local/kws_score_f4de.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
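+#
+# This script wraps NIST's F4DE KWSEval tool: it aligns the posted kwslist.xml
+# against the reference rttm/ecf, writes alignment.csv and sum.txt into the
+# output directory, and local/kws_oracle_threshold.pl then turns alignment.csv
+# into the ATWV/OTWV/MTWV/STWV numbers stored in metrics.txt.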
+
+# Begin configuration section.
+# case_insensitive=true
+extraid=
+kwlist=
+ecf=
+rttm=
+f4de_prefix=
+# End configuration section.
+
+help_message="$0: score the kwslist using the F4DE scorer from NIST
+  Example:
+    $0 [additional-parameters] <data-dir> <kws-output-dir>
+  where the most important additional parameters can be:
+    --extraid <extraid>     # for use when non-default kws tasks were set up
+                            # (using kws_setup.sh --extraid) for a single kaldi data dir
+    --kwlist <kwlist-file>  # allows for an alternative kwlist -- if not set, the default
+                            # kwlist is taken from <kws-data-dir>/kwlist.xml
+    --f4de-prefix <prefix>  # allows for scoring the same results using
+                            # different kwlists and storing them in the same dir "
+
+echo $0 $@
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 2 ]; then
+  printf "FATAL: incorrect number of variables given to the script\n\n"
+  printf "$help_message\n"
+  exit 1;
+fi
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$1/kws
+else
+  kwsdatadir=$1/${extraid}_kws
+fi
+kwsoutputdir="$2/"
+
+if [ -z $kwlist ] ; then
+  kwlist=$kwsdatadir/kwlist.xml
+fi
+
+if [ -z $rttm ] ; then
+  rttm=$kwsdatadir/rttm
+fi
+
+if [ -z $ecf ] ; then
+  ecf=$kwsdatadir/ecf.xml
+fi
+
+if [ ! -z ${f4de_prefix} ] ; then
+  f4de_prefix="/${f4de_prefix}"
+fi
+
+if [[ ! -d "$kwsdatadir" ]] ; then
+  echo "FATAL: the KWS input data directory does not exist!"
+  exit 1;
+fi
+
+for file in $ecf $rttm $kwlist ; do
+  if [[ ! -f "$file" ]] ; then
+    echo "FATAL: file $file does not exist!"
+    exit 1;
+  fi
+done
+
+echo KWSEval -e $ecf -r $rttm -t $kwlist \
+  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir
+
+if [ -f $kwsdatadir/categories ]; then
+  if ! grep -q "NGramOrder" "$kwlist"; then
+    cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml
+    kwlist=$kwsoutputdir/kwlist.xml
+  elif ! grep -q "Characters" "$kwlist"; then
+    cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml
+    kwlist=$kwsoutputdir/kwlist.xml
+  fi
+fi
+
+KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \
+  --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+  -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1;
+
+duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"`
+
+local/kws_oracle_threshold.pl --duration $duration ${kwsoutputdir}${f4de_prefix}/alignment.csv > ${kwsoutputdir}${f4de_prefix}/metrics.txt
+
+exit 0;
+
+
diff --git a/egs/babel/s5d/local/kws_search.sh b/egs/babel/s5d/local/kws_search.sh
new file mode 100755
index 00000000000..39177e8a4c5
--- /dev/null
+++ b/egs/babel/s5d/local/kws_search.sh
@@ -0,0 +1,230 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+
+help_message="$(basename $0): do keyword indexing and search. data-dir is assumed to have
+  kws/ subdirectory that specifies the terms to search for. Output is in
+  decode-dir/kws/
+  Usage:
+    $(basename $0) <lang-dir> <data-dir> <decode-dir>"
+
+# Begin configuration section.
+#acwt=0.0909091
+min_lmwt=7
+max_lmwt=17
+duptime=0.6
+cmd=run.pl
+model=
+skip_scoring=false
+skip_optimization=false  # true can speed it up if the number of keywords is small.
+max_states=150000
+indices_dir=
+kwsout_dir=
+stage=0
+word_ins_penalty=0
+extraid=
+silence_word=   # specify this if you did so in kws_setup.sh; it's more accurate.
+ntrue_scale=1.0
+nbest=900
+max_silence_frames=50
+# End configuration section.
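+#
+# Note: indexing and search are repeated for every LM weight in
+# [min_lmwt, max_lmwt]; each index is built with acoustic scale 1/lmwt
+# (e.g. lmwt=10 gives acwt=0.1), producing one kws output directory per
+# weight so that the best-scoring weight can be selected afterwards.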
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+set -o pipefail
+
+
+if [[ "$#" -ne "3" ]] ; then
+  echo -e "$0: FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+silence_opt=
+
+langdir=$1
+datadir=$2
+decodedir=$3
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$datadir/kws
+else
+  kwsdatadir=$datadir/${extraid}_kws
+fi
+
+if [ -z $kwsout_dir ] ; then
+  if [ -z $extraid ] ; then
+    kwsoutdir=$decodedir/kws
+  else
+    kwsoutdir=$decodedir/${extraid}_kws
+  fi
+else
+  kwsoutdir=$kwsout_dir
+fi
+mkdir -p $kwsoutdir
+
+if [ -z $indices_dir ]; then
+  indices_dir=$kwsoutdir
+fi
+
+for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do
+  if [ ! -d "$d" ]; then
+    echo "$0: FATAL: expected directory $d to exist"
+    exit 1;
+  fi
+done
+if [[ ! -f "$kwsdatadir/ecf.xml" ]] ; then
+  echo "$0: FATAL: the $kwsdatadir does not contain the ecf.xml file"
+  exit 1;
+fi
+
+echo $kwsdatadir
+duration=`head -1 $kwsdatadir/ecf.xml |\
+  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+  perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'`
+
+#duration=`head -1 $kwsdatadir/ecf.xml |\
+#  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+#  grep -o -E "[0-9]*[\.]*[0-9]*" |\
+#  perl -e 'while(<>) {print $_/2;}'`
+
+echo "Duration: $duration"
+
+if [ ! -z "$model" ]; then
+  model_flags="--model $model"
+else
+  model_flags=
+fi
+
+frame_subsampling_factor=1
+if [ -f $decodedir/../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor)
+  echo "Frame subsampling factor autodetected: $frame_subsampling_factor"
+fi
+
+if [ $stage -le 0 ] ; then
+  if [ ! -f $indices_dir/.done.index ] ; then
+    [ ! -d $indices_dir ] && mkdir $indices_dir
+    for lmwt in `seq $min_lmwt $max_lmwt` ; do
+      indices=${indices_dir}_$lmwt
+      mkdir -p $indices
+
+      acwt=`perl -e "print (1.0/$lmwt);"`
+      [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word"
+      steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags \
+        --skip-optimization $skip_optimization --max-states $max_states \
+        --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames \
+        --frame-subsampling-factor ${frame_subsampling_factor} \
+        $kwsdatadir $langdir $decodedir $indices || exit 1
+    done
+    touch $indices_dir/.done.index
+  else
+    echo "Assuming indexing has already been done. If you really need to re-run"
+    echo "the indexing again, delete the file $indices_dir/.done.index"
+  fi
+fi
+
+
+if [ $stage -le 1 ]; then
+  for lmwt in `seq $min_lmwt $max_lmwt` ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    mkdir -p $kwsoutdir
+    local/search_index.sh --cmd "$cmd" --indices-dir $indices \
+      --strict false --frame-subsampling-factor ${frame_subsampling_factor} \
+      $kwsdatadir $kwsoutput || exit 1
+
+    nj=`cat $indices/num_jobs`
+    # this will truncate the file
+    rm -f $kwsoutput/results; touch $kwsoutput/results
+
+    # This is a memory-efficient way to do the filtering;
+    # we do it this way because the result.* files can be fairly big
+    # and we do not want to run into trouble with memory.
+    #%  files=""
+    #%  for job in `seq 1 $nj`; do
+    #%    if [ -f $kwsoutput/results.${job}.gz ] ; then
+    #%      files="$files <(gunzip -c $kwsoutput/results.${job}.gz)"
+    #%    elif [ -f $kwsoutput/results.${job} ] ; then
+    #%      files="$files $kwsoutput/results.${job}"
+    #%    else
+    #%      echo >&2 "The file $kwsoutput/results.${job}[.gz] does not exist"
+    #%      return 1
+    #%    fi
+    #%  done
+    #%  sort -m -u $files | local/search/filter_kws_results.pl --nbest $nbest |\
+    #%    sort -u > $kwsoutput/results
+
+    # This is similar to the previous code -- it should produce the same
+    # results (albeit more slowly, as it relies on temporary files), but
+    # the memory requirements are extremely limited.
+    # I decided to go for this as the previous code relies on the
+    # assumption that the partial result files are sorted;
+    # that is not true for the older generation of the pipeline.
+    for job in `seq 1 $nj`; do
+      {
+        if [ -f $kwsoutput/result.${job}.gz ]; then
+          gunzip -c $kwsoutput/result.${job}.gz
+        else
+          cat $kwsoutput/result.${job}
+        fi
+      } | cat - $kwsoutput/results | \
+        local/search/filter_kws_results.pl --nbest $nbest | \
+        sort -u > $kwsoutput/results.${job}
+      mv $kwsoutput/results.${job} $kwsoutput/results
+    done
+
+  done
+
+
+fi
+
+if [ $stage -le 2 ]; then
+  echo "Writing unnormalized results"
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_unnormalized.LMWT.log \
+    set -e ';' set -o pipefail ';' \
+    cat ${kwsoutdir}_LMWT/results \| sort -u \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
+    --segments=$datadir/segments --normalize=false --duptime=$duptime --remove-dup=true \
+    --map-utter=$kwsdatadir/utter_map \
+    - ${kwsoutdir}_LMWT/kwslist.unnormalized.xml || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+  echo "Writing normalized results"
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_normalized.LMWT.log \
+    set -e ';' set -o pipefail ';' \
+    cat ${kwsoutdir}_LMWT/results \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
+    --segments=$datadir/segments --normalize=true --duptime=$duptime --remove-dup=true \
+    --map-utter=$kwsdatadir/utter_map --digits=3 \
+    - ${kwsoutdir}_LMWT/kwslist.xml || exit 1
+fi
+
+
+if [ -z $extraid ] ; then
+  extraid_flags=
+else
+  extraid_flags=" --extraid ""$extraid"" "
+fi
+
+if [ $stage -le 4 ]; then
+  if [[ (! -x local/kws_score.sh ) ]] ; then
+    echo "Not scoring, because the file local/kws_score.sh is not present"
+  elif [[ $skip_scoring == true ]] ; then
+    echo "Not scoring, because --skip-scoring true was issued"
+  else
+    echo "Scoring KWS results"
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/scoring.LMWT.log \
+      local/kws_score.sh $extraid_flags $datadir ${kwsoutdir}_LMWT || exit 1;
+  fi
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/kws_setup.sh b/egs/babel/s5d/local/kws_setup.sh
new file mode 100755
index 00000000000..93513a56d94
--- /dev/null
+++ b/egs/babel/s5d/local/kws_setup.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Begin configuration section.
+cmd=run.pl
+case_insensitive=true
+subset_ecf=
+rttm_file=
+extraid=
+use_icu=true
+icu_transform="Any-Lower"
+kwlist_wordlist=false
+langid=107
+annotate=true
+silence_word=  # Optional silence word to insert (once) between words of the transcript.
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+set -e
+set -u
+set -o pipefail
+
+help_message="$0: Initialize and set up the KWS task directory
+Usage:
+  $0 <ecf-file> <kwlist-file> [rttm-file] <lang-dir> <data-dir>
+allowed switches:
+  --subset-ecf /path/to/filelist   # The script will subset the ecf file
+                                   # to contain only the files from the filelist
+  --rttm-file /path/to/rttm        # the preferred way to specify the rttm;
+                                   # the older way (as an in-line parameter) is
+                                   # obsolete and will be removed in the near future
+  --case-insensitive (true|false)  # Should the search be case-insensitive or not?
+                                   # Please note that case sensitivity depends
+                                   # on the shell locale!
+  --annotate (true|false)          # Annotate the kwlist with categories
+  --use-icu (true|false)           # Use the ICU uconv binary to normalize casing
+  --icu-transform <transform>      # When using ICU, use this transliteration
+  --kwlist-wordlist                # The file with the list of words is not an xml
+  "
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. utils/parse_options.sh || exit 1;
+
+if [ "$#" -ne "5" ] && [ "$#" -ne "4" ] ; then
+  printf "FATAL: invalid number of arguments.\n\n"
+  printf "$help_message\n"
+  exit 1
+fi
+
+ecf_file=$1
+kwlist_file=$2
+if [ "$#" -eq "5" ] ; then
+  rttm_file=$3
+  langdir=$4
+  datadir=$5
+else
+  langdir=$3
+  datadir=$4
+fi
+
+# don't quote rttm_file as it's valid for it to be empty.
+for filename in "$ecf_file" "$kwlist_file" $rttm_file; do
+  echo $filename
+  if [ ! -e $filename ] ; then
+    printf "FATAL: filename \'$filename\' does not refer to a valid file\n"
+    printf "$help_message\n"
+    exit 1;
+  fi
+done
+for dirname in "$langdir" "$datadir" ; do
+  if [ ! -d $dirname ] ; then
+    printf "FATAL: dirname \'$dirname\' does not refer to a valid directory\n"
+    printf "$help_message\n"
+    exit 1;
+  fi
+done
+
+if [ ! -z $extraid ]; then
+  kwsdatadir=$datadir/${extraid}_kws
+else
+  kwsdatadir=$datadir/kws
+fi
+
+mkdir -p $kwsdatadir
+
+if [ -z $subset_ecf ] ; then
+  test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml
+  cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1
+else
+  local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml
+fi
+
+if $kwlist_wordlist ; then
+(
+  echo '<kwlist ecf_filename="kwlist.xml" language="" encoding="UTF-8" compareNormalize="" version="">'
+  awk '{ printf("  <kw kwid=\"%s\">\n", $1);
+         printf("    <kwtext>"); for (n=2;n<=NF;n++){ printf("%s", $n); if(n<NF) printf(" "); } printf("</kwtext>\n");
+         printf("  </kw>\n"); }' < ${kwlist_file}
+  # while read line; do
+  #   id_str=`echo $line | cut -f 1 -d ' '`
+  #   kw_str=`echo $line | cut -f 2- -d ' '`
+  #   echo "  <kw kwid=\"$id_str\">"
+  #   echo "    <kwtext>$kw_str</kwtext>"
+  #   echo "  </kw>"
+  # done < ${kwlist_file}
+  echo '</kwlist>'
+) > $kwsdatadir/kwlist.xml || exit 1
+else
+  test -f $kwsdatadir/kwlist.xml && rm -f $kwsdatadir/kwlist.xml
+  cp "$kwlist_file" $kwsdatadir/kwlist.xml || exit 1
+fi
+
+if [ ! -z $rttm_file ] ; then
+  test -f $kwsdatadir/rttm && rm -f $kwsdatadir/rttm
+  cp "$rttm_file" $kwsdatadir/rttm || exit 1
+fi
+
+sil_opt=
+[ ! -z $silence_word ] && sil_opt="--silence-word $silence_word"
+local/kws_data_prep.sh --case-insensitive ${case_insensitive} \
+  $sil_opt --use_icu ${use_icu} --icu-transform "${icu_transform}" \
+  $langdir $datadir $kwsdatadir || exit 1
+
+if $annotate ; then
+  set -x
+  rm -f $kwsdatadir/kwlist.xml
+  cat $kwsdatadir/keywords.txt | local/search/create_categories.pl | local/search/normalize_categories.pl > $kwsdatadir/categories
+  cat "$kwlist_file" | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsdatadir/kwlist.xml || exit 1
+fi
+#~ (
+#~ echo '<kwlist>'
+#~ while read line; do
+#~   id_str=`echo $line | cut -f 1 -d ' '`
+#~   kw_str=`echo $line | cut -f 2- -d ' '`
+#~   echo "  <kw kwid=\"$id_str\">"
+#~   echo "    <kwtext>$kw_str</kwtext>"
+#~   echo "  </kw>"
#~ done < ${kwlist_file}
+#~ echo '</kwlist>'
+#~ ) > $kwsdatadir/kwlist.xml || exit 1
+#~
+#-(
+#-echo '<kwlist>'
+#-id=1
+#-while read line; do
+#-  id_str=$( printf "KWS$langid-%04d\n" $id )
+#-  echo "  <kw kwid=\"$id_str\">"
+#-  echo "    <kwtext>$line</kwtext>"
+#-  echo "  </kw>"
+#-  id=$(( $id + 1 ))
+#-done < ${kwlist_file}
+#-echo '</kwlist>'
+#-) > $kwsdatadir/kwlist.xml || exit 1
+#-
diff --git a/egs/babel/s5d/local/lattice_to_ctm.sh b/egs/babel/s5d/local/lattice_to_ctm.sh
new file mode 100755
index 00000000000..5fbde42d237
--- /dev/null
+++ b/egs/babel/s5d/local/lattice_to_ctm.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Copyright  Johns Hopkins University (Author: Daniel Povey)  2012.  Apache 2.0.
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+decode_mbr=true
+beam=5
+word_ins_penalty=0.5
+min_lmwt=7
+max_lmwt=17
+model=
+
+#end configuration section.
+
+#debugging stuff
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1)                   # (createCTM | filterCTM )."
+  exit 1;
+fi
+
+data=$1
+lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
+dir=$3
+
+if [ -z "$model" ] ; then
+  model=`dirname $dir`/final.mdl # Relative path does not work in some cases
+  #model=$dir/../final.mdl # assume model one level up from decoding dir.
+  #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl
+fi
+
+
+for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+name=`basename $data`; # e.g. eval2000
+
+mkdir -p $dir/scoring/log
+
+if [ $stage -le 0 ]; then
+  if [ ! -f $lang/phones/word_boundary.int ] ; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
+      set -e -o pipefail \; \
+      mkdir -p $dir/score_LMWT/ '&&' \
+      lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
+      lattice-prune --beam=$beam ark:- ark:- \| \
+      lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
+      lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/score_LMWT/$name.ctm || exit 1;
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
+      set -e -o pipefail \; \
+      mkdir -p $dir/score_LMWT/ '&&' \
+      lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
+      lattice-prune --beam=$beam ark:- ark:- \| \
+      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/score_LMWT/$name.ctm || exit 1;
+  fi
+fi
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score, from the ctm.
+  for x in $dir/score_*/$name.ctm; do
+    cp $x $x.bkup1;
+    cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \
+      grep -v -E '<UNK>|%HESITATION|\(\(\)\)' | \
+      grep -v -E '<silence>' | \
+      grep -v -E '<noise>' | \
+      grep -v -E '<v-noise>' | \
+      grep -v -E '<unk>' | \
+      grep -v -E '<hes>' | \
+      grep -v -E '<eps>' | \
+      perl -e '@list = (); %list = ();
+      while(<>) {
+        chomp;
+        @col = split(" ", $_);
+        push(@list, $_);
+        $key = "$col[0]" . " $col[1]";
+        $list{$key} = 1;
+      }
+      foreach(sort keys %list) {
+        $key = $_;
+        foreach(grep(/$key/, @list)) {
+          print "$_\n";
+        }
+      }' > $x;
+  done
+fi
+
+
+echo "Lattice2CTM finished on " `date`
+exit 0
diff --git a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py
new file mode 100755
index 00000000000..3670ba755bc
--- /dev/null
+++ b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py
@@ -0,0 +1,470 @@
+#!/usr/bin/env python
+
+# Copyright 2016  Johns Hopkins University (Author: Matthew Wiesner)
+# Apache 2.0
+
+# ============ Make unicode-based graphemic lexicon =============
+#
+# This script takes a list of either words or words and corresponding
+# morphemes and returns a kaldi format lexicon.
+# =============================================================== + +# Import Statements + +from __future__ import print_function +import codecs +import argparse +import unicodedata +import os +import re +import sys +import numpy as np + + +def main(): + args = parse_input() + baseforms = get_word_list(args.lex_in, args.fmt) + unicode_transcription = baseform2unicode(baseforms) + encoded_transcription, table = encode(unicode_transcription, + args.tag_percentage, + log=args.log) + write_table(table, args.lex_out) + + # Extract dictionary of nonspeech pronunciations + try: + nonspeech = {} + with codecs.open(args.nonspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + nonspeech[line_vals[0]] = line_vals[1] + except (IOError, TypeError): + pass + + # Extract dictionary of extraspeech pronunciations (normally ) + try: + extraspeech = {} + with codecs.open(args.extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + extraspeech[line_vals[0]] = line_vals[1] + except (IOError, TypeError): + pass + + write_lexicon(baseforms, encoded_transcription, args.lex_out, + nonspeech=nonspeech, extraspeech=extraspeech) + + +def parse_input(): + ''' + Parse commandline input. + ''' + if len(sys.argv[1:]) == 0: + print("Usage: ./make_unicode_lexicon.py [opts] lex_in lex_out [log]") + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("lex_in", help="Path of input word list optionally " + "paired with a baseform. 1 word per line with the " + "baseform separated by a tab") + parser.add_argument("lex_out", help="Path of output output " + "graphemic lexicon") + parser.add_argument("log", nargs='?', default=None, + help="Directory in which the logs will be stored"); + parser.add_argument("-F", "--fmt", help="Format of input word list", + action="store", default="word_list") + parser.add_argument("-T", "--tag_percentage", help="Percentage of least" + " frequently occurring graphemes to be tagged", + type=float, action="store", default=0.1) + parser.add_argument("--nonspeech", help="File with map of nonspeech words" + " and pronunciations", action="store", default=None) + parser.add_argument("--extraspeech", help="File with map of extra speech" + " words", action="store", default=None) + parser.add_argument("-V", "--verbose", help="Include useful print outs", + action="store_true") + args = parser.parse_args() + return args + + +def _read_word_list_line(line): + try: + count, word = line.strip().split(None, 1) + float(count) + return word + except ValueError: + return line.strip() + + +def get_word_list(input_file, fmt): + ''' + Read from input file the words and potential baseforms. + + Arguments: input_file -- path to the input word list + fmt -- format of input word list ["word_list", "morfessor"] + Output: + words -- list of tuples (word, baseform) + ''' + with codecs.open(input_file, "r", "utf-8") as f: + if fmt == "word_list" or fmt is None: + words = [] + for line in f: + w = _read_word_list_line(line) + words.append((w, w)) + assert "." not in w, "FORMAT ERROR. Use --fmt [-F] morfessor" + elif fmt == "morfessor": + words = [] + for line in f: + w, bf = line.strip().split(None, 1) + words.append((w, bf)) + else: + sys.exit("Error: Bad input format name") + + return words + + +def baseform2unicode(baseforms): + ''' + Convert each baseform in the list, baseforms, to a parsed unicode + description stored as a list of lists of dictionaries. 
+
+    unicode_transcription = [
+                [{'NAME':'word1_grapheme1','FIELD1':'FIELD1_VAL',...},
+                 {'NAME':'word1_grapheme2','FIELD1':'FIELD1_VAL',...},...],
+                [{'NAME':'word2_grapheme1','FIELD1':'FIELD1_VAL',...},
+                 {},...]
+                ,...,[]]
+
+    Arguments:
+        baseforms -- List of tuples (word, baseform)
+                     e.g. baseforms = get_word_list()
+
+    Output:
+        unicode_transcription -- See above description
+    '''
+
+    # Regular expression for parsing unicode descriptions
+    pattern = re.compile(
+        r"(?P<SCRIPT>[^\s]+)\s"
+        r"(?P<CASE>SMALL\s|CAPITAL\s)?(?P<CHAR_TYPE>"
+        "(?:SUBJOINED )?LETTER |(?:INDEPENDENT VOWEL )"
+        r"|(?:VOWEL SIGN )|VOWEL |SIGN "
+        r"|CHARACTER |JONGSEONG |CHOSEONG |SYMBOL |MARK |DIGIT "
+        r"|SEMIVOWEL |TONE |SYLLABLE |LIGATURE |KATAKANA )"
+        r"(?P<NAME>((?!WITH).)+)"
+        r"(?P<MODIFIERS>WITH .+)?"
+    )
+
+    # For each graphemic baseform generate a parsed unicode description
+    unicode_transcription = []
+    for w, bf in baseforms:
+        # Initialize empty list of words
+        baseform_transcription = []
+        # For each grapheme parse the unicode description
+        for graph in bf:
+            unicode_desc = unicodedata.name(graph)
+            # Use the canonical unicode decomposition
+            tags = unicodedata.normalize('NFD', graph)
+            match_obj = pattern.match(unicode_desc)
+
+            # Grapheme's unicode description is non-standard
+            if(not match_obj):
+                # Underscore, dash, hashtag have special meaning
+                if(graph in ("_", "-", "#")):
+                    graph_dict = {
+                        'CHAR_TYPE': 'LINK',
+                        'SYMBOL': graph,
+                        'NAME': graph
+                    }
+                # The grapheme is whitespace
+                elif(unicode_desc in ("ZERO WIDTH SPACE",
+                                      "ZERO WIDTH NON-JOINER",
+                                      "ZERO WIDTH JOINER",
+                                      "SPACE")):
+                    # Ignore whitespace
+                    continue
+                else:
+                    graph_dict = {'SYMBOL': graph, 'NAME': 'NOT_FOUND'}
+
+            # Grapheme's unicode description is standard
+            else:
+                graph_dict = match_obj.groupdict()
+                graph_dict["SYMBOL"] = graph
+                # Add tags to dictionary (the first element of tags is actually
+                # the base grapheme, so we only check all tags after the first)
+                if(len(tags) > 1):
+                    for i, t in enumerate(tags[1:]):
+                        graph_dict["TAG" + str(i)] = unicodedata.name(t)
+
+            # Add grapheme unicode description dictionary to baseform list
+            baseform_transcription.append(graph_dict)
+        # Add baseform transcription to unicode transcription list
+        unicode_transcription.append(baseform_transcription)
+    return unicode_transcription
+
+
+def encode(unicode_transcription, tag_percentage, log=False):
+    '''
+    Arguments:
+        unicode_transcription -- a list of words whose graphemes are
+                                 represented as a list of dictionaries whose
+                                 fields contain information about parsed
+                                 unicode descriptions.
+
+        tag_percentage -- percent of least frequent graphemes to tag
+        log -- optional printing
+
+    Outputs:
+        Lexicon -- Encoded baseforms
+    '''
+    # Constants
+    VOWELS = "AEIOU"
+    SKIP = "/()"
+
+    graphemes = []
+    table = []
+    encoded_transcription = []
+    # Accumulate grapheme statistics over corpus at some point. For now just
+    # use the lexicon word list. For estimating grapheme frequency this is
+    # probably sufficient since we have many words, each with many
+    # graphemes. We do unfortunately have to assume that case does not matter.
+    # We do not count dashes, underscores, parentheses, etc. Just letters.
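+    # Worked example (illustrative numbers only): with tag_percentage=0.1 and
+    # 20 distinct graphemes, bottom_idx = floor(0.1 * 20) = 2, so count_thresh
+    # becomes the third-smallest relative frequency; any grapheme occurring at
+    # or below that frequency has its diacritics peeled off into tags by
+    # _backoff_diacritics() below.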
+ graph_list = [] + for w in unicode_transcription: + for graph in w: + if graph["SYMBOL"] not in "()\/,-_#.": + graph_list.append(graph["SYMBOL"].lower()) + + graph2int = {v: k for k, v in enumerate(set(graph_list))} + int2graph = {v: k for k, v in graph2int.items()} + graph_list_int = [graph2int[g] for g in graph_list] + bin_edges = range(0, len(int2graph.keys()) + 1) + graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0] / float(len(graph_list_int)) + # Set count threshold to frequency that tags the bottom 10% of graphemes + bottom_idx = int(np.floor(tag_percentage * len(graph_counts))) + count_thresh = sorted(graph_counts)[bottom_idx] + graph_counts_dict = {} + for i, count in enumerate(graph_counts): + graph_counts_dict[int2graph[i]] = count + + graph_counts = graph_counts_dict + + # Print grapheme counts to histogram + if log is not None: + graph_counts_sorted = sorted(graph_counts, reverse=True, + key=graph_counts.get) + logfile = "{}/grapheme_histogram.txt".format(log) + with codecs.open(logfile, "w", "utf-8") as fp: + fp.write("Graphemes (Count Threshold = %.6f)\n" % count_thresh) + for g in graph_counts_sorted: + weight = ("-" * int(np.ceil(500.0 * graph_counts[g])) + + " %.6f\n" % graph_counts[g]) + fp.write("%s -" % (g) + weight) + + # Find a new baseform for each word + for w in unicode_transcription: + word_transcription = "" + + # Find a "pronunciation" for each grapheme in the word + for graph in w: + # Case 1: Check that the grapheme has a unicode description type + # --------------------------------------------------------------- + if("CHAR_TYPE" not in [k.strip() for k in graph.keys()]): + if(graph["SYMBOL"] == "."): + graph["MAP0"] = "\t" + if word_transcription[-1] == " ": + word_transcription = word_transcription[:-1] + "\t" + + elif(graph["SYMBOL"] not in SKIP): + graph["MAP0"] = graph["SYMBOL"].lower() + word_transcription += graph["MAP0"] + " " + + # Case 2: Standard Grapheme + # --------------------------------------------------------------- + elif(graph["CHAR_TYPE"].strip() in + ("LETTER", "VOWEL", "VOWEL SIGN", "SIGN")): + # Backoff diacritics + base_grapheme = graph["NAME"].strip().replace(" ", "-").lower() + graph["MAP0"] = _backoff_diacritics(graph["SYMBOL"].lower(), + base_grapheme, + graph_counts, + count_thresh) + # Add final space + word_transcription += graph["MAP0"] + " " + + # Case 3: Syllable (Assume consonant vowel pattern) + # This is basically just here for Amharic + # ---------------------------------------------------------------- + elif(graph["CHAR_TYPE"].strip() == "SYLLABLE"): + # Multi-word description + if(len(graph["NAME"].strip().split(' ')) > 1): + g_name = graph["NAME"].strip().replace(" ", "-").lower() + graph["MAP0"] = g_name + word_transcription += graph["MAP0"] + "\t" + + # Consonant Vowel Pattern + else: + cv_pattern = (r"(?P[^%s]*)(?P[%s]+)" % + (VOWELS, VOWELS)) + parsed_graph = re.match(cv_pattern, graph["NAME"]) + if(not parsed_graph): + sys.exit("Syllable did not obey" + "consonant-vowel pattern.") + + graph_dict = parsed_graph.groupdict() + + # Get consonant if it exists + if("CONSONANT" in graph_dict.keys() and + graph_dict["CONSONANT"]): + graph["MAP0"] = graph_dict["CONSONANT"].lower() + word_transcription += graph["MAP0"] + " " + + # Get vowel if it exists + if("VOWEL" in graph_dict.keys() and graph_dict["VOWEL"]): + graph["MAP1"] = graph_dict["VOWEL"].lower() + word_transcription += graph["MAP1"] + "\t" + + # Case 4: Commonly occurring symbols + # ---------------------------------------------------------------- 
+ elif(graph["CHAR_TYPE"].strip() == "LINK"): + # Add tab for underscores (kaldi lexicon format) + if(graph["SYMBOL"] in ("_", "#")): + graph["MAP0"] = "\t" + if(len(word_transcription) >= 3 and + word_transcription[-2] == "\t"): + word_transcription = word_transcription[:-3] + "\t" + elif(len(word_transcription) >= 1): + word_transcription += "\t" + else: + sys.exit("Unknown rule for initial underscore") + elif(graph["SYMBOL"] == "-"): + graph["MAP0"] = "" + continue + else: + sys.exit("Unknown linking symbol found.") + sys.exit(1) + + # Update table of observed graphemes + if(graph["SYMBOL"] not in graphemes): + table.append(graph) + graphemes.append(graph["SYMBOL"]) + + # Append the newly transcribed word + encoded_transcription.append(word_transcription.strip()) + return encoded_transcription, table + + +def _backoff_diacritics(grapheme, base_grapheme, graph_counts, count_thresh): + ''' + Add diacritics as tags if the grapheme with diacritics occurs + infrequently. The grapheme built by successively peeling away + diacritics until a frequent grapheme in the lexicon is discovered. + This grapheme is then considered a distinct unit and all peeled off + diacritics are added as kaldi style tags + + Arguments: + grapheme -- the raw grapheme to be processed + base_grapheme -- the grapheme with no combining marks + (see unicode normalization NFD for more details) + graph_counts -- A dictionary of all seen graphemes as keys with + counts as values + count_thresh -- The frequency threshold below which diacritics + should be peeled away + ''' + # Initialize variables before loop + new_grapheme = grapheme + removed = [] + parts = unicodedata.normalize("NFD", new_grapheme) + # Find a backed-off (in terms of number of diacritics) grapheme with count + # above the frequency threshold (count_thresh) + while(len(parts) > 1 and + (graph_counts[new_grapheme] <= count_thresh)): + new_grapheme = unicodedata.normalize("NFC", parts[0:-1]) + tag = unicodedata.name(parts[-1]).strip().replace(" ", "").lower() + removed.append(tag) + parts = unicodedata.normalize("NFD", new_grapheme) + + # Collect all diactritics that will not be added as tags + split_tags = [] + for p in parts[1:]: + split_tag = unicodedata.name(p).strip().replace(" ", "").lower() + split_tags.append(split_tag) + + # Append non-tag diacritics to the base grapheme + base_grapheme = "".join([base_grapheme] + split_tags) + # Return the tagged grapheme + return "_".join([base_grapheme] + removed) + + +def write_table(table, outfile): + ''' + Creates table of graphemes and fields of each grapheme's corresponding + unicode description. 
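+    The table is written as a tab-separated file named after the lexicon
+    (e.g. a hypothetical lexicon.txt yields lexicon_table.txt), with one
+    column per unicode-description field and '' filling the fields a
+    grapheme does not have.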
+ + Arguments: + table -- table to write + outfile -- name of the output lexicon file + ''' + + # Create output table name + outfile = os.path.splitext(outfile)[0] + "_table.txt" + # Sort keys for convenience + table_sorted = sorted(table, key=lambda k: k["NAME"]) + # Start writing to output + with codecs.open(outfile, "w", "utf-8") as fo: + # Get header names + header_names = sorted(set().union(*[d.keys() for d in table])) + # Write headers + for h in header_names[:-1]: + fo.write("%s\t" % h) + + fo.write("%s\n" % header_names[-1]) + + # Write values if present + for t in table_sorted: + for h in header_names[:-1]: + if(h in t.keys() and t[h]): + fo.write("%s\t" % t[h]) + else: + fo.write("''\t") + if(header_names[-1] in t.keys() and t[header_names[-1]]): + fo.write("%s\n" % t[header_names[-1]]) + else: + fo.write("''\n") + + +def write_lexicon(baseforms, encoded_transcription, outfile, nonspeech=None, + extraspeech=None): + ''' + Write out the encoded transcription of words + + Arguments: + words -- list of words from a word list + encoded_transcription -- input encoded lexicon + outfile -- output lexicon + ''' + # Write Lexicon File + with codecs.open(outfile, "w", "utf-8") as f: + # First write the non-speech words + try: + for w in nonspeech.iterkeys(): + f.write("%s\t%s\n" % (w, nonspeech[w])) + except AttributeError: + pass + + # Then write extra-speech words + try: + for w in extraspeech.iterkeys(): + f.write("%s\t%s\n" % (w, extraspeech[w])) + except AttributeError: + pass + + # Then write the rest of the words + for idx, w in enumerate(baseforms): + # This is really just for BABEL in case is written as a word + if(w[0].lower() == ""): + f.write("%s\t\n" % (unicode(w[0]))) + else: + f.write("%s\t%s\n" % (unicode(w[0]), + encoded_transcription[idx])) + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/lexicon/make_word_list.py b/egs/babel/s5d/local/lexicon/make_word_list.py new file mode 100755 index 00000000000..a1ff385a035 --- /dev/null +++ b/egs/babel/s5d/local/lexicon/make_word_list.py @@ -0,0 +1,93 @@ +#!/usr/bin/python + +from __future__ import print_function +import sys +import os +import codecs +import argparse +import unicodedata +import pdb + + +def process_transcripts(transcripts_dir, transcripts_list): + ''' + This looks through each transcript file, and collects the words. 
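+    Tokens starting with "<" or "(", fragments starting or ending with "-",
+    and the "~" marker are skipped; words marked with "*" are stripped of the
+    asterisks and collected separately as mispronunciations.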
+ Arguments: transcripts -- file with list of babel training transcripts + ''' + transcripts = os.path.join(transcripts_dir, transcripts_list) + with open(transcripts, "r") as f: + transcript_files = [] + for l in f: + l_path = os.path.join(transcripts_dir, l.strip() + ".txt") + transcript_files.append(l_path) + + word_list = {} + misprons = {} + for i_f, f in enumerate(transcript_files): + print("\rFile ", i_f + 1, "of ", len(transcript_files), end="") + with codecs.open(f, "r", "utf-8") as fp: + for line in fp: + # Don't use the lines with time markers + if not line.startswith("["): + words = line.strip().split(" ") + for w in words: + if (not w.startswith("<") and not + w.startswith("(") and not + w.endswith("-") and not w.startswith("-")): + # Get rid of mispronunciation markings + if (not w.startswith("*") and not + w.endswith("*") and + w != "~"): + try: + word_list[w] += 1 + except KeyError: + word_list[w] = 1 + else: + w = w.replace("*", "") + if(w != "~"): + try: + misprons[w] += 1 + except KeyError: + misprons[w] = 1 + + word_list = sorted(word_list.items(), key=lambda x: x[0]) + misprons = sorted(misprons.items(), key=lambda x: x[0]) + print("") + + return word_list, misprons + + +def main(): + if len(sys.argv[1:]) == 0: + print("Usage: ./make_word_list.py" + " ") + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("transcripts_list", help="Path to list of training " + "transcripts") + parser.add_argument("transcripts_dir", help="Path to the training " + "transcripts directory") + parser.add_argument("word_list", help="Path to the generated word list" + " of training words") + parser.add_argument("--misprons", help="Path to the generated word list" + " of mispronounced words", + action="store", default=None) + args = parser.parse_args() + + # Collect words + words, misprons = process_transcripts(args.transcripts_dir, + args.transcripts_list) + + # Print the word list + with codecs.open(args.word_list, "w", "utf-8") as f: + for word, count in words: + f.write("%d %s\n" % (count, unicode(word))) + + if args.misprons is not None: + with codecs.open(args.misprons, "w", "utf-8") as f: + for word, count in misprons: + f.write("%d %s\n" % (count, word)) + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/lonestar.py b/egs/babel/s5d/local/lonestar.py new file mode 100755 index 00000000000..e1594e55ada --- /dev/null +++ b/egs/babel/s5d/local/lonestar.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python +from pylauncher import * +import pylauncher +import sys + +import os +import errno + +def make_path(path): + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + elif not os.path.isdir(path): + raise + +def tail(n, filename): + import subprocess + p=subprocess.Popen(['tail','-n',str(n),filename], stdout=subprocess.PIPE) + soutput,sinput=p.communicate() + soutput=soutput.split("\n") + return soutput + +def KaldiLauncher(lo, **kwargs): + import time; + jobid = JobId() + debug = kwargs.pop("debug","") + qdir= os.path.join(lo.qdir, lo.taskname); + cores = lo.nof_threads; + + ce=SSHExecutor(workdir=qdir, debug=debug, force_workdir=True, catch_output=True) + ce.outstring="out." + ce.execstring=lo.taskname + "." 
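+    # Note: each task i leaves three files in qdir: the generated command
+    # script "<taskname>.<i>" (execstring), its captured output "out.<i>"
+    # (outstring), and a completion stamp "done.<i>" (FileCompletion); the
+    # verification and cleanup code below depends on exactly these names.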
+ + hostpool=HostPool(hostlist=HostListByName(), commandexecutor=ce ) + + completion=lambda x:FileCompletion( taskid=x, stamproot="done.", stampdir=qdir) + + logfiles = list() + commands = list() + for q in xrange(lo.jobstart, lo.jobend+1): + s = "bash " + lo.queue_scriptfile + " " + str(q) + commands.append(s) + + logfile = lo.logfile.replace("${PY_LAUNCHER_ID}", str(q)) + logfiles.append(logfile) + + generator=ListCommandlineGenerator(list=commands, cores=cores) + tasks = TaskGenerator(generator, completion=completion, debug=debug ) + + job = LauncherJob( hostpool=hostpool, taskgenerator=tasks, debug=debug,**kwargs) + + job.run() + #At this point all the .done files should exist and everything should be finalized. + num_failed=0; + time.sleep(1); #Lets wait for a while to give the shared fs time to sync + error_pending=True + for logfile in logfiles: + import time + sched_rate=[0, 0.5, 1, 2, 4, 8, 15, 32 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(logfile): + break; + if not os.path.isfile(logfile): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + logfile + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + error_pending=True; + for delay in sched_rate: + time.sleep(delay); + + lines=tail(10, logfile) + with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines) + + if len(with_status) == 0: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem" + " to indicate return status as expected\n"); + elif len(with_status) > 1: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does seem" + " to indicate multiple return statuses \n"); + else: + status_re=re.search(r'with status (\d+)', with_status[0]); + status=status_re.group(1); + if status == '0': + error_pending=False; + break; + sys.stderr.write("INFO: Waiting for status in files, sleeping %d seconds\n" % (delay,)) + if error_pending: + num_failed+=1; + + if num_failed != 0: + sys.stderr.write(sys.argv[0] + ": " + str(num_failed) + "/" + str(len(logfiles)) + " failed \n"); + sys.stderr.write(sys.argv[0] + ": See " + lo.logfile.replace("${PY_LAUNCHER_ID}", "*" ) + " for details\n"); + sys.exit(-1); + + #Remove service files. Be careful not to remove something that might be needed in problem diagnostics + for i in xrange(len(commands)): + out_file=os.path.join(qdir, ce.outstring+str(i)) + + #First, let's wait on files missing (it might be that those are missing + #just because of slow shared filesystem synchronization + if not os.path.isfile(out_file): + import time + sched_rate=[0.5, 1, 2, 4, 8 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(out_file): + break; + if not os.path.isfile(out_file): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + if os.stat(out_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. 
Try to figure out what and fix it\n"); + sys.exit(-1); + else: + exec_file=os.path.join(qdir, ce.execstring+str(i)) + done_file=os.path.join(qdir, "done."+str(i)) + if (not os.path.isfile(exec_file) ) or (not os.path.isfile(done_file)): + sys.stderr.write("ERROR: " + "One of the following files is missing:\n") + sys.stderr.write("ERROR: " + "\t" + exec_file + "\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + elif os.stat(done_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + else: + os.remove(exec_file) + os.remove(done_file) + os.remove(out_file) + try: + os.rmdir(qdir) + except OSError: + sys.stderr.write("ERROR: " + "Failed to remove the pylauncher task dir " + qdir + "\n"); + sys.stderr.write("ERROR: " + "Find out what is wrong and fix it\n") + sys.exit(-1); + + #print job.final_report() + +class LauncherOpts: + def __init__(self): + self.sync=0 + self.nof_threads = 1 + self.qsub_opts = None + + self.jobname=None + self.jobstart=None + self.jobend=None + pass + +def CmdLineParser(argv): + import re; + sync=0 + qsub_opts='' + nof_threads=1 + + while len(argv) >= 2 and argv[0].startswith('-'): + switch = argv.pop(0); + + if switch == '-V': + qsub_opts += switch + ' '; + else: + option = argv.pop(0) + + if switch == "-sync" and (option in ['Y', 'y']): + sync=1; + qsub_opts += switch + ' ' + option + ' '; + if switch == "-pe": + option2 = argv.pop(0); + qsub_opts += option2 + ' '; + nof_threads = int(option2); + + #Now we have to parse the JOB specifier + jobname = "" + jobstart = 0 + jobend = 0 + if (re.match( r"^[A-Za-z_]\w*=\d+:\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+):(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(3)) + argv.pop(0) + elif(re.match( r"^[A-Za-z_]\w*=\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(2)) + argv.pop(0) + elif re.match("^.+=.*:.*$", argv[0]): + print >> sys.stderr, "warning: suspicious JOB argument " + argv[0]; + + if jobstart > jobend: + sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n") + sys.exit(1) + + logfile=argv.pop(0) + + opts=LauncherOpts() + opts.sync = sync + opts.nof_threads=nof_threads; + opts.qsub_opts=qsub_opts + opts.varname=jobname + opts.jobstart=jobstart + opts.jobend=jobend + opts.logfile=logfile + + opts.cmd = escape_cmd(argv); + + return (opts, argv) + +def escape_cmd(argv): + cmd ="" + for x in argv: + #print x + " -> ", + if re.search("^\S+$", x): + #print " A -> ", + cmd += x + " " + elif '"' in x: + cmd += "'''" + x + "''' " + else: + cmd += "\"" + x + "\" " + #print cmd + return cmd + +def setup_paths_and_vars(opts): + cwd = os.getcwd() + + if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend): + print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \ + "but you are putting the output into just one log file (" + opts.logfile + ")"; + sys.exit(1) + + if not os.path.isabs(opts.logfile): + opts.logfile = os.path.join(cwd, 
opts.logfile); + logfile=opts.logfile + + dir = os.path.dirname(logfile) + base = os.path.basename(logfile) + qdir = os.path.join(dir, "q"); + + if re.search("log/*q", qdir, flags=re.IGNORECASE): + qdir = re.sub("log/*q", "/q", qdir, flags=re.IGNORECASE) + + + queue_logfile= os.path.join(qdir, base) + if opts.varname: + queue_logfile = re.sub("\.?"+opts.varname, "", queue_logfile) + + taskname=os.path.basename(queue_logfile) + taskname = taskname.replace(".log", ""); + if taskname == "": + print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \ + "that leads to an empty task name ("+logfile + ")"; + sys.exit(1) + + if not os.path.isabs(queue_logfile): + queue_logfile= os.path.join(cwd, queue_logfile) + + if opts.varname: + opts.logfile = opts.logfile.replace(opts.varname, "${PY_LAUNCHER_ID}") + opts.cmd = opts.cmd.replace(opts.varname, "${PY_LAUNCHER_ID}"); + + queue_scriptfile=queue_logfile; + if re.search("\.[a-zA-Z]{1,5}$", queue_scriptfile): + queue_scriptfile = re.sub("\.[a-zA-Z]{1,5}$", ".sh", queue_scriptfile); + if not os.path.isabs(queue_scriptfile): + queue_scriptfile= os.path.join(cwd, queue_scriptfile) + + + make_path(qdir) + make_path(dir) + + opts.qdir = qdir + opts.log_dir = dir + opts.queue_scriptfile = queue_scriptfile + opts.queue_logfile = queue_logfile + opts.taskname = taskname + + return opts + + + +def create_scriptfile(scriptname, opts): + import os + logfile = opts.logfile + cmd = opts.cmd + nof_threads=opts.nof_threads; + cwd = os.getcwd() + #print scriptname + f = open(scriptname, "wb") + f.write("#!/bin/bash\n") + f.write("export PY_LAUNCHER_ID=$1; shift;\n") + f.write("cd " + cwd + "\n") + f.write(". ./path.sh\n") + f.write("( echo '#' Running on `hostname`\n") + f.write(" echo '#' Started at `date`\n") + f.write(" echo -n '# '; cat < " +logfile + "\n") + f.write("time1=`date +\"%s\"`\n") + f.write("( " + cmd + ") 2>>" + logfile + " >>" + logfile + " \n") + f.write("ret=$?\n") + f.write("time2=`date +\"%s\"`\n") + f.write("echo '#' Accounting time=$(($time2 - $time1)) threads=" + str(nof_threads) + " >> " + logfile + "\n") + + f.write("echo '#' Finished at `date` with status $ret >>" + logfile + "\n") + f.write("exit $ret \n") + f.close() + + + +if __name__ == "__main__": + (opts, cmd) = CmdLineParser(sys.argv[1:]); + setup_paths_and_vars(opts) + create_scriptfile(opts.queue_scriptfile, opts); + + #pylauncher.ClassicLauncher(["true && sleep 10s", "false || sleep 1s" ], debug="job+host+task+exec+ssh") + KaldiLauncher(opts, debug="") + + diff --git a/egs/babel/s5d/local/make_L_align.sh b/egs/babel/s5d/local/make_L_align.sh new file mode 100755 index 00000000000..50e46a00493 --- /dev/null +++ b/egs/babel/s5d/local/make_L_align.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
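+# For a (hypothetical) lexicon entry "hello h eh l ow", the awk command below
+# emits "hello #1 h eh l ow #2", i.e. the pronunciation gets bracketed by the
+# special symbols #1 and #2 so that word boundaries can be recovered from the
+# alignments.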
+
+
+set -o pipefail
+set -e
+
+if [ $# -ne 3 ]; then
+  echo "This is a simple script that will generate the L_align.fst"
+  echo "The FST L_align.fst is used for getting the force-aligned"
+  echo "utterances"
+  echo "The script automatically recognizes whether the probabilistic"
+  echo "lexicon is used and will use the correct file"
+  echo ""
+  echo "usage: local/make_L_align.sh <tmp-lang-dir> <lang-dir> <out-dir>"
+  echo "e.g.: local/make_L_align.sh data/local/lang data/lang data/lang"
+  exit 1;
+fi
+
+tmpdir=$1
+dir=$2
+outdir=$3
+
+silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
+
+# Create lexicon with alignment info
+if [ -f $tmpdir/lexicon.txt ] ; then
+  cat $tmpdir/lexicon.txt | \
+    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+elif [ -f $tmpdir/lexiconp.txt ] ; then
+  cat $tmpdir/lexiconp.txt | \
+    awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+else
+  echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt exists"
+  exit 1
+fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
+  fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstarcsort --sort_type=olabel > $outdir/L_align.fst
+
+exit 0;
diff --git a/egs/babel/s5d/local/make_corpus_subset.sh b/egs/babel/s5d/local/make_corpus_subset.sh
new file mode 100755
index 00000000000..12925830268
--- /dev/null
+++ b/egs/babel/s5d/local/make_corpus_subset.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+#Begin configuration
+ignore_missing_txt=false  #If the reference transcript txt is missing, \
+                          #shall we ignore it or treat it as a fatal error?
+#End configuration
+echo "$0 $@"  # Print the command line for logging
+
+help_message="$0: create a subset of the input directories.
+        The directories and their file lists are given as pairs; the last
+        parameter is the directory in which the subset should be created.
+    Example:
+        $0 <source-dir-1> <file-list-1> [<source-dir-2> <file-list-2> ...] <target-dir>"
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [[ "$#" -lt "3" ]] ; then
+  echo -e "FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+output_data_dir=${@: -1}  # last argument to the script
+sources=( $@ )
+unset sources[${#sources[@]}-1]  # 'pop' the last argument which is odir
+num_src=${#sources[@]}  # number of remaining (dir, list) arguments
+
+if [ $(( $num_src % 2 )) -ne 0 ]; then
+  echo -e "FATAL: wrong number of script parameters!"
+  echo -e "     : The input directories are not in pairs!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+mkdir -p $output_data_dir/transcription
+mkdir -p $output_data_dir/audio
+
+num_warns_def=3;
+
+rm -f $output_data_dir/filelist.list
+
+for i in `seq 0 $(( $num_src / 2 - 1))` ; do
+  num_warns=$num_warns_def;
+  input_data_dir=${sources[ $[2 * $i] ]}
+  input_data_list=${sources[ $((2 * $i + 1)) ]}
+
+  abs_src_dir=`readlink -f $input_data_dir`
+  abs_tgt_dir=`readlink -f $output_data_dir`
+
+  if [[ ! -d "$input_data_dir" ]] ; then
+    echo "FATAL: input data directory does not exist";
+    exit 1;
+  fi
+  if [[ !
-f "$input_data_list" ]] ; then + echo "FATAL: input data list file does not exist!"; + exit 1; + fi + + idl=`basename $input_data_list` + echo "Making subsets from $input_data_dir according to $idl" + + for file_basename in `cat $input_data_list`; do + if [[ -e $abs_src_dir/audio/$file_basename.sph ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.sph $abs_tgt_dir/audio || exit 1 + else + if [[ -e $abs_src_dir/audio/$file_basename.wav ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.wav $abs_tgt_dir/audio || exit 1 + else + echo "File $abs_src_dir/audio/$file_basename.sph|wav does not exist!" >&2 + exit 1 + fi + fi + + if [[ -e $abs_src_dir/transcription/$file_basename.txt ]] ; then + ln -sf $abs_src_dir/transcription/$file_basename.txt $abs_tgt_dir/transcription || exit 1 + else + if ! $ignore_missing_txt ; then + echo "File $abs_src_dir/transcription/$file_basename.txt does not exist!" + exit 1; + elif [ $num_warns -gt 0 ]; then + echo "WARNING: File $file_basename.txt does not exist!" + num_warns=$(($num_warns - 1)) + elif [ $num_warns -eq 0 ]; then + echo "Not warning anymore" + num_warns=$(($num_warns - 1)) + fi + fi + done + cat $input_data_list >> $output_data_dir/filelist.list +done + + diff --git a/egs/babel/s5d/local/make_ecf_subset.sh b/egs/babel/s5d/local/make_ecf_subset.sh new file mode 100755 index 00000000000..9bdd95c3e27 --- /dev/null +++ b/egs/babel/s5d/local/make_ecf_subset.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. + +echo "$0 $@" 1>&2 # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +help_message="$0: generates an subset ecf file for spoken term detection evaluation. + The first parameter specifies the descriptor of the subset, + the second parameter specifies the original ecf file. + The file will be generated in the kws subdirectory of the directory + given as a third parameter and will be named ecf.xml + Output goes to stdout. + Usage: + $0 " + + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [[ "$#" -ne "2" ]] ; then + echo -e "FATAL: wrong number of script parameters!\n\n" 1>&2 + printf "$help_message\n\n" 1>&2 + exit 1; +fi + +list_file=$1 +src_ecf_file=$2 + +if [[ ! -f "$list_file" ]]; then + echo -e "FATAL: The list file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit 1; +fi +if [[ ! -f "$src_ecf_file" ]]; then + echo -e "FATAL: The source ecf file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit -1 +fi + + +duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\).*/\1 /g" | awk '{x += $1;} END{print x;}'` + +# Output is produced here: +( + grep "" +) diff --git a/egs/babel/s5d/local/make_lexicon_fst_special.pl b/egs/babel/s5d/local/make_lexicon_fst_special.pl new file mode 100755 index 00000000000..3df6e7a9527 --- /dev/null +++ b/egs/babel/s5d/local/make_lexicon_fst_special.pl @@ -0,0 +1,53 @@ +#!/usr/bin/env perl +use warnings; #sed replacement for -w perl parameter +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) + +# makes lexicon FST -- special version only for use in keyword search +# for allowing optional silences between words. This version has +# no pron-probs involved, and +# does support an optional silence, but this silence is only allowed +# between words (where it may occur an arbitrary number of times), +# not at the beginning or end of the file. 
+
+if(@ARGV != 2) {
+  die "Usage: make_lexicon_fst_special.pl lexicon.txt silphone >lexiconfst.txt"
+}
+
+$lexfn = shift @ARGV;
+$silphone = shift @ARGV;
+
+open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
+
+
+$startstate = 0;
+$silstate = 1;
+$endstate = 2;
+$nextstate = 3;
+
+sub create_wseq {
+  my $init_state = shift @_;
+  my $end_state = shift @_;
+  my $word_or_eps = shift @_;
+  my @phones = @_;
+  if (@phones == 0) { push @phones, "<eps>"; }
+  my $x;
+  my $curstate = $init_state;
+  for ($x = 0; $x + 1 < @phones; $x++) {
+    print "$curstate\t$nextstate\t$phones[$x]\t$word_or_eps\n";
+    $word_or_eps = "<eps>";
+    $curstate = $nextstate;
+    $nextstate++;
+  }
+  print "$curstate\t$end_state\t$phones[$x]\t$word_or_eps\n";
+}
+
+
+while(<L>) {
+  @A = split(" ", $_);
+  $w = shift @A;
+  create_wseq($startstate, $endstate, $w, @A);
+  create_wseq($endstate, $endstate, $w, @A);
+  create_wseq($silstate, $endstate, $w, @A);
+}
+print "$endstate\t$silstate\t$silphone\t<eps>\n";
+print "$endstate\t0\n"; # final-cost.
diff --git a/egs/babel/s5d/local/make_lexicon_subset.sh b/egs/babel/s5d/local/make_lexicon_subset.sh
new file mode 100755
index 00000000000..1e77fcaa2b9
--- /dev/null
+++ b/egs/babel/s5d/local/make_lexicon_subset.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+input_lexicon_file=$2
+output_lexicon_file=$3
+
+(
+  #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | awk '
+  BEGIN {
+    while(( getline line< ARGV[2] ) > 0 ) {
+      split(line, e, "\t")
+      LEXICON[ e[1] ]=line
+    }
+    FILENAME="-"
+    i=0
+
+    while(( getline word< ARGV[1] ) > 0 ) {
+      if (word in LEXICON)
+        print LEXICON[word]
+    }
+  }
+' - $input_lexicon_file | sort -u > $output_lexicon_file
+
diff --git a/egs/babel/s5d/local/make_wordlist.sh b/egs/babel/s5d/local/make_wordlist.sh
new file mode 100644
index 00000000000..ef589b917cb
--- /dev/null
+++ b/egs/babel/s5d/local/make_wordlist.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+wordlist=$2
+
+(
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | grep -v -E '.*\*.*|<.*>|\(\(\)\)|^-.*|.*-$' > $wordlist
+
diff --git a/egs/babel/s5d/local/map_lang.sh b/egs/babel/s5d/local/map_lang.sh
new file mode 100755
index 00000000000..998a11d0cd0
--- /dev/null
+++ b/egs/babel/s5d/local/map_lang.sh
@@ -0,0 +1,81 @@
+#! /usr/bin/bash
+
+VARIABLES=`diff <(compgen -A variable) <(. ./lang.conf.orig; compgen -A variable) | grep '^>'| sed 's/^> *//g'`
+
+. ./conf/common_vars.sh
+.
./lang.conf.orig + +for variable in $VARIABLES ; do + + eval VAL=\$${variable} + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $variable=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + #declare -x $variable + declare -p $variable + fi +done + +for kwlist in $( (compgen -A variable) | grep _data_list ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +for kwlist in $( (compgen -A variable) | grep _data_dir ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +for kwlist in $( (compgen -A variable) | grep _more_kwlists ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +if [ "$babel_type" == "limited" ] ; then + train_nj=32 +else + train_nj=64 +fi +dev10h_nj=60 +unsup_nj=120 +shadow_nj=60 +shadow2_nj=120 +eval_nj=120 diff --git a/egs/babel/s5d/local/naive_comb.pl b/egs/babel/s5d/local/naive_comb.pl new file mode 100755 index 00000000000..74ad20d84e3 --- /dev/null +++ b/egs/babel/s5d/local/naive_comb.pl @@ -0,0 +1,234 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; +use XML::Simple; +use Data::Dumper; +use File::Basename; + +my $tolerance = 0.5; + +sub ReadKwslist { + my $kwslist_in = shift @_; + + my $source = "STDIN"; + if ($kwslist_in ne "-") { + open(I, "<$kwslist_in") || die "Fail to open kwslist $kwslist_in.\n"; + $source = "I"; + } + + # Read in the kwslist and parse it. 
Note that this is a naive parse -- I simply + # assume that the kwslist is "properly" generated + my @KWS; + my (@info, $kwid, $tbeg, $dur, $file, $score, $channel); + my ($kwlist_filename, $language, $system_id) = ("", "", ""); + while (<$source>) { + chomp; + + if (/[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n"; + my $prev_kw = ""; + foreach my $kwentry (@{$KWS}) { + if ($prev_kw ne $kwentry->[0]) { + if ($prev_kw ne "") {$kwslist .= " \n";} + $kwslist .= " [0]\" oov_count=\"0\">\n"; + $prev_kw = $kwentry->[0]; + } + $kwslist .= " [1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$kwentry->[5]\" decision=\"$kwentry->[6]\""; + if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";} + if (defined($kwentry->[8])) {$kwslist .= " raw_score=\"$kwentry->[8]\"";} + $kwslist .= "/>\n"; + } + $kwslist .= " \n"; + $kwslist .= "\n"; + + return $kwslist; +} + +sub KwslistTimeCompare { + my ($a, $b) = @_; + + if ($a->[0] eq $b->[0]) { + if ($a->[1] eq $b->[1]) { + if (abs($a->[3]-$b->[3]) <= $tolerance) { + if (abs($a->[3]+$a->[4]-$b->[3]-$b->[4]) <= $tolerance) { + return 0; + } else { + return ($a->[3]+$a->[4]) <=> ($b->[3]+$b->[4]); + } + } else { + return $a->[3] <=> $b->[3]; + } + } else { + return $a->[1] cmp $b->[1]; + } + } else { + $a->[0] cmp $b->[0]; + } +} + +sub KwslistTimeSort { + return KwslistTimeCompare($a, $b); +} + +my $Usage = < w2 ... + e.g.: naive_comb.pl 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml + +Allowed options: + --method : Use different combination method (int, default = 1) + 1 -- Weighted sum + 2 -- Weighted "powered" + --power : The power of method 2 (float, default = 0.5) + --tolerance : Tolerance for being the same hits (float, default = 0.5) + +EOU + +my $method = 1; +my $power = 0.5; +GetOptions('tolerance=f' => \$tolerance, + 'method=i' => \$method, + 'power=f' => \$power, + 'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;}); + +@ARGV >= 3 || die $Usage; + +# Workout the input/output source +@ARGV % 2 == 1 || die "Bad number of (weight, kwslist) pair.\n"; +my @kwslist_file = (); +my @weight = (); +while (@ARGV != 1) { + my $w = shift @ARGV; + $w =~ m/^[0-9.]*$/ || die "Bad weight: $w.\n"; + push(@weight, $w); + push(@kwslist_file, shift @ARGV); +} +my $output = shift @ARGV; + +# Open the first kwslist +my ($info, $KWS) = @{ReadKwslist($kwslist_file[0])}; + +# Open the rest kwslists +my @kwslist = (); +for (my $i = 1; $i < @kwslist_file; $i ++) { + push(@kwslist, @{ReadKwslist($kwslist_file[$i])}[1]); +} + +# Process the first kwslist +my @KWS = sort KwslistTimeSort @{$KWS}; +my $w = shift @weight; +foreach my $kwentry (@$KWS) { + if ($method == 1) { + $kwentry->[5] = $kwentry->[5] * $w; + } elsif ($method == 2) { + $kwentry->[5] = ($kwentry->[5]**$power) * $w; + } else { + die "Method not defined.\n"; + } +} + +# Start merging the rest kwslists +while (@kwslist > 0) { + my $w = shift @weight; + my @kws = sort KwslistTimeSort @{shift @kwslist}; + + # We'll take time information from the first system + my ($i, $j) = (0, 0); + my @from_kws; + while ($i < @KWS and $j < @kws) { + my $cmp = KwslistTimeCompare($KWS[$i], $kws[$j]); + if ($cmp == 0) { + if ($method == 1) { + $KWS[$i]->[5] += $kws[$j]->[5] * $w; + } elsif ($method == 2) { + $KWS[$i]->[5] += ($kws[$j]->[5]**$power) * $w; + } else { + die "Method not defined.\n"; + } + $i ++; + $j ++; + } elsif ($cmp == -1) { + $i ++; + } else { + if ($method == 1) { + $kws[$j]->[5] = $kws[$j]->[5] * $w; + } elsif ($method == 2) { + 
$kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+      } else {
+        die "Method not defined.\n";
+      }
+      push(@from_kws, $kws[$j]);
+      $j ++;
+    }
+  }
+  while ($j < @kws) {
+    if ($method == 1) {
+      $kws[$j]->[5] = $kws[$j]->[5] * $w;
+    } elsif ($method == 2) {
+      $kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+    } else {
+      die "Method not defined.\n";
+    }
+    push(@from_kws, $kws[$j]);
+    $j ++;
+  }
+
+  # Sort again
+  @from_kws = (@KWS, @from_kws);
+  @KWS = sort KwslistTimeSort @from_kws;
+}
+
+if ($method == 2) {
+  foreach my $kwentry (@KWS) {
+    $kwentry->[5] = $kwentry->[5]**(1.0/$power);
+  }
+}
+
+# Sorting and printing
+my $kwslist = PrintKwslist(\@{$info}, \@KWS);
+
+if ($output eq "-") {
+  print $kwslist;
+} else {
+  open(O, ">$output") || die "Fail to open output file: $output\n";
+  print O $kwslist;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/nist_eval/create_compound_set.sh b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
new file mode 100755
index 00000000000..737f7a0fcd9
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+#Simple script to create compound set info that will allow for more automated
+#work with the shadow set.
+#
+#The notion of the shadow data set came from the need to be able to verify
+#the output of the recognizer during decoding of the evaluation data.
+#The idea is simple -- instead of decoding just the eval data, decode both
+#the eval data plus the dev data (or at least some portion of it), interleaved
+#randomly.
+#After decoding, we can isolate (split) the output from the decoding (and kws)
+#so that we can score the dev data subset, and if the score is identical to
+#the score obtained by decoding the dev set previously, we can be a little
+#more confident that the eval set results are correct.
+
+. ./path.sh
+
+flen=0.01
+
+[ ! -f lang.conf ] && echo "File lang.conf must exist (and contain a valid config)"
+. ./lang.conf
+
+devset=dev10h.pem
+evlset=eval.seg
+tgtset=shadow.seg
+tgtdir=
+
+.
utils/parse_options.sh +[ -z $tgtdir ] && tgtdir=data/$tgtset + +devset_basename=${devset%%.*} +devset_segments=${devset#*.} + +evlset_basename=${evlset%%.*} +evlset_segments=${evlset#*.} + +eval devset_flist=\$${devset_basename}_data_list +eval devset_ecf=\$${devset_basename}_ecf_file +eval devset_rttm=\$${devset_basename}_rttm_file +eval devset_stm=\$${devset_basename}_stm_file + +eval evlset_flist=\$${evlset_basename}_data_list +eval evlset_ecf=\$${evlset_basename}_ecf_file +eval evlset_rttm=\$${evlset_basename}_rttm_file +eval evlset_stm=\$${evlset_basename}_stm_file + +rm -rf $tgtdir/compounds +mkdir -p $tgtdir/compounds +mkdir -p $tgtdir/compounds/$devset +mkdir -p $tgtdir/compounds/$evlset + +echo "Creating compound $tgtdir/compounds/$devset" +( + echo "DEVSET file list: $devset_flist" + cat `readlink -f $devset_flist` > $tgtdir/compounds/$devset/files.list + echo "DEVSET ECF file : $devset_ecf" + cat `readlink -f $devset_ecf` > $tgtdir/compounds/$devset/ecf.xml + echo "DEVSET RTTM file: $devset_rttm" + cat `readlink -f $devset_rttm` > $tgtdir/compounds/$devset/rttm + echo "DEVSET STM file : $devset_stm" + cat `readlink -f $devset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$devset/stm + + cat $tgtdir/segments | grep -w -F -f $tgtdir/compounds/$devset/files.list > $tgtdir/compounds/$devset/segments + awk '{print $1}' $tgtdir/compounds/$devset/segments > $tgtdir/compounds/$devset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$devset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$devset/ecf.xml $output/ + cp $tgtdir/compounds/$devset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$devset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$devset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + + duration=$(cat $devset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Creating compound $tgtdir/compounds/$evlset" +( + echo "EVLSET file list: $evlset_flist" + cat `readlink -f $evlset_flist` > $tgtdir/compounds/$evlset/files.list + echo "EVLSET ECF file : $evlset_ecf" + cat `readlink -f $evlset_ecf` > $tgtdir/compounds/$evlset/ecf.xml + if [ ! -z "$evlset_rttm" ]; then + echo "EVLSET RTTM file: $evlset_rttm" + cat `readlink -f $evlset_rttm` > $tgtdir/compounds/$evlset/rttm + fi + if [ ! 
-z "$evlset_stm" ]; then + echo "EVLSET STM file : $evlset_stm" + cat `readlink -f $evlset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$evlset/stm + fi + + cat $tgtdir/segments | \ + grep -w -F -f $tgtdir/compounds/$evlset/files.list > $tgtdir/compounds/$evlset/segments + awk '{print $1}' $tgtdir/compounds/$evlset/segments > $tgtdir/compounds/$evlset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$evlset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$evlset/ecf.xml $output/ + + if [ -f "$tgtdir/compounds/$evlset/rttm" ]; then + cp $tgtdir/compounds/$evlset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$evlset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$evlset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + fi + + duration=$(cat $evlset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Compound creation OK." + + diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh new file mode 100755 index 00000000000..2af8dc9e410 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh @@ -0,0 +1,236 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +language="201-haitian" +corpus=/export/babel/data/ +indus=/export/babel/data/scoring/IndusDB +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +corpus=$corpus/$language +lists=./conf/lists/$language/ + +corpusdir=$(find $corpus -maxdepth 1 \( -name "release-current" -o -name "release-current-b" \) -type d) || exit 1 +[ -z "$corpusdir" ] && corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) +[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1 + +train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1 +[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1 + +train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1 +echo "# include common settings for fullLP systems." +echo ". 
conf/common.fullLP || exit 1;" +echo -e "\n" + +echo "#speech corpora files location" +echo "train_data_dir=$train_dir" +if [ -f "$lists/training.list" ] ; then + echo "train_data_list=$lists/training.list" +elif [ -f "$lists/train.FullLP.list" ] ; then + echo "train_data_list=$lists/train.FullLP.list" +else + echo >&2 "Training list $lists/training.list not found" +fi + +echo "train_nj=32" +echo -e "\n" + + +indusid=$(find $corpus -name "IARPA*-build" -type d) +[ -z $indusid ] && indusid=$(find $corpus \( -name "release-current" -o -name "release-current-b" \) -type d) +[ -z $indusid ] && echo >&2 "Didn't find anything that could be used as IndusDB id" && exit 1 + +indusid=$(basename ${indusid}) +indusid=${indusid%%-build} +dataset=dev10h +dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1 +indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) +if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying" + indusid=${indusid%%-v*} + indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- keeping only the language code and retrying" + indusid=${language%%-*} + indusdev10=$(find $indus/ -maxdepth 1 -name "*${indusid}*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB configuration for the language code $indusid not found" + exit 1 + fi + fi +fi + +if [ -z "$indusdev10" ] ; then + echo "" +else + dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" ) + dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" ) + dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V ) + kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V ) + kwlists="$kwlists1 $kwlists2" + dev10h_kwlists="$kwlists" +fi + +echo "#Radical reduced DEV corpora files location" +echo "dev2h_data_dir=$dev10h_dir" +echo "dev2h_data_list=$lists/dev.2h.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev2h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev2h_kwlists" +fi +echo "dev2h_nj=16" +echo "dev2h_subset_ecf=true" +echo -e "\n" + +echo "#Official DEV corpora files location" +echo "dev10h_data_dir=$dev10h_dir" +echo "dev10h_data_list=$lists/dev.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev10h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev10h_kwlists" +fi +echo "dev10h_nj=32" +echo -e "\n" + +dataset="eval" +eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1 +[ -z "$eval_dir" ] && { eval_dir=$(find $corpusdir -ipath "*/conversational/*" -name "eval" -type d) || exit 1; } +if [ ! 
-z "$eval_dir" ] ; then + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" ) + eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlists1 $kwlists2" + else + eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + eval_ecf=$(find $indus_set/ -name "*ecf.xml" ) + eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlist1 $kwlist2" + fi + echo "#Official EVAL period evaluation data files" + echo "eval_data_dir=$eval_dir" + echo "eval_data_list=$lists/eval.list" + echo "${dataset}_ecf_file=$eval_ecf" + echo "${dataset}_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "eval_nj=32" + echo -e "\n" + + dataset=evalpart1 + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo >&2 "IndusDB entry \"$indusid*$dataset\" not found -- keeping only the language code and retrying" + indusid=${language%%-*} + indus_set=$(find $indus/ -maxdepth 1 -name "*${indusid}*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo >&2 "IndusDB configuration for the language code $indus_set not found" + fi + fi + if [ ! -z "$indus_set" ] ; then + evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" ) + evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V) + kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V) + kwlists="$kwlists1 $kwlists2" + + kwlists="$dev10h_kwlists $eval_kwlists $kwlists" + echo "#Official post-EVAL period data files" + echo "${dataset}_data_dir=$eval_dir" + echo "${dataset}_data_list=$lists/${dataset}.list" + echo "${dataset}_rttm_file=$evalpart1_rttm" + echo "${dataset}_ecf_file=$evalpart1_ecf" + echo "${dataset}_stm_file=$evalpart1_stm" + echo "${dataset}_kwlists=(" + declare -A tmp_kwlists; + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + tmp_kwlists[$id]="$list" + done + + indices=$( + for id in "${!tmp_kwlists[@]}"; do + echo $id + done | sort -V | paste -s + ) + for id in $indices; do + echo " [$id]=${tmp_kwlists[$id]}" + done + echo ") # ${dataset}_kwlists" + echo "${dataset}_nj=32" + echo -e "\n" + fi + + dataset=shadow + echo "#Shadow data files" + echo "shadow_data_dir=(" + echo " $dev10h_dir" + echo " $eval_dir" + echo ") # shadow_data_dir" + echo "shadow_data_list=(" + echo " $lists/dev.list" + echo " $lists/eval.list" + echo ") # shadow_data_dir" + echo "shadow_ecf_file=$dev10h_ecf" + echo "shadow_rttm_file=$dev10h_rttm" + echo "shadow_stm_file=$dev10h_stm" + echo "shadow_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # shadow_kwlists" + echo "shadow_nj=32" + echo -e "\n" +fi + +dataset=untranscribed-training +unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1 +unsup_list=$lists/untranscribed-training.list +[ ! 
+if [ -f $unsup_list ] ; then
+  echo "#Unsupervised dataset for FullLP condition"
+  echo "unsup_data_dir=$unsup_dir"
+  echo "unsup_data_list=$unsup_list"
+  echo "unsup_nj=32"
+  echo -e "\n"
+else
+  echo "#Unsupervised training set file ($unsup_list) not found."
+fi
+
+lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.txt" -type f) || exit 1
+echo "lexicon_file=$lexicon"
+
+if [ ! -z "$train_rom_dir" ] ; then
+  echo "lexiconFlags=\"--romanized --oov \""
+fi
+echo -e "\n\n"
+
+
diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
new file mode 100755
index 00000000000..be6aa5c2b40
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+language="201-haitian"
+corpus=/export/babel/data/
+indus=/export/babel/data/scoring/IndusDB
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+corpus=$corpus/$language
+lists=./conf/lists/$language/
+
+corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) || exit 1
+[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1
+
+train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1
+[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1
+
+train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1
+echo "# include common settings for limitedLP systems."
+echo ". conf/common.limitedLP || exit 1;"
+echo -e "\n"
+
+echo "#speech corpora files location"
+echo "train_data_dir=$train_dir"
+echo "train_data_list=$lists/sub-train.list"
+echo "train_nj=32"
+echo -e "\n"
+
+
+indusid=$(find $corpus -name "IARPA*-build" -type d)
+indusid=$(basename ${indusid})
+indusid=${indusid%%-build}
+dataset=dev10h
+dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1
+indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+if [ -z "$indusdev10" ] ; then
+  echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying"
+  indusid=${indusid%%-v*}
+  indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+fi
+
+if [ -z "$indusdev10" ] ; then
+  echo ""
+else
+  dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" )
+  dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" )
+  dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" )
+  kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V )
+  kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V )
+  kwlists="$kwlists1 $kwlists2"
+fi
+
+echo "#Radical reduced DEV corpora files location"
+echo "dev2h_data_dir=$dev10h_dir"
+echo "dev2h_data_list=$lists/dev.2h.list"
+[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm"
+[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf"
+[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm"
+if [ ! -z "${kwlists:-}" ] ; then
-z "${kwlists:-}" ] ; then + echo "dev2h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev2h_kwlists" +fi +echo "dev2h_nj=16" +echo "dev2h_subset_ecf=true" +echo -e "\n" + +echo "#Official DEV corpora files location" +echo "dev10h_data_dir=$dev10h_dir" +echo "dev10h_data_list=$lists/dev.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev10h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev10h_kwlists" +fi +echo "dev10h_nj=32" +echo -e "\n" + +dataset="eval" +eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1 +if [ ! -z "$eval_dir" ] ; then + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" ) + eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlists1 $kwlists2" + else + eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + eval_ecf=$(find $indus_set/ -name "*ecf.xml" ) + eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlist1 $kwlist2" + fi + echo "#Official EVAL period evaluation data files" + echo "eval_data_dir=$eval_dir" + echo "eval_data_list=$lists/eval.list" + echo "${dataset}_ecf_file=$eval_ecf" + echo "${dataset}_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "eval_nj=32" + echo -e "\n" + + dataset=evalpart1 + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo "" + else + evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" ) + evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V) + kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V) + kwlists="$kwlists1 $kwlists2" + fi + echo "#Official post-EVAL period data files" + echo "${dataset}_data_dir=$eval_dir" + echo "${dataset}_data_list=$lists/${dataset}.list" + echo "${dataset}_rttm_file=$evalpart1_rttm" + echo "${dataset}_ecf_file=$evalpart1_ecf" + echo "${dataset}_stm_file=$evalpart1_stm" + echo "${dataset}_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "${dataset}_nj=32" + echo -e "\n" + + + dataset=shadow + echo "#Shadow data files" + echo "shadow_data_dir=(" + echo " $dev10h_dir" + echo " $eval_dir" + echo ") # shadow_data_dir" + echo "shadow_data_list=(" + echo " $lists/dev.list" + echo " $lists/eval.lists" + echo ") # shadow_data_dir" + echo "shadow_ecf_file=$dev10h_ecf" + echo "shadow_rttm_file=$dev10h_rttm" + echo "shadow_stm_file=$dev10h_stm" + echo "shadow_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 
+  for list in $eval_kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # shadow_kwlists"
+  echo "shadow_nj=32"
+  echo -e "\n"
+fi
+
+dataset=untranscribed-training
+unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1
+unsup_lists=( $lists/untranscribed-training.list $lists/sub-train.untranscribed.list)
+unsup_dirs=( $unsup_dir $train_dir )
+echo "#Unsupervised dataset for LimitedLP condition"
+echo "unsup_data_list=("
+for list in ${unsup_lists[*]}; do
+  [ ! -f $list ] && echo >&2 "Unsupervised training set not found $list"
+  echo "  $list";
+done
+echo ") # unsup_data_list"
+
+echo "unsup_data_dir=("
+for dir in ${unsup_dirs[*]}; do
+  [ ! -d $dir ] && echo >&2 "Unsupervised training data dir not found $dir"
+  echo "  $dir";
+done
+echo ") # unsup_data_dir"
+
+echo "unsup_nj=32"
+echo -e "\n"
+
+lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.sub-train.txt" -type f) || exit 1
+echo "lexicon_file=$lexicon"
+
+if [ ! -z "$train_rom_dir" ] ; then
+  echo "lexiconFlags=\"--romanized --oov \""
+fi
+echo -e "\n\n"
+
+
diff --git a/egs/babel/s5d/local/nist_eval/export_systems.sh b/egs/babel/s5d/local/nist_eval/export_systems.sh
new file mode 100755
index 00000000000..d0af608416c
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/export_systems.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. ./cmd.sh; . ./path.sh;
+
+
+#(
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
+#) &
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem*
+#wait
+
+(
+bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
+) &
+bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem
+wait
+
+wait
+exit
+
+bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
+
+bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp/tri6b_nnet --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+
+
diff --git a/egs/babel/s5d/local/nist_eval/filter_data.sh b/egs/babel/s5d/local/nist_eval/filter_data.sh
new file mode 100755
index 00000000000..143102032c2
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/filter_data.sh
@@ -0,0 +1,152 @@
+. ./path.sh
+
+min_lmwt=5
+max_lmwt=25
+cer=0
+nbest=-1
+cmd=run.pl
+ntrue_from=
+. ./utils/parse_options.sh
+
+min_lmwt_start=$min_lmwt
+max_lmwt_start=$max_lmwt
+
+datadir=$1; shift
+name=$1; shift
+. ./lang.conf
+
+set -e
+set -o pipefail
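+
+# A hypothetical invocation (mirroring the calls in export_systems.sh): the
+# script takes a compound data dir, a compound name, and one or more decode
+# dirs, e.g.
+#   bash local/nist_eval/filter_data.sh --cmd "$decode_cmd" \
+#     data/shadow.uem dev10h.uem exp/tri6b_nnet/decode_shadow.uem
+
+[ ! 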
-d $datadir/compounds/$name ] && echo "Component called $name does not exist in $datadir/compounds/" && exit 1 +ecf=$datadir/compounds/$name/ecf.xml +cat $ecf | grep -P -o '(?<=audio_filename\=")[^"]*' > $datadir/compounds/$name/files.list +filelist=$datadir/compounds/$name/files.list +[ -f $datadir/compounds/$name/rttm ] && rttm=$datadir/compounds/$name/rttm +[ -f $datadir/compounds/$name/stm ] && stm=$datadir/compounds/$name/stm + +if [ -f $ecf ] ; then + duration=`head -1 $ecf |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2.0;}'` + echo "INFO: Using duration $duration seconds (from ECF)." +else + echo "WARNING: Using default duration. ECF wasn't specified?" + duration=9999 +fi + +inputname=`basename $datadir` +outputname=$name + +while (( "$#" )); do + resultdir=$1;shift + echo "Processing data directory $resultdir" + + [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1; + + targetdir=$resultdir/$outputname + + + min_existing= + max_existing= + for lmw in `seq $min_lmwt_start $max_lmwt_start`; do + [ -d $resultdir/score_$lmw ] && [ -z $min_existing ] && min_existing=$lmw + [ -d $resultdir/score_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw + done + if [ -z $min_existing ] || [ -z $max_existing ] ; then + for lmw in `seq $min_lmwt_start $max_lmwt_start`; do + [ -d $resultdir/kwset_kwlist_$lmw ] && [ -z $min_existing ] && min_existing=$lmw + [ -d $resultdir/kwset_kwlist_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw + done + fi + [ -z $min_existing ] && echo "Data directories to be scored could not be found!" && exit 1 + [ -z $max_existing ] && echo "Data directories to be scored could not be found!" && exit 1 + min_lmwt=$min_existing + max_lmwt=$max_existing + echo "Found data directories for range LMWT=$min_lmwt:$max_lmwt" + + if [ -d $resultdir/score_${min_lmwt} ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $targetdir/scoring/filter.LMWT.log \ + set -e';' set -o pipefail';' \ + mkdir -p $targetdir/score_LMWT/';'\ + test -f $resultdir/score_LMWT/$inputname.ctm '&&' \ + utils/filter_scp.pl $filelist $resultdir/score_LMWT/$inputname.ctm '>' \ + $targetdir/score_LMWT/$outputname.ctm || exit 1 + + if [ ! -z $stm ] && [ -f $stm ] ; then + echo "For scoring CTMs, this STM is used $stm" + local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $datadir/compounds/$name data/lang $targetdir + else + echo "Not running scoring, $datadir/compounds/$name/stm does not exist" + fi + fi + + + kws_tasks="" + + for kws in $datadir/kwset_*; do + kws=`basename $kws` + echo $kws + kws_tasks+=" $kws" + done + + for kws in $kws_tasks ; do + echo "Processing KWS task: $kws" + mkdir -p $targetdir/$kws + + echo -e "\tFiltering... 
$kws LMWT=$min_lmwt:$max_lmwt"
+
+    indices_dir=$resultdir/kws_indices
+    for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+      kwsoutput=${targetdir}/${kws}_${lmwt}
+      indices=${indices_dir}_$lmwt
+      nj=$(cat $indices/num_jobs)
+
+      # This is a memory-efficient way to do the filtering; we do it this way
+      # because the result.* files can be fairly big and we do not want to
+      # run into memory trouble.
+      files=""
+      for job in $(seq 1 $nj); do
+        if [ -f $resultdir/${kws}_${lmwt}/result.${job}.gz ] ; then
+          files="$files <(gunzip -c $resultdir/${kws}_${lmwt}/result.${job}.gz)"
+        elif [ -f $resultdir/${kws}_${lmwt}/result.${job} ] ; then
+          files="$files $resultdir/${kws}_${lmwt}/result.${job} "
+        else
+          echo >&2 "The file $resultdir/${kws}_${lmwt}/result.${job}[.gz] does not exist"
+          exit 1
+        fi
+      done
+      # we have to call it using eval, as we need bash to interpret the
+      # (possible) process substitutions in case of .gz files;
+      # bash -c would probably work as well, but would spawn another
+      # shell instance
+      echo $kwsoutput
+      echo $datadir/compounds/$name/utterances
+      mkdir -p $kwsoutput
+      eval "sort -m -u $files" |\
+        int2sym.pl -f 2 $datadir/$kws/utt.map | \
+        utils/filter_scp.pl -f 2 $datadir/compounds/$name/utterances |\
+        sym2int.pl -f 2 $datadir/$kws/utt.map |\
+        local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1
+    done
+
+    ntrue_from_args=""
+    if [ ! -z "$ntrue_from" ]; then
+      echo "Using $resultdir/$ntrue_from/$kws for NTRUE"
+      ntrue_from_args=" --ntrue-from $resultdir/$ntrue_from/$kws"
+    fi
+    if [ ! -z $rttm ] ; then
+      local/search/score.sh --cmd "$cmd" --extraid ${kws##kwset_}\
+        --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \
+        data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1;
+    elif [ ! -z $ntrue_from ] ; then
+      local/search/normalize.sh --cmd "$cmd" --extraid ${kws##kwset_}\
+        --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \
+        data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1;
+    else
+      echo >&2 "Cannot score and don't know which compound set to use to inherit the config"
+      exit 1
+    fi
+  done
+
+done
diff --git a/egs/babel/s5d/local/nist_eval/get_training_times.sh b/egs/babel/s5d/local/nist_eval/get_training_times.sh
new file mode 100755
index 00000000000..f5b0012c2f2
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/get_training_times.sh
@@ -0,0 +1,229 @@
+if [ -z $1 ] ; then
+  dir=`pwd`
+else
+  dir=$1
+fi
+echo $dir
+
+
+convertsecs() {
+  h=$(($1/3600))
+  m=$((($1/60)%60))
+  s=$(($1%60))
+  printf "%02d:%02d:%02d\n" $h $m $s
+}
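+
+# e.g. `convertsecs 3723` prints "01:02:03" (3723 s = 1 h, 2 min, 3 s).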
+
+function process {
+  count=1
+  if [ ! -z $1 ]; then
+    count=$1
+  fi
+
+  replace=""
+  for a in `seq 1 $count` ; do
+    replace+="\t"
+  done
+
+  (
+    eval `grep "group=all"`
+    echo -n "threads=$total_threads"
+    echo -n " cpu_time=$total_cpu_time wall_time=$clock_time"
+    echo -n " human_cpu_time="`convertsecs $total_cpu_time`
+    echo -n " human_wall_time="`convertsecs $clock_time`
+    echo ""
+  ) | sed 's/^/'$replace'/g'
+}
+
+function legend {
+  echo -ne '"'"$@"'" '
+}
+
+legend Parameterization dev/train
+local/summarize_logs.pl $dir/exp/make_*/*train*/ | process
+
+if [ -d $dir/data/local/extend ] ; then
+  legend "Extending the lexicon"
+  local/summarize_logs.pl $dir/data/local/extend/tmp/log | process
+fi
+
+legend "Training up to stage tri5"
+local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process
+
+legend "SGMM2 stage training"
+local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process
+
+legend "SGMM2+bMMI stage training"
+local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process
+
+nnet=tri6_nnet
+[ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet
+
+legend "DNN stage training GPU"
+local/summarize_logs.pl $dir/exp/$nnet/log | process
+
+legend "BNF stage training"
+local/summarize_logs.pl $dir/exp_bnf/tri6_bnf/log | process
+
+legend "BNF stage training GPU"
+local/summarize_logs.pl $dir/exp_bnf/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+  $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+
+legend "SEGMENTATION TRAINING: "
+local/summarize_logs.pl $dir/exp/tri4_train_seg_ali/log \
+  $dir/exp/make_plp_pitch/train_seg/ \
+  $dir/exp/tri4b_seg/log | process
+
+semisup=exp_bnf_semisup2
+if [ -d $dir/param_bnf_semisup ] || [ -d $dir/param_bnf_semisup2 ] ; then
+  [ ! -d $dir/$semisup ] && semisup=exp_bnf_semisup
+
+  decode=unsup.seg
+  legend "BNF_SEMISUP training, segmentation "
+  local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+    $dir/exp/make_seg/$decode/make_plp/ \
+    $dir/exp/tri4b_seg/decode_${decode}/log \
+    $dir/exp/make_plp/$decode | process
+
+  legend "BNF_SEMISUP training, decode unsup.seg TRI5 "
+  local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg PLP "
+  local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg DNN "
+  local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, data preparation for BNF_SEMISUP "
+  local/summarize_logs.pl $dir/exp/combine2_post/unsup.seg/log \
+    $dir/exp/combine2_post/unsup.seg/decode_unsup.seg/log\
+    $dir/exp/tri6_nnet_ali/log | process
+
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF GPU "
+  local/summarize_logs.pl $dir/$semisup/tri6_bnf/log | process
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF "
+  local/summarize_logs.pl $dir/$semisup/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+    $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+fi
+
+if [ -d $dir/exp/tri6_nnet_mpe ] ; then
+  legend "DNN_MPE stage CPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_ali/log/ \
+    $dir/exp/tri6_nnet_denlats/log/* | process
+
+  legend "DNN_MPE stage GPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/log/ | process
+fi
+
+#~decode=dev10h.seg
+#~legend "DEV10H.SEG decoding"
+#~legend "Segmentation: "
+#~local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+#~  $dir/exp/make_seg/$decode/make_plp/ \
+#~  $dir/exp/tri4b_seg/decode_${decode}/log \
+#~ 
$dir/exp/make_plp/$decode | process +#~legend "Decode $decode TRI5: " +#~local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +#~legend "Decode $decode DNN: " +#~local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process + +legend "G2P and confusion matrix: " +local/summarize_logs.pl $dir/exp/conf_matrix/log $dir/exp/g2p/log | process +if [ -d $dir/data/shadow2.uem ]; then + decode=shadow2.uem +else + decode=shadow.uem +fi + +legend "Segmentation $decode: provided..." +echo +#--legend "Segmentation: " +#--local/summarize_logs.pl $dir/exp/make_seg/$decode/log \ +#-- $dir/exp/make_seg/$decode/make_plp/ \ +#-- $dir/exp/tri4b_seg/decode_${decode}/log \ +#-- $dir/exp/make_plp/$decode | process +legend "Parametrization: " +local/summarize_logs.pl $dir/exp/make_plp/$decode | process +legend "Decode $decode TRI5: " +local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +legend "Decode $decode PLP: " +local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +legend "Decode $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +legend "Decode $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +if [ -d $dir/$semisup ] ; then + legend "Decode $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Decode $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/log | process +fi + +legend "Indexing $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +if [ -d $dir/$semisup ] ; then + legend "Indexing $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Indexing $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/kws_indices*/log | process +fi + +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl 
$dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws*/log | process +fi + +legend "Proxies generation: " +local/summarize_logs.pl $dir/data/$decode/evalKW_oov_kws/g2p/log \ + $dir/data/$decode/evalKW_oov_kws/tmp/split/log | process +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process + +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws*/log | process +fi + + +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws*/log | process +fi + + + + + + diff --git a/egs/babel/s5d/local/nist_eval/make_release.sh b/egs/babel/s5d/local/nist_eval/make_release.sh new file mode 100755 index 00000000000..aff89f92846 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/make_release.sh @@ -0,0 +1,356 @@ +#!/bin/bash + +team=RADICAL +corpusid= +partition= +scase=BaEval #BaDev|BaEval +master= +version=1 +sysid= +prim=c +cer=0 +dryrun=true +dir="exp/sgmm5_mmi_b0.1/" +data=data/dev10h.seg +master=dev10h +extrasys= +final=false + +#end of configuration + + +echo $0 " " "$@" + +[ -f ./cmd.sh ] && . ./cmd.sh +[ -f ./path.sh ] && . ./path.sh +. ./utils/parse_options.sh + +. $1 +outputdir=$2 + +set -e +set -o pipefail + +function submit_to_google { + SYSPATH=$1 + #curl 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + # --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$RESPATH'&entry.1851048707='$(whoami)'&submit=Submit' \ + # --compressed + curl -sS 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$SYSPATH'&entry.1851048707='$(whoami)'&entry.880350279='$STWV'&entry.60995624='$OTWV'&entry.1338769660='$LatticeRecall'&entry.1333349334='$THRESHOLD'&entry.1423358838='$(pwd)'&submit=Submit' --compressed |\ + grep --color "Your response has been recorded." || return 1 + return 0 +} + +function export_file { + #set -x + source_file=$1 + target_file=$2 + if [ ! -f $source_file ] ; then + echo "The file $source_file does not exist!" + exit 1 + else + if [ ! -f $target_file ] ; then + if ! $dryrun ; then + ln -s `readlink -f $source_file` $target_file || exit 1 + ls -al $target_file + else + echo "$source_file -> $target_file" + fi + + else + echo "The file is already there, not doing anything. 
Either change the version (using --version) or delete that file manually."
+    exit 1
+    fi
+  fi
+  #set +x
+  return 0
+}
+
+function export_kws_file {
+  source_xml=$1
+  fixed_xml=$2
+  kwlist=$3
+  export_xml=$4
+
+  echo "Exporting KWS $source_xml as `basename $export_xml`"
+  if [ -f $source_xml ] ; then
+    cp $source_xml $fixed_xml.bak
+    fdate=`stat --printf='%y' $source_xml`
+    echo "The source file $source_xml has timestamp of $fdate"
+    echo "Authorizing empty terms from `basename $kwlist`..."
+    if ! $dryrun ; then
+      local/fix_kwslist.pl $kwlist $source_xml $fixed_xml || exit 1
+    else
+      fixed_xml=$source_xml
+    fi
+    echo "Exporting...export_file $fixed_xml $export_xml "
+    export_file $fixed_xml $export_xml || exit 1
+  else
+    echo "The file $source_xml does not exist. Exiting..."
+    exit 1
+  fi
+  echo "Export done successfully..."
+  return 0
+}
+
+function find_best_kws_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "sum.txt" -path "$mask" -not -ipath "*rescored*" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | column -t | sort -r -n -k 3 | head -n 1`
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
+}
+
+function find_best_stt_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1`
+
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
}
+
+function create_sysid {
+  local best_one=$1
+  local sysid=
+  local taskid=`basename $best_one`
+  local system_path=`dirname $best_one`
+  if [[ $system_path =~ .*sgmm5.* ]] ; then
+    sysid=PLP
+  elif [[ $system_path =~ .*nnet.* ]] ; then
+    sysid=DNN
+  elif [[ $system_path =~ .*sgmm7.* ]] ; then
+    sysid=BNF
+  elif [[ $system_path =~ .*4way.* ]] ; then
+    sysid=4way-comb
+  else
+    echo "Unknown system path ($system_path), cannot deduce the systemID" >&2
+    exit 1
+  fi
+  if [[ $taskid == *kws_* ]] ; then
+    local kwsid=${taskid//kws_*/}
+    kwsid=${kwsid//_/}
+    if [ -z $kwsid ]; then
+      echo ${sysid}
+    else
+      echo ${sysid}-$kwsid
+    fi
+  else
+    echo ${sysid}
+  fi
+}
+
+
+function get_ecf_name {
+  local best_one=$1
+  local taskid=`basename $best_one`
+  local kwstask=${taskid//kws_*/kws}
+  local kwlist=
+  #echo $kwstask
+  if [ -z $kwstask ] ; then
+    #echo $data/kws/kwlist.xml
+    kwlist=`readlink -f $data/kws/kwlist.xml`
+  else
+    #echo $data/$kwstask/kwlist.xml
+    kwlist=`readlink -f $data/$kwstask/kwlist.xml`
+  fi
+  ecf=`head -n 1 $kwlist | grep -Po "(?<=ecf_filename=\")[^\"]*"`
+  echo -e "\tFound ECF: $ecf" >&2
+  echo $ecf
+  return 0
+}
+
+function compose_expid {
+  local task=$1
+  local best_one=$2
+  local extraid=$3
+  echo "TASK: $task" >&2
+  echo "BEST ONE: $best_one" >&2
+  echo "EXTRA ID: $extraid" >&2
+
+  [ ! -z $extraid ] && extraid="-$extraid"
+  local sysid=`create_sysid $best_one`
+  echo "SYS ID: $sysid" >&2
+  if [ "$task" == "KWS" ]; then
+    ext="kwslist.xml"
+  elif [ "$task" == "STT" ]; then
+    ext="ctm"
+  else
+    echo "Incorrect task ID ($task) given to compose_expid function!" 
>&2 + exit 1 + fi + echo "${corpusid}" >&2 + echo "${partition}" >&2 + echo "${scase}" >&2 + echo "KWS14_${team}_${corpusid}_${partition}_${scase}_${task}_${prim}-${sysid}${extraid}_$version.$ext" + return 0 +} + +function figure_out_scase { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "BaDev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "BaEval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not contain conv-dev|conv-eval ($scase)" >&2 + echo "BaDev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 + echo "BaDev" + return 1 + fi + return 0 +} + +function figure_out_partition { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "conv-dev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "conv-eval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + return 0 +} + +function figure_out_corpusid { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local corpusid=`echo $basnam | awk -F _ '{print $1}'` + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + local corpusid=${ecf%%.*} + fi + echo $corpusid +} + +mkdir -p $outputdir +extrasys_unnorm="unnorm" +if [ ! -z $extrasys ] ; then + extrasys_unnorm="${extrasys}-unnorm" +fi + +#data=data/shadow.uem +dirid=`basename $data` +kws_tasks="kws " +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -d $data/compounds ] && compounds=`ls $data/compounds` + +if [ -z "$compounds" ] ; then + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/${kws}_*" "*"` + sysid=`create_sysid $best_one` + ecf=`get_ecf_name $best_one` + scase=`figure_out_scase $ecf` || break + partition=`figure_out_partition $ecf` || break + corpusid=`figure_out_corpusid $ecf` + + expid=`compose_expid KWS $best_one "$extrasys"` + echo -e "\tEXPORT NORMALIZED as: $expid" + expid_unnormalized=`compose_expid KWS $best_one "$extrasys_unnorm"` + echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized" + + export_kws_file $best_one/kwslist.xml $best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid + export_kws_file $best_one/kwslist.unnormalized.xml $best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized + done +else + [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1 + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/$master/${kws}_*" "*"` + ( + eval "`cat $best_one/metrics.txt | sed 's/ *= */=/g' | sed 's/,/;/g' | sed 's/Lattice Recall/LatticeRecall/g' `" + submit_to_google $best_one $ATWV $MTWV + ) || echo "Submission failed!" 
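+    # The eval above assumes metrics.txt contains lines such as (hypothetical
+    # values) "ATWV = 0.4123, MTWV = 0.4256, Lattice Recall = 0.78"; the sed
+    # chain rewrites them into shell assignments ("ATWV=0.4123; MTWV=0.4256;
+    # LatticeRecall=0.78") that set the variables passed to submit_to_google.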
+
+
+  for compound in $compounds ; do
+    compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"`
+    echo "From ($kws) $best_one going to $compound_best_one"
+    echo -e "\tPREPARE EXPORT: $compound_best_one"
+    sysid=`create_sysid $compound_best_one`
+    #ecf=`get_ecf_name $best_one`
+    ecf=`readlink -f $data/compounds/$compound/ecf.xml`
+    scase=`figure_out_scase $ecf`
+    partition=`figure_out_partition $ecf`
+    corpusid=`figure_out_corpusid $ecf`
+    expid=`compose_expid KWS $compound_best_one "$extrasys"`
+    echo -e "\tEXPORT NORMALIZED as: $expid"
+    expid_unnormalized=`compose_expid KWS $compound_best_one "$extrasys_unnorm"`
+    echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized"
+
+    export_kws_file $compound_best_one/kwslist.xml $compound_best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid
+    export_kws_file $compound_best_one/kwslist.unnormalized.xml $compound_best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized
+  done
+  done
+fi
+
+## Exporting STT -- more straightforward, because there is only one task
+if [ -z "$compounds" ] ; then
+  #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/score_*" "*"`
+  best_one=`find_best_stt_result "$dir/*${dirid}*/score_*" "*"`
+  echo -e "\tERROR: I don't know how to do this, yet"
+  ecf=`get_ecf_name kws`
+  sysid=`create_sysid $best_one`
+  scase=`figure_out_scase $ecf` || break
+  partition=`figure_out_partition $ecf`
+  corpusid=`figure_out_corpusid $ecf`
+  expid=`compose_expid STT $best_one "$extrasys"`
+  echo -e "\tEXPORT NORMALIZED as: $expid"
+  export_file $best_one/${dirid}.ctm $outputdir/$expid
+else
+  [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1
+  #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/$master/score_*" "*"`
+  best_one=`find_best_stt_result "$dir/*${dirid}*/$master/score_*" "*"`
+
+  for compound in $compounds ; do
+    compound_best_one=`echo $best_one | sed "s:$master/score_:$compound/score_:g"`
+    echo -e "\tPREPARE EXPORT: $compound_best_one"
+    sysid=`create_sysid $compound_best_one`
+    #ecf=`get_ecf_name $best_one`
+    ecf=`readlink -f $data/compounds/$compound/ecf.xml`
+    scase=`figure_out_scase $ecf`
+    partition=`figure_out_partition $ecf`
+    corpusid=`figure_out_corpusid $ecf`
+    expid=`compose_expid STT $compound_best_one $extrasys`
+    echo -e "\tEXPORT NORMALIZED as: $expid"
+
+    export_file $compound_best_one/${compound}.ctm $outputdir/$expid
+  done
+fi
+
+echo "Everything looks fine, good luck!"
+exit 0
+
diff --git a/egs/babel/s5d/local/nist_eval/split_compound_set.sh b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
new file mode 100755
index 00000000000..59ea4c162d7
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+. ./cmd.sh;
+
+devset=dev10h.pem
+evalset=eval.seg
+cmd="$decode_cmd"
+
+
+rootdir=exp/nnet3/lstm_bidirectional_sp/decode_shadow.seg
+combinedir=exp/combine/lstm_bidirectional_sp/shadow.seg
+
+[ ! -d data/shadow.seg/compounds/$devset ] && \
+  echo >&2 "data/shadow.seg/compounds/$devset does not exist!" && exit 1
+[ ! -d data/shadow.seg/compounds/$evalset ] && \
+  echo >&2 "data/shadow.seg/compounds/$evalset does not exist!" && exit 1
+
+for decode in $rootdir/{,phones,syllabs}; do
+  [ ! -d $decode ] && \
+    echo >&2 "$decode does not exist!" && exit 1
+  local/nist_eval/filter_data.sh \
+    data/shadow.seg ${devset} $decode
+  local/nist_eval/filter_data.sh --ntrue-from ${devset} \
+    data/shadow.seg ${evalset} $decode
+done
+
+
+
+for kwset in data/shadow.seg/compounds/$devset/kwset_* ; do
+  kwsetdir=$(basename $kwset)
+  kwsetid=${kwsetdir#*_}
+
+  echo "Processing kwset id=$kwsetid"
+  local/search/combine.sh --extraid "$kwsetid" --cmd "$cmd" \
+    data/shadow.seg/compounds/${devset}/ data/langp_test \
+    $rootdir/{,syllabs/,phones/}${devset}/${kwsetdir} $combinedir/${devset}
+
+  local/search/combine_special.sh --extraid "$kwsetid" --cmd "$cmd" \
+    data/shadow.seg/compounds/${evalset}/ data/langp_test \
+    $combinedir/${devset}/${kwsetdir}/ \
+    $rootdir/{,syllabs/,phones/}${evalset}/${kwsetdir} $combinedir/${evalset}
+done
+
+
+
+
diff --git a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
new file mode 100755
index 00000000000..3b12222e13a
--- /dev/null
+++ b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
@@ -0,0 +1,374 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2014 Vimal Manohar
+# Apache 2.0.
+
+# This script, which will generally be called from other neural-net training
+# scripts, extracts the training examples used to train the neural net (and also
+# the validation examples used for diagnostics), and puts them in separate archives.
+# This is similar to the script steps/nnet2/get_egs.sh, but this one also extracts
+# frames from unsupervised data. The decode directory for the unsupervised data
+# must contain the best path alignments along with posteriors (these can be
+# obtained using local/combine_posteriors.sh).
+
+set -o pipefail
+
+# Begin configuration section.
+cmd=run.pl
+feat_type=
+num_utts_subset=300 # number of utterances in validation and training
+                    # subsets used for shrinkage and diagnostics
+num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
+num_train_frames_combine=10000 # # train frames for the above.
+num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
+samples_per_iter=400000 # each iteration of training, see this many samples
+                        # per job. This is just a guideline; it will pick a number
+                        # that divides the number of samples in the entire data.
+transform_dir_sup= # If supplied, overrides alidir
+transform_dir_unsup=
+num_jobs_nnet=16 # Number of neural net jobs to run in parallel
+stage=-10
+io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time.
+splice_width=4 # meaning +- 4 frames on each side for second LDA
+spk_vecs_dir_sup=
+spk_vecs_dir_unsup=
+random_copy=false
+weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data
+                     # frame for it to not be ignored
+supervised_copies=3 # Make x copies of supervised data.
+use_frame_selection=true
+use_frame_weights=false # TODO: Not coded
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 6 ]; then
+  echo "Usage: steps/nnet2/get_egs_semi_supervised.sh [opts] <data-supervised> <data-unsupervised> <lang> <ali-dir> <decode-dir> <exp-dir>"
+  echo " e.g.: steps/nnet2/get_egs_semi_supervised.sh data/train data/train_unt data/lang exp/tri3_ali exp/tri3/dev_unt exp/tri4_nnet"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config file containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --num-jobs-nnet # Number of parallel jobs to use for main neural net" + echo " # training (will affect results as well as speed; try 8, 16)" + echo " # Note: if you increase this, you may want to also increase" + echo " # the learning rate." + echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" + echo " # process." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics" + echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the" + echo " # very end." + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + echo " --weight-threshold # Threshold on the confidence factor below which the " + echo " # a frame of unsupervised training data is ignored." + echo " --supervised-copies <#copies|3> # Make copies of supervised data" + echo " --transform-dir-sup # Directory with transforms for supervised training data" + echo " --transform-dir-unsup # Directory with transforms for unsupervised training data" + + exit 1; +fi + +data_sup=$1 +data_unsup=$2 +lang=$3 +alidir=$4 +latdir=$5 +dir=$6 + +# Check some files. +for f in $data_sup/feats.scp $data_unsup/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $latdir/best_path_ali.1.gz $latdir/weights.1.gz $latdir/../final.mdl $latdir/../tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +# Set some variables. +oov=`cat $lang/oov.int` +num_leaves=`tree-info $alidir/tree 2>/dev/null | awk '{print $2}'` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +[ -z $num_leaves ] && echo "\$num_leaves is unset" && exit 1 +[ "$num_leaves" -eq "0" ] && echo "\$num_leaves is 0" && exit 1 + +nj_sup=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +nj_unsup=`cat $latdir/num_jobs` || exit 1; # number of jobs in decode dir +# in this dir we'll have just one job. +sdata_sup=$data_sup/split$nj_sup +sdata_unsup=$data_unsup/split$nj_unsup +utils/split_data.sh $data_sup $nj_sup +utils/split_data.sh $data_unsup $nj_unsup + +mkdir -p $dir/log +echo $nj_sup > $dir/num_jobs_sup +echo $nj_unsup > $dir/num_jobs_unsup + +cp $alidir/tree $dir + +awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist + +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +if [ -f $data_sup/utt2uniq ]; then + echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to" + echo "include all perturbed versions of the same 'real' utterances." 
+ mv $dir/valid_uttlist $dir/valid_uttlist.tmp + utils/utt2spk_to_spk2utt.pl $data_sup/utt2uniq > $dir/uniq2utt + cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data_sup/utt2uniq | \ + sort | uniq | utils/apply_map.pl $dir/uniq2utt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist + rm $dir/uniq2utt $dir/valid_uttlist.tmp +fi + +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ + head -$num_utts_subset > $dir/train_subset_uttlist + +[ -z "$transform_dir_sup" ] && transform_dir_sup=$alidir +[ -z "$transform_dir_unsup" ] && transform_dir_unsup=$latdir +norm_vars=`cat $alidir/norm_vars 2>/dev/null` || norm_vars=false # cmn/cmvn option, default false. +norm_vars_unsup=`cat $latdir/norm_vars 2>/dev/null` || norm_vars_unsup=false + +if [ "$norm_vars" != "$norm_vars_unsup" ]; then + echo "ERROR: Features mismatch for supervised and unsupervised data!" + echo "Variance normalization $norm_vars for supervised data vs $norm_vars_unsup for unsupervised data" + exit 1 +fi +cp $alidir/norm_vars $dir 2>/dev/null + +## Set up features. +if [ -z $feat_type ]; then + if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi +fi + +echo "$0: feature type is $feat_type" + +case $feat_type in + raw) feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- |" + feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" + ;; + lda) + splice_opts=`cat $alidir/splice_opts 2>/dev/null` + #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null` + #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then + # echo "ERROR: Features mismatch for supervised and unsupervised data!" + # echo "Splice options $splice_opts for supervised data vs $splice_opts_unsup for unsupervised data" + # exit 1 + #fi + cp $alidir/splice_opts $dir/splice_opts 2>/dev/null + + #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then + # echo "ERROR: Features mismatch for supervised and unsupervised data!" 
+ # echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match" + # exit 1 + #fi + + cp $alidir/final.mat $dir + feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +if [ -f $transform_dir_sup/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $transform_dir_sup for supervised data" + feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/trans.JOB ark:- ark:- |" + valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |" +fi +if [ -f $transform_dir_sup/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $transform_dir_sup" + feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/raw_trans.JOB ark:- ark:- |" + valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |" +fi + +if [ -f $transform_dir_unsup/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $transform_dir_unsup for supervised data" + feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/trans.JOB ark:- ark:- |" +fi +if [ -f $transform_dir_unsup/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $transform_dir_unsup" + feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/raw_trans.JOB ark:- ark:- |" +fi + +if [ $stage -le 0 ]; then + echo "$0: working out number of frames of training data" + num_sup_frames=`feat-to-len scp:$data_sup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; + num_unsup_frames=`feat-to-len scp:$data_unsup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; + num_frames=$(perl -e "print STDOUT ($num_sup_frames * $supervised_copies + $num_unsup_frames)") + echo $num_frames > $dir/num_frames +else + num_frames=`cat $dir/num_frames` || exit 1; +fi + +# Working out number of 
iterations per epoch. +iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1; +[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1 +samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)] +echo "$0: Every epoch, splitting the data up into $iters_per_epoch iterations," +echo "$0: giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)." + +# Making soft links to storage directories. +for x in `seq 1 $num_jobs_nnet`; do + for y in `seq 0 $[$iters_per_epoch-1]`; do + utils/create_data_link.pl $dir/egs/egs.$x.$y.ark + utils/create_data_link.pl $dir/egs/egs_tmp.$x.$y.ark + done + for y in `seq 1 $nj_sup`; do + utils/create_data_link.pl $dir/egs/egs_orig.$x.$y.ark + done +done + +nnet_context_opts="--left-context=$splice_width --right-context=$splice_width" +mkdir -p $dir/egs + +if [ ! -z $spk_vecs_dir_sup ]; then + [ ! -f $spk_vecs_dir_sup/vecs.1 ] && echo "No such file $spk_vecs_dir_sup/vecs.1" && exit 1; + spk_vecs_opt_sup=("--spk-vecs=ark:cat $spk_vecs_dir_sup/vecs.*|" "--utt2spk=ark:$data_sup/utt2spk") +else + spk_vecs_opt_sup=() +fi + +if [ ! -z $spk_vecs_dir_unsup ]; then + [ ! -f $spk_vecs_dir_unsup/vecs.1 ] && echo "No such file $spk_vecs_dir_unsup/vecs.1" && exit 1; + spk_vecs_opt_unsup=("--spk-vecs=ark:cat $spk_vecs_dir_unsup/vecs.*|" "--utt2spk=ark:$data_unsup/utt2spk") +else + spk_vecs_opt_unsup=() +fi + +if [ $stage -le 2 ]; then + echo "Getting validation and training subset examples." + rm $dir/.error 2>/dev/null + $cmd $dir/log/create_valid_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$valid_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/valid_all.egs" || touch $dir/.error & + $cmd $dir/log/create_train_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$train_subset_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/train_subset_all.egs" || touch $dir/.error & + wait; + [ -f $dir/.error ] && exit 1; + echo "Getting subsets of validation examples for diagnostics and combination." + $cmd $dir/log/create_valid_subset_combine.log \ + nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_combine.egs || touch $dir/.error & + $cmd $dir/log/create_valid_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_diagnostic.egs || touch $dir/.error & + + $cmd $dir/log/create_train_subset_combine.log \ + nnet-subset-egs --n=$num_train_frames_combine ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_combine.egs || touch $dir/.error & + $cmd $dir/log/create_train_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_diagnostic.egs || touch $dir/.error & + wait + cat $dir/egs/valid_combine.egs $dir/egs/train_combine.egs > $dir/egs/combine.egs + + for f in $dir/egs/{combine,train_diagnostic,valid_diagnostic}.egs; do + [ ! 
-s $f ] && echo "No examples in file $f" && exit 1;
+  done
+  rm $dir/egs/valid_all.egs $dir/egs/train_subset_all.egs $dir/egs/{train,valid}_combine.egs
+fi
+
+if [ $stage -le 3 ]; then
+  mkdir -p $dir/temp
+
+  # Other scripts might need to know the following info:
+  echo $num_jobs_nnet >$dir/egs/num_jobs_nnet
+  echo $iters_per_epoch >$dir/egs/iters_per_epoch
+  echo $samples_per_iter_real >$dir/egs/samples_per_iter
+
+  echo "Creating training examples";
+  # in $dir/egs, create $num_jobs_nnet separate files with training examples.
+  # The order is not randomized at this point.
+
+  echo "Generating training examples on disk"
+  # The examples will go round-robin to egs_list.
+
+  egs_list=
+  for n in `seq 1 $num_jobs_nnet`; do
+    egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark"
+  done
+
+  $cmd $io_opts JOB=1:$nj_unsup $dir/log/get_weighted_egs.JOB.log \
+    nnet-get-weighted-egs $nnet_context_opts "${spk_vecs_opt_unsup[@]}" \
+    --weight-threshold=$weight_threshold --use-frame-weights=$use_frame_weights \
+    --use-frame-selection=$use_frame_selection "$feats_unsup" \
+    "ark,s,cs:gunzip -c $latdir/best_path_ali.JOB.gz | convert-ali $latdir/../final.mdl $alidir/final.mdl $dir/tree ark:- ark:- | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+    "ark,s,cs:gunzip -c $latdir/weights.JOB.gz |" ark:- \| \
+    nnet-copy-egs ark:- $egs_list || exit 1;
+
+  # Extract egs from the supervised data $supervised_copies times, so that the
+  # supervised frames are not swamped by the unsupervised ones.
+  for (( i=0; i<$supervised_copies; i++ )); do
+    $cmd $io_opts JOB=1:$nj_sup $dir/log/get_egs_sup.$i.JOB.log \
+      nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$feats_sup" \
+      "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
+      nnet-copy-egs ark:- $egs_list || exit 1;
+  done
+fi
+
+if [ $stage -le 4 ]; then
+  echo "$0: rearranging examples into parts for different parallel jobs"
+  if [ $iters_per_epoch -eq 1 ]; then
+    echo "$0: Since iters-per-epoch == 1, just concatenating the data."
+    for n in `seq 1 $num_jobs_nnet`; do
+      cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
+      rm $dir/egs/egs_orig.$n.*.ark # don't "|| exit 1", due to NFS bugs...
+    done
+  else # We'll have to split it up using nnet-copy-egs.
+    egs_list=
+    for n in `seq 0 $[$iters_per_epoch-1]`; do
+      egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
+    done
+    # note, the "|| true" below is a workaround for NFS bugs
+    # we encountered running this script with Debian-7, NFS-v4.
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
+      nnet-copy-egs --random=$random_copy --srand=JOB \
+      "ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
+      '(' rm $dir/egs/egs_orig.JOB.*.ark '||' true ')' || exit 1;
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  # Next, shuffle the order of the examples in each of those files.
+  # Each one should not be too large, so we can do this in memory.
+  echo "Shuffling the order of training examples"
+  echo "(in order to avoid stressing the disk, these won't all run at once)."
+
+
+  # note, the "|| true" below is a workaround for NFS bugs
+  # we encountered running this script with Debian-7, NFS-v4.
+  for n in `seq 0 $[$iters_per_epoch-1]`; do
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \
+      nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \
+      ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \
+      '(' rm $dir/egs/egs_tmp.JOB.$n.ark '||' true ')' || exit 1;
+  done
+fi
+
+echo "$0: Finished preparing training examples"
diff --git a/egs/babel/s5d/local/nnet3/run_blstm.sh b/egs/babel/s5d/local/nnet3/run_blstm.sh
new file mode 100755
index 00000000000..fcf7fb8947d
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_blstm.sh
@@ -0,0 +1,29 @@
+
+stage=0
+train_stage=-10
+cell_dim=512
+rp_dim=128
+nrp_dim=128
+affix=bidirectional
+multicondition=false
+common_egs_dir=
+num_epochs=8
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. 
parse_options.sh || exit 1; + +local/nnet3/run_lstm.sh --affix $affix \ + --stage $stage \ + --train-stage $train_stage \ + --num-epochs $num_epochs \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --multicondition $multicondition \ + --chunk-left-context 40 \ + --chunk-right-context 40 diff --git a/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh new file mode 100755 index 00000000000..05c9a057512 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh @@ -0,0 +1,32 @@ +stage=0 +train_stage=-10 +cell_dim=512 +rp_dim=128 +nrp_dim=128 +affix=bidirectional +multicondition=false +common_egs_dir= +num_epochs=8 +align_model_dir=exp/nnet3/tdnn_sp +extra_align_opts= + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +local/nnet3/run_lstm_realigned.sh --affix $affix \ + --stage $stage \ + --train-stage $train_stage \ + --num-epochs $num_epochs \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --multicondition $multicondition \ + --chunk-left-context 40 \ + --chunk-right-context 40 \ + --extra-align-opts "$extra_align_opts" \ + --align-model-dir "$align_model_dir" diff --git a/egs/babel/s5d/local/nnet3/run_ivector_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..7313230a7ee --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_ivector_common.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +generate_alignments=true # false if doing ctc training +speed_perturb=true + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. 
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # The main train directory might be speed-perturbed, so we copy it and
+  # compute the high-resolution features on the copy.
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # Scale the waveforms; this is useful as we don't use CMVN.
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
+    utils/fix_data_dir.sh data/${dataset}_hires;
+  done
+
+fi
+
+# ivector extractor training
+if [ $stage -le 5 ]; then
+  # We need to build a small system just because we need the LDA+MLLT transform
+  # to train the diag-UBM on top of. We use --num-iters 13 because after we get
+  # the transform (12th iter is the last), any further training is pointless;
+  # this decision is based on the fisher_english recipe.
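+  # (Only the LDA+MLLT transform, final.mat, from tri3b is consumed by the
+  # diag-UBM and iVector-extractor stages below, so stopping as soon as the
+  # transform is estimated loses nothing.)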
+  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
+    --splice-opts "--left-context=3 --right-context=3" \
+    --boost-silence $boost_sil \
+    $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \
+    data/langp/tri5_ali/ exp/tri5_ali_sp exp/nnet3/tri3b
+fi
+
+if [ $stage -le 6 ]; then
+  # To train a diagonal UBM we don't need very much data, so use the smallest subset.
+  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-threads 12 --num-frames 200000 \
+    data/${train_set}_hires 512 exp/nnet3/tri3b exp/nnet3/diag_ubm
+fi
+
+if [ $stage -le 7 ]; then
+  # iVector extractors can be sensitive to the amount of data, but this one has
+  # a fairly small dim (defaults to 100), so it does not need a huge amount of data.
+  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
+    data/${train_set}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+fi
+
+if [ $stage -le 8 ]; then
+  # We extract iVectors on all the training data, which will be what we
+  # train the system on.
+
+  # Having a larger number of speakers is helpful for generalization, and for
+  # handling per-utterance decoding well (the iVector starts at zero).
+  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires
+
+  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
+    data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1;
+
+fi
+
+exit 0;
diff --git a/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh
new file mode 100755
index 00000000000..c3a6e1c0952
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+
+. ./cmd.sh
+set -e
+stage=1
+train_stage=-10
+generate_alignments=true # false if doing ctc training
+speed_perturb=true
+snrs="20:15:10"
+num_data_reps=3
+ali_dir=exp/
+db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment;
+                                   # only the dbs used for the ASpIRE submission system are used here
+RIR_home=db/RIR_databases/ # parent directory of the RIR database files
+download_rirs=true # download the RIR databases from the urls, or assume they are present in the RIR_home directory
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a starting point' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
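+
+# This script mirrors run_ivector_common.sh, with extra stages that corrupt the
+# speed-perturbed data with room impulse responses and noises (using the ASpIRE
+# multi-condition tools) before the high-resolution MFCC and iVector stages.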
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{1,2,3,4}/$USER/kaldi-data/egs/kaldi-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # The main train directory might be speed-perturbed, so we copy it and
+  # compute the high-resolution features on the copy.
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # Scale the waveforms; this is useful as we don't use CMVN.
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
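+    # (fix_data_dir.sh drops the utterances whose features are missing and
+    # keeps wav.scp, utt2spk, spk2utt and segments consistent with each other.)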
+ utils/fix_data_dir.sh data/${dataset}_hires; + done + +fi + +# check if the required tools are present +$KALDI_ROOT/egs/aspire/s5/local/multi_condition/check_version.sh || exit 1; +mkdir -p exp/nnet3_multicondition +if [ $stage -le 4 ]; then + # prepare the impulse responses + local/multi_condition/prepare_impulses_noises.sh --log-dir exp/make_reverb/log \ + --db-string "$db_string" \ + --download-rirs $download_rirs \ + --RIR-home $RIR_home \ + data/impulses_noises || exit 1; +fi + +if [ $stage -le 5 ]; then + # corrupt the training data to generate multi-condition data + for data_dir in train_sp; do + num_reps=$num_data_reps + reverb_data_dirs= + for i in `seq 1 $num_reps`; do + cur_dest_dir=" data/temp_${data_dir}_${i}" + $KALDI_ROOT/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh --random-seed $i \ + --snrs "$snrs" --log-dir exp/make_corrupted_wav \ + data/${data_dir} data/impulses_noises $cur_dest_dir + reverb_data_dirs+=" $cur_dest_dir" + done + utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_mc data/${data_dir} $reverb_data_dirs + rm -rf $reverb_data_dirs + done +fi + +if [ $stage -le 6 ]; then + # copy the alignments for the newly created utterance ids + ali_dirs= + for i in `seq 1 $num_data_reps`; do + local/multi_condition/copy_ali_dir.sh --utt-prefix "rev${i}_" exp/tri5_ali_sp exp/tri5_ali_sp_temp_$i || exit 1; + ali_dirs+=" exp/tri5_ali_sp_temp_$i" + done + local/multi_condition/copy_ali_dir.sh exp/tri5_ali_sp exp/tri5_ali_sp_copy || exit 1; + ali_dirs+=" exp/tri5_ali_sp_copy" + utils/combine_ali_dirs.sh --num-jobs 32 \ + data/train_sp_mc exp/tri5_ali_sp_mc $ali_dirs || exit 1; + rm -rf $ali_dirs +fi + +train_set=train_sp_mc +if [ $stage -le 7 ]; then + mfccdir=mfcc_reverb + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{1,2,3,4}/$USER/kaldi-data/egs/babel_reverb-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage + fi + for data_dir in $train_set; do + utils/copy_data_dir.sh data/$data_dir data/${data_dir}_hires + steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${data_dir}_hires \ + exp/make_reverb_hires/${data_dir} $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${data_dir}_hires exp/make_reverb_hires/${data_dir} $mfccdir || exit 1; + utils/fix_data_dir.sh data/${data_dir}_hires + utils/validate_data_dir.sh data/${data_dir}_hires + done +fi + +# ivector extractor training +if [ $stage -le 8 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \ + data/langp/tri5_ali exp/tri5_ali_sp_mc exp/nnet3_multicondition/tri3b +fi + +if [ $stage -le 9 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. 
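+  # (In the call below, 512 is the number of Gaussians in the diagonal UBM;
+  # exp/nnet3_multicondition/tri3b supplies the LDA+MLLT transform estimated
+  # in the previous stage.)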
+  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
+    data/${train_set}_hires 512 exp/nnet3_multicondition/tri3b exp/nnet3_multicondition/diag_ubm
+fi
+
+if [ $stage -le 10 ]; then
+  # iVector extractors can be sensitive to the amount of data, but this one has
+  # a fairly small dim (defaults to 100), so it does not need a huge amount of data.
+  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
+    data/${train_set}_hires exp/nnet3_multicondition/diag_ubm exp/nnet3_multicondition/extractor || exit 1;
+fi
+
+if [ $stage -le 11 ]; then
+  # We extract iVectors on all the training data, which will be what we
+  # train the system on.
+
+  # Having a larger number of speakers is helpful for generalization, and for
+  # handling per-utterance decoding well (the iVector starts at zero).
+  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires
+
+  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
+    data/${train_set}_max2_hires exp/nnet3_multicondition/extractor exp/nnet3_multicondition/ivectors_$train_set || exit 1;
+
+fi
+
+exit 0;
diff --git a/egs/babel/s5d/local/nnet3/run_lstm.sh b/egs/babel/s5d/local/nnet3/run_lstm.sh
new file mode 100755
index 00000000000..f7d06501569
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_lstm.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+
+# Copyright 2015 Johns Hopkins University (Author: Daniel Povey).
+#           2015 Vijayaditya Peddinti
+#           2015 Xingyu Na
+#           2015 Pegah Ghahremani
+#           2016 Xiaohui Zhang
+# Apache 2.0.
+
+
+# This is a basic LSTM script. It runs for more epochs than the TDNN script,
+# and each epoch takes roughly twice as long.
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call lstm/train.sh with --gpu false
+
+stage=0
+train_stage=-10
+has_fisher=true
+affix=
+speed_perturb=true
+multicondition=true
+common_egs_dir=
+reporting_email=
+
+# LSTM options
+splice_indexes="-2,-1,0,1,2 0 0"
+lstm_delay=" -1 -2 -3 "
+label_delay=5
+num_lstm_layers=3
+cell_dim=1024
+hidden_dim=1024
+recurrent_projection_dim=256
+non_recurrent_projection_dim=256
+chunk_width=20
+chunk_left_context=40
+chunk_right_context=0
+
+
+# training options
+num_epochs=8
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=2
+num_jobs_final=6
+momentum=0.5
+num_chunk_per_minibatch=100
+samples_per_iter=20000
+remove_egs=true
+
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a starting point' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
+. ./cmd.sh
+
+if ! cuda-compiled; then
+  cat < transcript2";
+$noise_word = shift @ARGV;
+
+while(<STDIN>) {
+  $_ =~ m:^(\S+) (.+): || die "bad line $_";
+  $utt = $1;
+  $trans = $2;
+  print "$utt";
+  foreach $w (split (" ",$trans)) {
+    $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary.
+    $w =~ s:\\::g;     # Remove backslashes. We don't need the quoting.
+    $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
+    $w =~ s:^\.POINT$:POINT:;     # Normalization for Nov'93 test transcripts.
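+    # (A reading aid for the pattern chain below: bracketed event tags such as
+    # "[<door_slam]", "[door_slam>]" and "[phone_ring/]" are deleted or mapped
+    # to the generic $noise_word, "~" and "." are dropped, and a verbally
+    # deleted word like "<AND>" is printed as the bare word.)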
+    if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
+       $w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
+       $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.
+       $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon.
+       $w eq "~" || # This is used to indicate truncation of an utterance. Not a word.
+       $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much
+                    # point including this in the transcript.
+      next; # we won't print this word.
+    } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath].
+      print " $noise_word";
+    } elsif($w =~ m:^\<([\w\']+)\>$:) {
+      # e.g. replace <and> with and (the <> means verbal deletion of a word)... but it's pronounced.
+      print " $1";
+    } elsif($w eq "--DASH") {
+      print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH.
+#   } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... seems the DASH gets combined with the previous word
+#     print " $1 -DASH";
+    } else {
+      print " $w";
+    }
+  }
+  print "\n";
+}
diff --git a/egs/babel/s5d/local/optimize/OptimizeParams.pm b/egs/babel/s5d/local/optimize/OptimizeParams.pm
new file mode 100644
index 00000000000..d9fb3647ddd
--- /dev/null
+++ b/egs/babel/s5d/local/optimize/OptimizeParams.pm
@@ -0,0 +1,631 @@
+# Author: Jason Eisner, Univ. of Pennsylvania
+#
+# $Revision: 3.11 $ of $Date: 2006/04/12 08:53:23 $
+
+# !!! should add root-finding methods with derivative (newton-raphson:
+# use rtsafe, section 9.4) and in multiple dimensions (sections 9.5, 9.6).
+
+package OptimizeParams;
+use strict;
+
+BEGIN {
+  use Exporter ();
+  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
+  $VERSION = do { my @r = (q$Revision: 3.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker
+
+  @ISA = qw(Exporter);
+  @EXPORT_OK = qw(&powell &easybrent &easydbrent &easyzbrent
+                  &mnbrak &brent &dbrent &zbrent
+                  $machine_epsilon $inf &basisvectors);
+}
+
+# A sample program with simple examples on a one-dimensional function.
+#
+#   #!/usr/local/bin/perl5 -w
+#
+#   use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent);
+#   use strict 'vars';
+#
+#   sub f   { sin(($_[0]-12.34567)/8)**2-0.5 }  # function
+#   sub df  { sin(2*($_[0]-12.34567)/8)/8 }     # derivative
+#   sub fdf { my($temp)=($_[0]-12.34567)/8;     # (function, derivative) computed at one go
+#             (sin($temp)**2-0.5, sin(2*$temp)/8) }
+#
+#   # Three ways to find (x,f(x)) at minimum of function, namely (12.34567,-0.5)
+#   print join(" ",easybrent(0,1,\&f)), "\n";
+#   print join(" ",easydbrent(0,1,\&f,\&df)), "\n";
+#   print join(" ",easydbrent(0,1,\&fdf)), "\n";
+#
+#   # A fourth way, using a multidimensional optimizer even though f happens
+#   # to be 1-dimensional. The vector [0] is our starting guess.
+#   my($xvec,$fx) = powell(\&f,[0]);
+#   print join(" ",@$xvec,$fx), "\n";
+#
+#   # Find zero of function, namely 6.06
+#   my($x)=zbrent(\&f,0,13); print $x," ",&f($x),"\n";
+
+# ----------------------------------------------------------------------
+
+use vars @EXPORT_OK;
+$inf=exp(1e307); # could just use the bareword inf, which seems to work but generates warnings with -w
+$machine_epsilon = 1; $machine_epsilon /= 2 while 1 + $machine_epsilon/2 > 1;
+
+sub FMAX { # (maximum)
+  $_[0] > $_[1] ? $_[0] : $_[1];
+}
+
+sub SIGN {
+  $_[1] >= 0 ? abs($_[0]) : -abs($_[0]);
+}
+
+
+# Direction Set (Powell's) Methods in Multidimensions
+# From Numerical Recipes in C, Section 10.5, p. 417ff. Ported to Perl.
+#
+# Minimization of a function of n variables [for which the gradient is not known]. 
Required arguments are (a reference to) the function +# and (a reference to) a length-n vector holding the coordinates of +# the starting point. Optional arguments are a fractional tolerance in +# the output value (used as a stopping criterion), a fractional +# tolerance in the input value (used as a stopping criterion on +# one-dimensional searches), and (a reference to) a list of n +# (references to) such vectors, holding an initial set of directions. +# Return values are a reference to a vector holding the coordinates at +# the minimum; the value of the function at that minimum; the number +# of iterations taken; and the final set of directions. +# +# This Perl version has a few different representational conventions. +# It's now the ROWS of $xi (not the columns) that hold the direction vectors. +# And the coordinates are 0-indexed, not 1-indexed. +# The $itol argument is new. + +sub powell { + my($funcref,$p,$ftol,$iftol,$xi) = @_; + my($n) = scalar @$p; # Number of dimensions. + my($ITMAX)=200; # Maximum allowed iterations. + + # Defaults for optional arguments + $ftol = $machine_epsilon unless defined $ftol; + $iftol = 2.0e-4 unless defined $iftol; # in the C version, this is TOL (defined at linmin) + $xi = &basisvectors($n) unless (defined $xi); + + my($fret) = &$funcref(@$p); + my(@pt) = @$p; # Save the initial point. + my($iter); + for($iter=1;;++$iter) { + my($fp) = $fret; + my($ibig) = 0; + my($del) = 0; # Will be the biggest function decrease. + my($i); + for ($i=0;$i<$n;$i++) { # In each iteration, loop over all directions in the set. + my($xit) = \@{$xi->[$i]}; # Copy the direction, + my($fptt) = $fret; + $fret = &linmin($p,$xit,$funcref,$iftol); # minimize along it, + if (abs($fptt-$fret) > $del) { # and record it if it is the largest decrease so far. + $del=abs($fptt-$fret); + $ibig=$i; + } + } + if (2*abs($fp-$fret) <= $ftol*(abs($fp)+abs($fret))) { # Termination criterion. + return($p,$fret,$iter,$xi); + } + die "$0: powell exceeding maximum of $ITMAX iterations" if ($iter==$ITMAX); + + { + my($xit); + my(@ptt); + my($j); + for ($j=0;$j<$n;$j++) { # Construct the extrapolated point and the average direction moved. Save the old starting point. + $ptt[$j] = 2*$p->[$j] - $pt[$j]; + $xit->[$j] = $p->[$j] - $pt[$j]; + $pt[$j] = $p->[$j]; + } + my($fptt) = &$funcref(@ptt); + if ($fptt < $fp) { + my($t) = 2 * ($fp-2*$fret+$fptt) * ($fp-$fret-$del)**2 - $del*($fp-$fptt)**2; + if ($t < 0) { + $fret = &linmin($p,$xit,$funcref); + $xi->[$ibig] = $xi->[$n-1]; + $xi->[$n-1] = $xit; + } + } + } + } # Back for another iteration + + die "$0: internal error in powell: should never have reached this line"; +} + +sub basisvectors { # returns the basis vectors in the given dimension (a reference to a list of references to lists) + my($n) = @_; + my($vects); + my($i,$j); + for ($i=0;$i<$n;$i++) { + for ($j=0;$j<$n;$j++) { + $vects->[$i][$j] = ($i==$j ? 1 : 0); + } + } + return $vects; +} + + + +{ + my($ncom); # "Global" variables for linmin to communicate with f1dim. + my(@pcom, @xicom, $nrfuncref); + + # Routine called by powell. + # From Numerical Recipes in C, Section 10.5, p. 419. Ported to Perl. + # + # Given an n-dimensional point $p and an n-dimensional direction + # vector $xi (both references to lists), moves and resets $p to + # where the function $funcref takes on a minimum along the direction + # $xi from $p, and replaces $xi by the actual vector displacement that + # $p was moved. Returns the value of $funcref at $p. 
This is actually
+  # all accomplished by calling the routines mnbrak and brent.
+  # $iftol is a tolerance on the input value, passed to brent.
+
+  sub linmin {
+    my($p,$xi,$funcref,$iftol) = @_;
+
+    print STDERR "$0: linmin: searching from (",join(", ",@$p),") in direction (",join(", ",@$xi),")\n";
+
+    $ncom = @$p; # Define the global variables.
+    $nrfuncref = $funcref;
+    @pcom = @$p;
+    @xicom = @$xi;
+
+    my($ax) = 0; # Initial guess for brackets.
+    my($xx) = 1;
+    my($bx);
+    ($ax,$xx,$bx) = &mnbrak($ax,$xx,\&f1dim);
+    my($xmin,$fret) = &brent($ax,$xx,$bx,\&f1dim,$iftol);
+    my($j);
+    for ($j=0;$j<$ncom;$j++) {
+      $p->[$j] += ($xi->[$j] *= $xmin);
+    }
+    return $fret;
+  }
+
+  # Function minimized by linmin.
+
+  sub f1dim {
+    my($x) = @_;
+    my(@xt);
+    my($j);
+    for($j=0; $j<$ncom;$j++) {
+      $xt[$j] = $pcom[$j] + $x * $xicom[$j];
+    }
+    return &$nrfuncref(@xt);
+  }
+}
+
+
+
+# Easy way to call mnbrak and brent together in order to minimize
+# a function.
+#
+# ax and bx are any distinct points; we'll look for a minimum in the
+# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)).
+#
+# Return value is the same as brent, namely (x,f(x)). But we might
+# fail to find a minimum! If the function never increases again so
+# far as we can tell -- it plateaus, or decreases toward infinity, or
+# increases in a range that mnbrak doesn't sample -- then we'll return
+# (+/-inf, minimum value we found). Here the +/- is according to
+# which direction we searched in, and the minimum value is f(x) for
+# the last finite x we considered; this value may or may not be
+# finite, but should indicate the asymptotic behavior of the function.
+#
+# Just as in brent, the tolerance $tol can be omitted.
+
+sub easybrent {
+  my($ax,$bx,$funcref,$tol) = @_;
+  my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref);
+  return ($newc,$fb) if ($newc==$inf || $newc==-$inf);
+  &brent($newa,$newb,$newc,$funcref,$tol);
+}
+
+# Easy way to call mnbrak and dbrent together in order to minimize
+# a function whose derivative is known.
+# ax and bx are any distinct points; we'll look for a minimum in the
+# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)).
+#
+# See easybrent for the return value convention when we fail.
+#
+# Just as in dbrent, the tolerance $tol can be omitted. So can
+# $dfuncref, if $funcref returns a pair of values -- both the function
+# and its derivative.
+
+sub easydbrent {
+  my($ax,$bx,$funcref,$dfuncref,$tol) = @_;
+  my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref);
+  return ($newc,$fb) if ($newc==$inf || $newc==-$inf);
+  &dbrent($newa,$newb,$newc,$funcref,$dfuncref,$tol);
+  # If we want to check output against brent:
+  #   my(@ans1)=&dbrent($newa,$newb,$newc,$funcref,$dfuncref);
+  #   my(@ans2)=&brent($newa,$newb,$newc,$funcref);
+  #   die "dbrent $ans1[0], brent $ans2[0]\n" unless &main::near($ans1[0]+1e6,$ans2[0]+1e6);
+  #   @ans1;
+}
+
+# Easy way to TRY to bracket a root and then call zbrent to find the
+# root. The calling convention is similar to easybrent: we are given
+# two starting points. If they have different signs, we just call
+# zbrent. If they have the same sign and are both positive, we search
+# in the downhill direction for a negative value (using mnbrak
+# together with a modified golden-section minimizer (section 10.1)
+# that stops as soon as it crosses zero). Similarly, if they have the
+# same sign and are both negative, we search uphill for a positive
+# value.
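+#
+# (easyzbrent below is a stub; when you can bracket the root yourself, call
+# zbrent directly, as in the sample program at the top of this file:
+#    my($x) = zbrent(\&f, 0, 13);   # f(0) and f(13) must have different signs
+# and only fall back on this routine once it is implemented.)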
+ +sub easyzbrent { + my($ax,$bx,$funcref) = @_; + die "Not implemented yet; must call zbrent directly" +} + + +# Parabolic Interpolation and Brent's Method in one dimension +# From Numerical Recipes in C, Section 10.2, p. 404. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and given a bracketing triplet of abcissas $ax, $bx, $cx as returned +# by mnbrak, this routine isolates the minimum to a fractional +# precision of about $tol using Brent's method. Returns (x, f(x)) at +# the minimum. $tol is set to a good default if omitted. +# +# See easybrent for an easier way to call this. + +sub brent { + my($ax, $bx, $cx, $funcref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($CGOLD) = 0.3819660; # The golden ratio. [Actually, 1-golden ratio.] + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($x,$w,$v); $x=$w=$v=$bx; # Initializations ... + die "brent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fw,$fv,$fx); ($fw)=($fv)=($fx)=&$funcref($x); + my($d,$u,$fu); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($r) = ($x-$w)*($fx-$fv); + my($q) = ($x-$v)*($fx-$fw); + my($p) = ($x-$v)*$q - ($x-$w)*$r; + $q=2.0*($q-$r); + $p = -$p if $q > 0; + $q = abs($q); + my($etemp)=$e; + $e=$d; + if (abs($p) >= abs(0.5*$q*$etemp) || $p <= $q*($a-$x) || $p >= $q*($b-$x)) { + $d = $CGOLD*($e = ($x >= $xm ? $a-$x : $b-$x)); + } + # The above conditions determine the acceptability of the parabolic + # fit. Here we take the golden section step into the larger of the two + # segments. + else { + $d=$p/$q; # Take the parabolic step. + $u=$x+$d; + $d = &SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } + } else { + $d=$CGOLD*($e=($x >= $xm ? $a-$x : $b-$x)); + } + $u = (abs($d) >= $tol1 ? $x+$d : $x+&SIGN($tol1,$d)); + ($fu) = &$funcref($u); # This is the one function evaluation per iteration. + if ($fu <= $fx) { # Now decide what to do with our function evaluation. + ($u >= $x ? $a : $b) = $x; + ($v, $w, $x) = ($w, $x, $u); # Housekeeping follows: + ($fv, $fw, $fx) = ($fw, $fx, $fu); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w == $x) { + $v=$w; + $w=$u; + $fv=$fw; + $fw=$fu; + } elsif ($fu <= $fv || $v == $x || $v == $w) { + $v = $u; + $fv = $fu; + } + } # Done with housekeeping. Back for another iteration. + } + die "$0: brent: Maximum number of iterations ($ITMAX) exceeded"; +} + +# One-Dimensional Search with First Derivatives +# From Numerical Recipes in C, Section 10.3, p. 405. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and its derivative referenced by $dfuncref, and given a bracketing +# triplet of abcissas $ax, $bx, $cx as returned by mnbrak, this +# routine isolates the minimum to a fractional precision of about $tol +# using a modification of Brent's method that uses derivatives. +# Returns (x, f(x)) at the minimum. $tol is set to a good default if +# omitted. 
+# +# See easydbrent for an easier way to call this. + +sub dbrent { + my($ax, $bx, $cx, $funcref, $dfuncref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($w,$v,$x,$u); $w=$v=$x=$bx; # Initializations ... + die "dbrent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fx,$dx)=&$funcref($x); + $dx=&$dfuncref($x) unless defined $dx; # if $funcref only returned one value in previous line + my($fw,$fv,$fu); $fw=$fv=$fx; + my($dw,$dv,$du); $dw=$dv=$dx; # All our housekeeping chores are doubled by the necessity of moving derivative values around as well as function values. + my($d); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + # print "a $a b $b x $x xm $xm\n"; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($d1)=2.0*($b-$a); # Initialize these d's to an out-of-bracket value + my($d2)=$d1; + $d1 = ($w-$x)*$dx/($dx-$dw) if ($dw != $dx); # Secant method with one point. + $d2 = ($v-$x)*$dx/($dx-$dv) if ($dv != $dx); # And the other. + # Which of these two estimates of d shall we take? + # We will insist that they be within the bracket, and on + # the side pointed to by the derivative at x: + my($u1)=$x+$d1; + my($u2)=$x+$d2; + my($ok1) = ($a-$u1)*($u1-$b) > 0 && $dx*$d1 <= 0; + my($ok2) = ($a-$u2)*($u2-$b) > 0 && $dx*$d2 <= 0; + my($olde) = $e; # Movement on the step before last. + $e = $d; + if ($ok1 || $ok2) { # Take only an acceptable d, and if both are acceptable, then take the smallest one. + if ($ok1 && $ok2) { + $d=(abs($d1) < abs($d2) ? $d1 : $d2); + } elsif ($ok1) { + $d=$d1; + } else { + $d=$d2; + } + if (abs($d) <= abs(0.5*$olde)) { + $u=$x+$d; + $d=&SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } else { # Bisect, not golden section. + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); # Decide which segment by the sign of the derivative. + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + if (abs($d) >= $tol1) { + $u=$x+$d; + ($fu,$du)=&$funcref($u); + } else { + $u=$x+&SIGN($tol1,$d); + ($fu,$du)=&$funcref($u); + return ($x,$fx) if ($fu > $fx); # If the minimum step in the downhill direction takes us uphill, then we are done. + } + # Now all the housekeeping, sigh. + $du=&$dfuncref($u) unless defined $du; # if $funcref only returned one value just above + if ($fu <= $fx) { + ($u >= $x ? $a : $b) = $x; + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($x,$fx,$dx); + ($x,$fx,$dx)=($u,$fu,$du); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w==$x) { + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($u,$fu,$du); + } elsif ($fu < $fv || $v == $x || $v == $w) { + ($v,$fv,$dv)=($u,$fu,$du); + } + } + } + die "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded\n"; + # Alternative: + # warn "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded. Trying brent ...\n"; + # &brent($ax,$bx,$cx,$funcref,$tol); +} + + +# Routine for Initially Bracketing a Minimum. +# From Numerical Recipes in C, Section 10.1, p. 400. 
Ported to Perl. +# +# Given a continuous function referenced by $funcref, and distinct +# initial points $ax and $bx, this routine searches in the downhill +# direction (defined by the function as evaluated at the initial +# points) and returns new points $ax, $bx, $cx that bracket a minimum +# of the function [in the sense that b is between a and c, and f(b) is +# less than both f(a) and f(c)]. Also returned are the function values +# at the three points, $fa, $fb, and $fc. +# +# JME: If $cx is +inf (resp. -inf), this means that we searched in the +# positive (resp. negative) direction and the function just decreased +# forever (either to a plateau or without bound - look at $fb to see +# the last finite value). At least, it decreased at all the points +# where we sampled it - we might have skipped right over a spike. So +# either there is no minimum in the direction we searched, or we +# missed it; in either case our return values won't bracket any minimum +# and the caller should either give up or try something else! +# +# JME: Note that it's also possible that $cx remains finite, but that +# the minimum $fb that we bracket is -$inf (and typically $fc will be +# -$inf too). +# +# JME: f(b) is now required to be STRICTLY less than f(a) and f(c). +# This avoids counting an "extended" point of inflection as a minimum. +# I imagine the minimization routines would nonetheless be willing to +# find such if it's in the interval (should check...), but requiring +# us to search past it here is important for the previous paragraph: +# if the function value is eventually -inf forever due to overflow, we +# still keep searching forever until the abcissa is also +/- inf, +# rather than saying we've hit a plateau and that's enough to stop. +# +# It's ok if &$funcref returns multiple values; we'll evaluate it in +# list context and use only the first value. This is useful because +# of the calling convention for dbrent; e.g., easydbrent relies on it. + +sub mnbrak { + my($ax, $bx, $funcref) = @_; + my($GOLD) = 1.618034; + my($GLIMIT) = 100.0; + my($TINY) = 1.0e-20; + + die "mnbrak: $ax and $bx must be different\n" if $ax==$bx; # JME: added + my($fa) = &$funcref($ax); + my($fb) = &$funcref($bx); + if ($fb > $fa) { + # Switch roles of a and b so that we can go downhill in the direction + # from a to b. + ($ax, $bx) = ($bx, $ax); + ($fa, $fb) = ($fb, $fa); + } + + my($cx) = $bx + $GOLD*($bx-$ax); # First guess for c. + my($fc) = &$funcref($cx); + + # Keep looping here until we bracket. + while ($fb >= $fc && $cx != $inf && $cx != -$inf) { # JME: added the inf tests, and changed >= to > to make sure we keep searching all the way to inf if necessary in order to get $ax $bx $cx strictly in order + # print("ax $ax bx $bx cx $cx // fa $fa fb $fb fc $fc\n"), + + # Compute u by parabolic extrapolation from a, b, c. + # $TINY is used to prevent any possible division by zero. + my($r) = ($bx-$ax)*($fb-$fc); + my($q) = ($bx-$cx)*($fb-$fa); + my($u) = $bx -(($bx-$cx)*$q - ($bx-$ax)*$r)/(2.0*&SIGN(&FMAX(abs($q-$r),$TINY),$q-$r)); + my($ulim) = $bx + $GLIMIT*($cx-$bx); + my($fu); + # We won't go farther than this. Test various possibilities: + if (($bx - $u)*($u - $cx) > 0) { # Parabolic u is (strictly) between b and c: try it. + ($fu) = &$funcref($u); + if ($fu < $fc) { # Got a minimum between b and c. 
+ ($ax,$bx) = ($bx,$u); + ($fa,$fb) = ($fb,$fu); + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } elsif ($fu > $fb) { # Got a minimum between a and u. + $cx = $u; + $fc = $fu; + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } + $u = $cx + $GOLD*($cx-$bx); # Parabolic fit was no use. Use default magnification. + ($fu) = &$funcref($u); + } elsif (($cx-$u)*($u-$ulim) > 0) { # Parabolic fit is between c and its allowed limit + ($fu) = &$funcref($u); + if ($fu < $fc) { + ($bx, $cx, $u) = ($cx, $u, $u+$GOLD*($u-$cx)); # JME: formerly $cx+$GOLD*($cx-$bx), but that seems to have been a bug since the new u might not be beyond the new cx. + ($fb, $fc, $fu) = ($fc, $fu, &$funcref($u)); + } + } elsif (($u-$ulim)*($ulim-$cx) > 0) { # Limit parabolic u to maximum allowed value. JME: Changed >= to > so that we are guaranteed $u > $cx strictly. See comment at top of loop. + $u=$ulim; + ($fu) = &$funcref($u); + } else { # Reject parabolic u, use default magnification. + $u=$cx+$GOLD*($cx-$bx); + ($fu)=&$funcref($u); + } + ($ax,$bx,$cx) = ($bx,$cx,$u); # Eliminate oldest point and continue. + ($fa,$fb,$fc) = ($fb,$fc,$fu); + } + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb <= $fa && ($fb <= $fc || $cx==$inf || $cx==-$inf); + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest but we didn't run into infinity with cx=$fc\n"; +} + + +# Using the Van Wijngaarden-Dekker-Brent method, find the root of a +# function f (referenced by $funcref) between x1 and x2, where f(x1) +# and f(x2) must have different signs. The root will be refined until +# its accuracy is $tol (which defaults to the machine epsilon if +# omitted). +# +# See easyzbrent for a sometimes easier way to call this. + +sub zbrent { + my($funcref, $x1, $x2, $tol) = @_; + $tol = $machine_epsilon unless defined $tol; + + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($EPS) = $machine_epsilon; # Machine floating-point precision. (Defined as 3.0e-8 in C version.) + + my($a,$b,$c)=($x1,$x2,$x2); + my($d,$e,$min1,$min2); + my($fa,$fb) = (&$funcref($a), &$funcref($b)); + my($p,$q,$r,$s,$tol1,$xm); + + die "zbrent: root must be bracketed between x1=$x1 and x2=$x2, but f(x1)=$fa, f(x2)=$fb" if $fb*$fa > 0; + + my($fc)=$fb; + my($iter); + for ($iter=1;$iter<=$ITMAX;$iter++) { + if ($fb*$fc > 0) { + $c=$a; # Rename a, b, c and adjust bounding interval d. + $fc=$fa; + $e=$d=$b-$a; + } + if (abs($fc) < abs($fb)) { + $a=$b; + $b=$c; + $c=$a; + $fa=$fb; + $fb=$fc; + $fc=$fa; + } + $tol1=2*$EPS*abs($b)+0.5*$tol; # Convergence check. + $xm=0.5*($c-$b); + return $b if (abs($xm) <= $tol1 || $fb == 0); + if (abs($e) >= $tol1 && abs($fa) > abs($fb)) { + $s=$fb/$fa; # Attempt inverse quadratic interpolation. + if ($a == $c) { + $p=2*$xm*$s; + $q=1-$s; + } else { + $q=$fa/$fc; + $r=$fb/$fc; + $p=$s*(2*$xm*$q*($q-$r)-($b-$a)*($r-1)); + $q=($q-1)*($r-1)*($s-1); + } + $q = -$q if ($p > 0); # Check whether in bounds. + $p=abs($p); + $min1=3*$xm*$q-abs($tol1*$q); + $min2=abs($e*$q); + if (2*$p < ($min1 < $min2 ? $min1 : $min2)) { + $e=$d; # Accept interpolation. + $d=$p/$q; + } else { + $d=$xm; # Interpolation failed, use bisection. 
+        $e=$d;
+      }
+    } else { # Bounds decreasing too slowly, use bisection.
+      $d=$xm;
+      $e=$d;
+    }
+    $a=$b; # Move last best guess to $a.
+    $fa=$fb;
+    if (abs($d) > $tol1) { # Evaluate new trial root.
+      $b += $d;
+    } else {
+      $b += ($xm > 0 ? abs($tol1) : -abs($tol1));
+    }
+    $fb=&$funcref($b);
+  }
+  die "$0: zbrent: Maximum number of iterations ($ITMAX) exceeded";
+}
+
+1;
diff --git a/egs/babel/s5d/local/optimize2.pl b/egs/babel/s5d/local/optimize2.pl
new file mode 100755
index 00000000000..ead70129ec8
--- /dev/null
+++ b/egs/babel/s5d/local/optimize2.pl
@@ -0,0 +1,152 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::RealBin/optimize/";
+use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent);
+use Data::Dumper;
+use Scalar::Util qw(looks_like_number);
+
+use 5.010;
+
+my @cmd_array = ();
+my %opts = ();
+my $output_dir = "";
+my $result_regexp = "(.*)";
+my $cmd;
+my $ftol = 3e-2;
+my $iftol = 1e-1;
+
+while (@ARGV) {
+  my $parm = shift @ARGV;
+  if ($parm eq "--var") {
+    my $var = shift;
+    die "$0: The variable $var does not contain a starting value" unless $var =~ /.*=.*/;
+    my @F = split "=", $var;
+    die "$0: The variable $var has more than one assignment" unless @F == 2;
+    die "$0: Multiple definitions of the variable $F[0]" if defined $opts{$F[0]};
+    $opts{$F[0]} = $F[1];
+  } elsif ($parm eq "--output-dir") {
+    $output_dir = shift;
+  } elsif ($parm eq "--ftol") {
+    $ftol = shift;
+    die "$0: The ftol parameter has to be a floating-point number" unless looks_like_number($ftol);
+  } elsif ($parm eq "--iftol") {
+    $iftol = shift;
+    die "$0: The iftol parameter has to be a floating-point number" unless looks_like_number($iftol);
+  } elsif ($parm eq "--result-regexp") {
+    $result_regexp = shift;
+  } else {
+    push @cmd_array, $parm;
+    while (@ARGV) {
+      push @cmd_array, shift @ARGV;
+    }
+  }
+}
+
+
+sub substitute {
+  my $cmd_proto = $_[0];
+  my %valhash = %{$_[1]};
+
+  my $cmd_out = $cmd_proto;
+
+  foreach my $key (keys %valhash) {
+    #print $elem . "($key, " . $valhash{$key}. ")->";
+    my $prev_cmd_out = $cmd_out;
+    $cmd_out =~ s/\b$key\b/$valhash{$key}/g;
+    die "$0: The variable $key is not used in the command." if $prev_cmd_out eq $cmd_out;
+    #print $elem . "\n";
+  }
+
+  return $cmd_out;
+}
+
+sub f {
+  state $iter = 0;
+  my @params = @_;
+  my $i = 0;
+
+  my %curr_opts;
+  foreach my $v (sort keys %opts) {
+    $curr_opts{$v} = abs($params[$i]);
+    $i += 1;
+  }
+
+  my $result;
+  my $k = join(" ", substitute( $cmd, \%curr_opts));
+  print "$0: Debug: $k\n";
+  open(my $fh, '-|', "(set -e -o pipefail; $k) 2>&1") or die $!;
+  while (my $line=<$fh>) {
+    print $line;
+    chomp $line;
+    if ($line =~ /$result_regexp/) {
+      print "$0: Line $line matches the regexp \"$result_regexp\"\n";
+      $result = $line;
+      $result =~ s/$result_regexp/$1/g;
+    }
+  }
+  close($fh) or die "$0: The command didn't finish successfully: $!\n";
+
+  my $exit = $? >> 8;
+  if ( $exit != 0) {
+    die "$0: The command return status indicates failure: $exit\n";
+  }
+
+  if (not defined $result) {
+    die "$0: Matching the regexp on the command output didn't yield any results";
+  }
+  print "$0: Iteration $iter: " . join(" ", "[", @params, "] =>", $result) . "\n";
+
+  $iter += 1;
+  return -1.0 * $result + 0.0;
+}
+
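+# Example invocation (a sketch with hypothetical scripts and values; the
+# uppercase tokens LMWT and WIP in the command are the placeholders that
+# substitute() rewrites to the current guesses on every evaluation, and since
+# f() negates the number captured by --result-regexp, powell() effectively
+# maximizes it, e.g. an ATWV):
+#
+#   local/optimize2.pl --output-dir exp/tune \
+#     --var LMWT=12 --var WIP=0.5 \
+#     --result-regexp '.*ATWV = (\S+)' \
+#     local/score.sh --lmwt LMWT --wip WIP data/dev10h exp/tri5/decode
+#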
"\n"; +#print Dumper(\@cmd_array); + +$cmd = join(" ", @cmd_array); + +die "$0: Empty command \"$cmd\"" unless $cmd; +die "$0: Empty command \"$cmd\"" if $cmd =~ /^\s*$/; + +my @params; +foreach my $key (sort keys %opts) { + push @params, $opts{$key}; +} + +#my($xvec,$fx) = (\@params, 1); +my($xvec,$fx) = powell(\&f,\@params, $ftol, $iftol); +print "$0: Optimization finished with: " . join(" ",@$xvec, -$fx), "\n"; + + +@params=@{$xvec}; +foreach my $v (sort keys %opts) { + $opts{$v} = abs(shift @params); +} +$cmd=substitute($cmd, \%opts); + +{ + open(my $param_file, "> $output_dir/params") || die "Cannot open file $output_dir/params: $!"; + print $param_file "$_=$opts{$_}\n" for (sort keys %opts); + print $param_file "criterion=", -$fx; + close($param_file); +} + +{ + open(my $param_file, "> $output_dir/command.sh"); + print $param_file "$cmd\n"; + close($param_file); +} + +{ + open(my $param_file, "> $output_dir/params.sh"); + print $param_file "declare -A params;\n"; + print $param_file "params[$_]=$opts{$_}\n" for (sort keys %opts); + close($param_file); +} + diff --git a/egs/babel/s5d/local/prepare_acoustic_training_data.pl b/egs/babel/s5d/local/prepare_acoustic_training_data.pl new file mode 100755 index 00000000000..bc7c2812831 --- /dev/null +++ b/egs/babel/s5d/local/prepare_acoustic_training_data.pl @@ -0,0 +1,484 @@ +#!/usr/bin/env perl +use Getopt::Long; + +######################################################################## +# +# Script to prepare the Babel acoustic training data for Kaldi. +# +# - Place transcripts in a file named "text" +# Each line contains: utteranceID word1 word2 ... +# +# - Place the utterance-to-speaker map in a file named "utt2spk" +# Each line contains: utteranceID speakerID +# speakerID MUST BE be a prefix of the utteranceID +# Kaldi code does not require it, but some training scripts do. +# +# - Place the utterance-to-segment map in a file named "segments" +# Each line contains: utteranceID recordingID startTime endTime +# +# - Place the recordingID-to-waveformFile map in "wav.scp" +# Each line contains: recordingIB Input_pipe_for_reading_waveform| +# +# - Place the speaker-utterance map in a file named "spk2utt" +# Each line contains: speakerID utteranceID_1 utteranceID_2 ... +# This is the inverse of the utt2spk mapping +# +# Note 1: the utteranceIDs in the first 3 files must match exactly, and +# the recordingIDSs in the last 2 files must match exactly. +# +# Note 2: Babel data formats and file-naming conventions are assumed. +# +# - The transcriptions and waveforms are in subdirectories named +# audio/.sph +# transcription/.txt +# There is 1 pair of files per recording, with extensions as above +# +# - The audio is in NIST sphere format, so shp2pipe may be used, e.g. +# BABEL_BP_101_11694_20111204_205320_inLine \ +# /export/babel/sanjeev/kaldi-trunk/tools/sph2pipe_v2.5/sph2pipe \ +# -f wav -p -c 1 \ +# BABEL_BP_101_11694_20111204_205320_inLine.sph| +# +# - The filename contains speaker information, e.g. +# BABEL_BP_101_37210_20111102_170037_O1_scripted.sph -> 37210_A +# BABEL_BP_101_37210_20111102_172955_inLine.sph -> 37210_A +# BABEL_BP_101_37210_20111102_172955_outLine.sph -> 37210_B +# Specifically, the inLine speaker is the same as scripted +# +# - The transcription file has time marks in square brackets, e.g. 
+#       [0.0]
+#       <no-speech>
+#       [7.05]
+#       啊 听 听唔听到 啊 你 而家 仲未 上课 系 嘛
+#       [14.07]
+#       <no-speech>
+#
+# - If a vocabulary is provided, map all OOV tokens to an OOV symbol,
+#   and write out an OOV list with counts to a file named "oovCounts"
+#
+#   If one or more word-fragment markers are provided, this script
+#   checks if an OOV token can be made in-vocabulary by stripping off
+#   the markers one by one from either end of the token.
+#
+#   The default settings are
+#
+  $vocabFile   = "";      # No vocab file; nothing is mapped to OOV
+  $OOV_symbol  = "<unk>"; # Default OOV symbol
+  $fragMarkers = "";      # No characters are word-fragment markers
+#
+# - Babel transcriptions contain 4 kinds of untranscribed words
+#
+#   (())      designates unintelligible words
+#   <foreign> designates a word in another language
+#   <prompt>  designates a sequence of pre-recorded words
+#   <overlap> designates two simultaneous foreground speakers
+#
+#   This script maps them to OOV. They are not included in oovCounts
+#
+# - Babel transcriptions also contain a few non-linguistic tokens
+#
+#   <breath>   maps to a vocal noise symbol
+#   <cough>    maps to a vocal noise symbol
+#   <laugh>    maps to a vocal noise symbol
+#   <lipsmack> maps to a vocal noise symbol
+#
+#   <click>    maps to a nonvocal noise symbol
+#   <dtmf>     maps to a nonvocal noise symbol
+#   <int>      maps to a nonvocal noise symbol
+#   <ring>     maps to a nonvocal noise symbol
+#
+#   <no-speech> designates silence > 1 sec.
+#
+  $vocalNoise = "<v-noise>";
+  $nVoclNoise = "<noise>";
+  $silence    = "<silence>";
+  $icu_transform="";
+  $get_whole_transcripts = "false";
+#
+########################################################################
+
+print STDERR "$0 " . join(" ", @ARGV) . "\n";
+GetOptions("fragmentMarkers=s" => \$fragMarkers,
+           "oov=s" => \$OOV_symbol,
+           "vocab=s" => \$vocabFile,
+           "icu-transform=s" => \$icu_transform,
+           "get-whole-transcripts=s" => \$get_whole_transcripts
+           );
+
+if ($#ARGV == 1) {
+  $inDir = $ARGV[0];
+  $outDir = $ARGV[1];
+  print STDERR ("$0: $inDir $outDir\n");
+  if($vocabFile) {
+    print STDERR ("\tLimiting transcriptions to words in $vocabFile\n");
+    print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n");
+    print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers);
+  }
+  print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:);
+} else {
+  print STDERR ("Usage: $0 [--options] InputDir OutputDir\n");
+  print STDERR ("\t--vocab <file>  File containing the permitted vocabulary\n");
+  print STDERR ("\t--oov <symbol>  Use this symbol for OOV words (default <unk>)\n");
+  print STDERR ("\t--fragmentMarkers <chars>  Remove these from ends of words to minimize OOVs (default none)\n");
+  print STDERR ("\t--get-whole-transcripts (true|false)  Do not remove utterances containing no speech\n");
+  exit(1);
+}
+
+########################################################################
+# Read and save the vocabulary, and map anything not in the vocab
+########################################################################
+
+if ($vocabFile) {
+  open (VOCAB, $vocabFile)
+    || die "Unable to open vocabulary file $vocabFile";
+  $numWords = 0;
+  while (<VOCAB>) {
+    next unless (m:^([^\s]+):);
+    $numWords++ unless (exists $inVocab{$1}); # Don't count word repetitions
+    $inVocab{$1} = 1;                         # commonly found in lexicons
+  }
+  close(VOCAB);
+  print STDERR ("Read $numWords unique words from $vocabFile\n");
+}
+
+########################################################################
+# First read segmentation information from all the transcription files
+########################################################################
+
+$TranscriptionDir = "$inDir/transcription";
+if (-d $TranscriptionDir) {
+  @TranscriptionFiles = `ls ${TranscriptionDir}/*.txt`;
+  if ($#TranscriptionFiles >= 0) {
+    printf STDERR ("$0: Found %d .txt files in $TranscriptionDir\n", ($#TranscriptionFiles +1));
+    $numFiles = $numUtterances = $numWords = $numOOV = $numSilence = 0;
+    while ($filename = shift @TranscriptionFiles) {
+      $fileID = $filename;      # To capture the base file name
+      $fileID =~ s:.+/::;       # remove path prefix
+      $fileID =~ s:\.txt\s*$::; # remove file extension
+      # For each transcription file, extract and save segmentation data
+      $numUtterancesThisFile = 0;
+      $prevTimeMark = -1.0;
+      $text = "";
+      if ( $icu_transform ) {
+        $inputspec="uconv -f utf8 -t utf8 -x \"$icu_transform\" $filename |";
+      } else {
+        $inputspec=$filename;
+      }
+      open (TRANSCRIPT, $inputspec) || die "Unable to open $filename";
+      while ($line=<TRANSCRIPT>) {
+        chomp $line;
+        if ($line =~ m:^\s*\[([0-9]+\.*[0-9]*)\]\s*$:) {
+          $thisTimeMark = $1;
+          if ($thisTimeMark < $prevTimeMark) {
+            print STDERR ("$0 ERROR: Found segment with negative duration in $filename\n");
+            print STDERR ("\tStart time = $prevTimeMark, End time = $thisTimeMark\n");
+            print STDERR ("\tThis could be a sign of something seriously wrong!\n");
+            print STDERR ("\tFix the file by hand or remove it from the directory, and retry.\n");
+            exit(1);
+          }
+          if ($prevTimeMark<0) {
+            # Record the first timemark and continue
+            $prevTimeMark = $thisTimeMark;
+            next;
+          }
+          ##################################################
+          # Create an utteranceID using fileID & start time
+          #  - Assume Babel file naming conventions
+          #  - Remove prefix: program_phase_language
+          #  - inLine = scripted = spkr A, outLine = B
+          #  - Move A/B so that utteranceIDs sort by spkr
+          #  - Assume utterance start time < 10000 sec.
+          ##################################################
+          $utteranceID = $fileID;
+          $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::;
+          $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:;
+          $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:;
+          $utteranceID .= sprintf ("_%06i", (100*$prevTimeMark));
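+          # (Worked example: fileID BABEL_BP_101_37210_20111102_172955_outLine
+          # with a segment starting at [7.05] yields the utteranceID
+          # 37210_B_20111102_172955_000705; its speakerID prefix is 37210_B.)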
+          ##################################################
+          # Then save segmentation, transcription, speakerID
+          ##################################################
+          if (exists $transcription{$utteranceID}) {
+            # utteranceIDs should be unique, but this one is not!
+            # Either time marks in the transcription file are bad,
+            # or something went wrong in generating the utteranceID
+            print STDERR ("$0 WARNING: Skipping duplicate utterance $utteranceID\n");
+          }
+          elsif ($text eq "") {
+            # Could be due to text filtering done below
+            # Output information to STDOUT to enable > /dev/null
+            print STDOUT ("$0: Skipping empty transcription $utteranceID\n");
+          } else {
+            $transcription{$utteranceID} = $text;
+            $startTime{$utteranceID} = $prevTimeMark;
+            $endTime{$utteranceID} = $thisTimeMark;
+            if ($utteranceID =~ m:([^_]+_[AB]).*:) {
+              $speakerID{$utteranceID} = $1;
+            } else {
+              # default: one speaker per audio file
+              $speakerID{$utteranceID} = $fileID;
+            }
+            $baseFileID{$utteranceID} = $fileID;
+            $numUtterancesThisFile++;
+            $numUtterances++;
+            $text = "";
+          }
+          $prevTimeMark = $thisTimeMark;
+        } else {
+          @tokens = split(/\s+/, $line);
+          $text = "";
+          while ($w = shift(@tokens)) {
+            # First, some Babel-specific transcription filtering
+            if (($w eq "<sta>")||($w eq "<male-to-female>")||($w eq "<female-to-male>")||($w eq "~")) {
+              next;
+            } elsif (($w eq "<breath>")||($w eq "<cough>")||($w eq "<laugh>")||($w eq "<lipsmack>")) {
+              $text .= " $vocalNoise";
+              $numWords++;
+            } elsif (($w eq "<click>")||($w eq "<dtmf>")||($w eq "<int>")||($w eq "<ring>")) {
+              $text .= " $nVoclNoise";
+              $numWords++;
+            } elsif (($w eq "(())")||($w eq "<foreign>")||($w eq "<overlap>")||($w eq "<prompt>")) {
+              $text .= " $OOV_symbol";
+              $oovCount{$w}++;
+              $numOOV++;
+              $numWords++;
+            } elsif ($w eq "<no-speech>") {
+              $text .= " $silence";
+              $numSilence++;
+            } else {
+              # This is just a regular spoken word
+              if ($vocabFile && (! $inVocab{$w}) && $fragMarkers) {
+                print "Not in vocab: $w\n";
+                # $w is a potential OOV token
+                # Remove fragMarkers to see if $w becomes in-vocabulary
+                while ($w =~ m:^(\S+[$fragMarkers]|[$fragMarkers]\S+)$:) {
+                  if ($w =~ m:^(\S+)[$fragMarkers]$:) {
+                    $w = $1;
+                    last if ($inVocab{$w});
+                  } elsif ($w =~ m:^[$fragMarkers](\S+)$:) {
+                    $w = $1;
+                    last if ($inVocab{$w});
+                  } else {
+                    die "Logically, the program should never reach here!";
+                  }
+                }
+              }
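+              # (e.g. with --fragmentMarkers "-" an OOV token like "-mornin-" is
+              # retried as "-mornin" and then "mornin", stopping at the first
+              # form found in the vocabulary)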
+              # If still an OOV, replace $w by $OOV_symbol
+              if ($vocabFile && (! $inVocab{$w})) {
+                # $w is definitely an OOV token
+                if (exists $oovCount{$w}) {
+                  $oovCount{$w}++;
+                } else {
+                  $oovCount{$w} = 1;
+                }
+                $w = $OOV_symbol;
+                $numOOV++;
+              }
+              $text .= " $w";
+              $numWords++;
+            }
+          }
+          $text =~ s:^\s+::; # Remove leading white space, if any
+          # Transcriptions must contain real words to be useful in training
+          if ($get_whole_transcripts ne "true") {
+            $text =~ s:^(($OOV_symbol|$vocalNoise|$nVoclNoise|$silence)[ ]{0,1})+$::;
+          }
+        }
+      }
+      close(TRANSCRIPT);
+      if ($numUtterancesThisFile>0) {
+        $lastTimeMarkInFile{$fileID} = $prevTimeMark;
+        $numUtterancesInFile{$fileID} = $numUtterancesThisFile;
+        $numUtterancesThisFile = 0;
+      }
+      $numFiles++;
+    }
+    print STDERR ("$0: Recorded $numUtterances non-empty utterances from $numFiles files\n");
+  } else {
+    print STDERR ("$0 ERROR: No .txt files found in $TranscriptionDir\n");
+    exit(1);
+  }
+} else {
+  print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n");
+  exit(1);
+}
+
+########################################################################
+# Then verify existence of corresponding audio files and their durations
+########################################################################
+
+$AudioDir = "$inDir/audio";
+if (-d $AudioDir) {
+  @AudioFiles = `ls ${AudioDir}/*.sph`;
+  if ($#AudioFiles >= 0) {
+    printf STDERR ("$0: Found %d .sph files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;     # remove path prefix
+      $fileID =~ s:\.sph\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        @Info = `head $filename`;
+        $SampleCount = -1;
+        $SampleRate = 8000; # default
+        while ($#Info>=0) {
+          $line = shift @Info;
+          $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):);
+          $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):);
+        }
+        if ($SampleCount<0) {
+          # Unable to extract a valid duration from the sphere header
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          $waveformName{$fileID} = $filename; chomp $waveformName{$fileID};
+          $duration{$fileID} = $SampleCount/$SampleRate;
+          $numFiles++;
+        }
+      } else {
+        # Could be due to text filtering resulting in an empty transcription
+        # Output information to STDOUT to enable > /dev/null
+        print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n");
+      }
+    }
+    print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n");
+  } else {
+    print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n");
+  }
+
+  @AudioFiles = `ls ${AudioDir}/*.wav`;
+  if ($#AudioFiles >= 0) {
+    $soxi=`which soxi` or die "$0: Could not find soxi binary -- do you have sox installed?\n";
+    chomp $soxi;
+    printf STDERR ("$0: Found %d .wav files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;     # remove path prefix
+      $fileID =~ s:\.wav\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        $duration = `$soxi -D $filename`;
+        if ($duration <=0) {
+          # Unable to extract a valid duration with soxi
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          if (exists $waveformName{$fileID} ) {
$waveformName{$fileID} ."\"\n"); + exit(1); + } + $waveformName{$fileID} = $filename; chomp $waveformName{$fileID}; + $duration{$fileID} = $duration; + $numFiles++; + } + } else { + # Could be due to text filtering resulting in an empty transcription + # Output information to STDOUT to enable > /dev/null + print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n"); + } + } + print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); + } else { + print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n"); + } + + if ( $#waveformName == 0 ) { + print STDERR ("$0 ERROR: No audio files found!"); + } +} else { + print STDERR ("$0 ERROR: No directory named $AudioDir\n"); + exit(1); +} + +######################################################################## +# Now all the needed information is available. Write out the 4 files. +######################################################################## + +unless (-d $outDir) { + print STDERR ("$0: Creating output directory $outDir\n"); + die "Failed to create output directory" if (`mkdir -p $outDir`); # i.e. if the exit status is not zero. +} +print STDERR ("$0: Writing 5 output files to $outDir\n"); + +$textFileName = "$outDir/text"; +open (TEXT, "> $textFileName") || die "$0 ERROR: Unable to write text file $textFileName\n"; + +$utt2spkFileName = "$outDir/utt2spk"; +open (UTT2SPK, "> $utt2spkFileName") || die "$0 ERROR: Unable to write utt2spk file $utt2spkFileName\n"; + +$segmentsFileName = "$outDir/segments"; +open (SEGMENTS, "> $segmentsFileName") || die "$0 ERROR: Unable to write segments file $segmentsFileName\n"; + +$scpFileName = "$outDir/wav.scp"; +open (SCP, "| sort -u > $scpFileName") || die "$0 ERROR: Unable to write wav.scp file $scpFileName\n"; +my $binary=`which sph2pipe` or die "Could not find the sph2pipe command"; chomp $binary; +$SPH2PIPE ="$binary -f wav -p -c 1"; +my $SOXBINARY =`which sox` or die "Could not find the sph2pipe command"; chomp $SOXBINARY; +$SOXFLAGS ="-r 8000 -c 1 -b 16 -t wav - downsample"; + +$spk2uttFileName = "$outDir/spk2utt"; +open (SPK2UTT, "> $spk2uttFileName") || die "$0 ERROR: Unable to write spk2utt file $spk2uttFileName\n"; + +$oovFileName = "$outDir/oovCounts"; +open (OOV, "| sort -nrk2 > $oovFileName") || die "$0 ERROR: Unable to write oov file $oovFileName\n"; + +$numUtterances = $numSpeakers = $numWaveforms = 0; +$totalSpeech = $totalSpeechSq = 0.0; +foreach $utteranceID (sort keys %transcription) { + $fileID = $baseFileID{$utteranceID}; + if (exists $waveformName{$fileID}) { + # There are matching transcriptions and audio + $numUtterances++; + $totalSpeech += ($endTime{$utteranceID} - $startTime{$utteranceID}); + $totalSpeechSq += (($endTime{$utteranceID} - $startTime{$utteranceID}) + *($endTime{$utteranceID} - $startTime{$utteranceID})); + print TEXT ("$utteranceID $transcription{$utteranceID}\n"); + print UTT2SPK ("$utteranceID $speakerID{$utteranceID}\n"); + print SEGMENTS ("$utteranceID $fileID $startTime{$utteranceID} $endTime{$utteranceID}\n"); + if (exists $uttList{$speakerID{$utteranceID}}) { + $uttList{$speakerID{$utteranceID}} .= " $utteranceID"; + } else { + $numSpeakers++; + $uttList{$speakerID{$utteranceID}} = "$utteranceID"; + } + next if (exists $scpEntry{$fileID}); + $numWaveforms++; + if ($waveformName{$fileID} =~ /.*\.sph/ ) { + $scpEntry{$fileID} = "$SPH2PIPE $waveformName{$fileID} |"; + } else { + $scpEntry{$fileID} = "$SOXBINARY $waveformName{$fileID} $SOXFLAGS |"; + } + } else { + print STDERR ("$0 WARNING: No audio file for transcription 
$utteranceID\n"); + } +} +foreach $fileID (sort keys %scpEntry) { + print SCP ("$fileID $scpEntry{$fileID}\n"); +} +foreach $speakerID (sort keys %uttList) { + print SPK2UTT ("$speakerID $uttList{$speakerID}\n"); +} +foreach $w (sort keys %oovCount) { + print OOV ("$w\t$oovCount{$w}\n"); +} +exit(1) unless (close(TEXT) && close(UTT2SPK) && close(SEGMENTS) && close(SCP) && close(SPK2UTT) && close(OOV)); + +print STDERR ("$0: Summary\n"); +print STDERR ("\tWrote $numUtterances lines each to text, utt2spk and segments\n"); +print STDERR ("\tWrote $numWaveforms lines to wav.scp\n"); +print STDERR ("\tWrote $numSpeakers lines to spk2utt\n"); +print STDERR ("\tHmmm ... $numSpeakers distinct speakers in this corpus? Unusual!\n") + if (($numSpeakers<($numUtterances/500.0)) || ($numSpeakers>($numUtterances/2.0))); +print STDERR ("\tTotal # words = $numWords (including $numOOV OOVs) + $numSilence $silence\n") + if ($vocabFile); +printf STDERR ("\tAmount of speech = %.2f hours (including some due to $silence)\n", $totalSpeech/3600.0); +if ($numUtterances>0) { + printf STDERR ("\tAverage utterance length = %.2f sec +/- %.2f sec, and %.2f words\n", + $totalSpeech /= $numUtterances, + sqrt(($totalSpeechSq/$numUtterances)-($totalSpeech*$totalSpeech)), + $numWords/$numUtterances); +} + +exit(0); + +######################################################################## +# Done! +######################################################################## diff --git a/egs/babel/s5d/local/prepare_extended_lexicon.sh b/egs/babel/s5d/local/prepare_extended_lexicon.sh new file mode 100644 index 00000000000..3cc5ca6c21f --- /dev/null +++ b/egs/babel/s5d/local/prepare_extended_lexicon.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +# Extend the original lexicon. +# Will creates the files data/local/extend/{lexiconp.txt,oov2prob}. +local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \ + --num-sent-gen $num_sent_gen --num-prons $num_prons \ + data/local/lexicon.txt data/local/lang_ext data/dev2h/text + + +extend_lexicon_param=() +[ -f data/local/extend/original_oov_rates ] || exit 1; +unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` +extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/lang_ext/oov2prob) + +cp -r data/lang data/lang_ext +local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang_ext data/lang_ext + diff --git a/egs/babel/s5d/local/prepare_lexicon.pl b/egs/babel/s5d/local/prepare_lexicon.pl new file mode 100755 index 00000000000..ff128f07637 --- /dev/null +++ b/egs/babel/s5d/local/prepare_lexicon.pl @@ -0,0 +1,404 @@ +#!/usr/bin/env perl +use Getopt::Long; +use Data::Dumper; + +############################################################################### +# +# Convert a Babel-formatted dictionary to work with Kaldi, and optionally +# add non-speech "words" that appear in the transcription. e.g. +# +# Convert dictionary from entries of the form +# +# WORD Romanization pronunciation1 pronunciation2 ... +# +# where each pronunciation has syllable boundaries [.#] and tags _X, " or % +# +# Phone1 Phone2 _TAG . Phone1 Phone2 Phone3 _TAG +# +# and so on, e.g. 
+#
+# 㓤 gat1 g 6 t _1 h O: t _3 k i: t _1
+# 兄妹 hing1mui2 h i: N _1 . m u:j _2 h i: N _1 . m u:j _6
+#
+# to entries of the form
+#
+# 㓤 g_1 6_1 t_1
+# 㓤 h_3 O:_3 t_3
+# 㓤 k_1 i:_1 t_1
+# 兄妹 h_1 i:_1 N_1 m_2 u:j_2
+# 兄妹 h_1 i:_1 N_1 m_6 u:j_6
+#
+#
+# Write only one pronunciation per line
+# Transfer any tags, prefixed by underscores, to phones in the syllable
+# Remove the syllable boundary markers, given by periods or pound signs
+#
+# NOTE: The Romanization is present only for some languages. See -r option.
+#
+# This script will create 5 new files
+#
+# - lexicon.txt: words from the original lexicon + some non-speech "words"
+#
+ $OOV_symbol = "<unk>";     # Default OOV symbol: pronunciation <oov>
+ $vocalNoise = "<v-noise>"; # Vocal noise symbol: pronunciation <vns>
+ $nVoclNoise = "<noise>";   # Nonvocal noise: pronunciation <sss>
+ $silence    = "<silence>"; # Silence > 1 second: pronunciation $sil
+ $icu_transform = "";
+ $phonemap="";
+#
+# - nonsilence_phones.txt: tagged phones from the new lexicon
+#
+# - optional_silence.txt: phones used to model silence in acoustic training
+#
+ $sil = "SIL"; # Also the pronunciation of the word token $silence
+#
+# - silence_phones.txt: $sil and special phones for non-speech "words"
+#
+# - extra_questions.txt: sets of phones of the form *_TAG, one set per line
+#
+# The last file provides sets of phones that share a tag, so that questions can
+# effectively be asked about the tag of a neighboring phone during clustering.
+#
+###############################################################################
+
+GetOptions("add=s" => \$nsWordsFile,
+           "oov=s" => \$OOV_symbol,
+           "romanized!" => \$romanized,
+           "sil=s" => \$sil,
+           "icu-transform=s" => \$icu_transform,
+           "phonemap=s" => \$phonemap
+          );
+
+if ($#ARGV == 1) {
+  $inDict = $ARGV[0];
+  $outDir = $ARGV[1];
+  print STDERR ("$0: $inDict $outDir\n");
+  print STDERR ("\tNon-speech words will be added from $nsWordsFile\n") if ($nsWordsFile);
+  print STDERR ("\tUnknown words will be represented by \"$OOV_symbol\"\n") unless ($OOV_symbol eq "<unk>");
+  print STDERR ("\tRomanized forms of words expected in the dictionary\n") if ($romanized);
+  print STDERR ("\tThe optional silence phone will be \"$sil\"\n") unless ($sil eq "SIL");
+  print STDERR ("\tThe ICU transform for case-conversion will be: \"$icu_transform\"\n") if ($icu_transform);
+} else {
+  print STDERR ("Usage: $0 [--options] BabelDictionary OutputDir\n");
+  print STDERR ("\t--add Add these nonspeech words to lexicon\n");
+  print STDERR ("\t--oov Use this symbol for OOV words (default <unk>)\n");
+  print STDERR ("\t--romanized Dictionary contains (omissible) romanized word-forms\n");
+  print STDERR ("\t--phonemap During reading the dictionary, perform the specified\n");
+  print STDERR ("\t phoneme mapping. The format is: p1=p1' p2' p3';p2=p4'\n");
+  print STDERR ("\t where p1 and p2 are existing phonemes and p1'..p4' are\n");
+  print STDERR ("\t either new or existing phonemes\n");
+  print STDERR ("\t--icu-transform ICU transform to be used during the ICU transliteration\n");
+  exit(1);
+}
+
+unless (-d $outDir) {
+  print STDERR ("$0: Creating output directory $outDir\n");
+  die "Unable to create output directory $outDir"
+    if system("mkdir -p $outDir"); # mkdir returned with status != 0
+}
+$outLex = "$outDir/lexicon.txt";
+$nspFile = "$outDir/nonsilence_phones.txt";
+$spFile = "$outDir/silence_phones.txt";
+$osFile = "$outDir/optional_silence.txt";
+$exqFile = "$outDir/extra_questions.txt";
+
+
+# The phonemap is in the form of "ph1=a b c;ph2=a f g;...."
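The `p1=p1' p2' p3';p2=p4'` phone-map syntax documented above is parsed by the Perl block that follows. As a quick cross-check for reviewers, here is a minimal, hypothetical Python sketch of the same parsing; the function name `parse_phonemap` is ours and not part of the patch:

```python
def parse_phonemap(spec):
    """Parse a phone map such as "ph1=a b c;ph2=a f g" into a dict
    mapping each source phoneme to its target phoneme sequence,
    mirroring what the Perl code below builds in %phonemap_hash."""
    phonemap = {}
    for instance in spec.split(";"):
        if not instance.strip():
            continue  # tolerate a trailing semicolon
        phoneme, targets = instance.split("=", 1)
        # one source phoneme may expand to several target phonemes
        phonemap[phoneme.strip()] = targets.split()
    return phonemap

assert parse_phonemap("ph1=a b c;ph2=d") == {"ph1": ["a", "b", "c"],
                                             "ph2": ["d"]}
```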
+%phonemap_hash; +if ($phonemap) { + $phonemap=join(" ", split(/\s+/, $phonemap)); + print $phonemap . "\n"; + @phone_map_instances=split(/;/, $phonemap); + foreach $instance (@phone_map_instances) { + ($phoneme, $tgt) = split(/=/, $instance); + $phoneme =~ s/^\s+|\s+$//g; + $tgt =~ s/^\s+|\s+$//g; + #print "$phoneme=>$tgt\n"; + @tgtseq=split(/\s+/,$tgt); + $phonemap_hash{$phoneme} = []; + push @{$phonemap_hash{$phoneme}}, @tgtseq; + } +} + +#print Dumper(\%phonemap_hash); + +############################################################################### +# Read input lexicon, write output lexicon, and save the set of phones & tags. +############################################################################### + + +open (INLEX, $inDict) + || die "Unable to open input dictionary $inDict"; + +open (OUTLEX, "| sort -u > $outLex") + || die "Unable to open output dictionary $outLex"; + +$numWords = $numProns = 0; +while ($line=) { + chomp; + ############################################### + # Romainzed forms necessitate \t\S+ below, else + # if ($line =~ m:^([^\t]+)(\t[^\t]+)+$:) { + ############################################### + if ( ($romanized && ($line =~ m:^([^\t]+)\t\S+((\t[^\t]+)+)$:)) || + ((!$romanized) && ($line =~ m:^([^\t]+)((\t[^\t]+)+)$:)) ) { + $word = $1; + + if ( $icu_transform ) { + $xform_word=`echo \"$word\" | uconv -f utf8 -t utf8 -x \"$icu_transform\"`; + chop $xform_word; + #print $xform_word; + #$xform_word="[$word]$xform_word"; + } else { + $xform_word=$word; + } + $prons = $2; + $prons =~ s:^\s+::; # Remove leading white-space + $prons =~ s:\s+$::; # Remove trailing white-space + @pron = split("\t", $prons); + for ($p=0; $p<=$#pron; ++$p) { + $new_pron = ""; + while ($pron[$p] =~ s:^([^\.\#]+)[\.\#]{0,1}::) { push (@syllables, $1); } + while ($syllable = shift @syllables) { + $syllable =~ s:^\s+::; + $syllable =~ s:\s+$::; + $syllable =~ s:\s+: :g; + @original_phones = split(" ", $syllable); + @substituted_original_phones=(); + + foreach $phone (@original_phones) { + if (defined $phonemap_hash{$phone} ) { + #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; + push @substituted_original_phones, @{$phonemap_hash{$phone}}; + } else { + push @substituted_original_phones, $phone; + } + } + #print join(' ', @original_phones) . "=>" . join(' ',@substituted_original_phones) . 
"\n"; + @original_phones = @substituted_original_phones; + + $sylTag = ""; + $new_phones = ""; + while ($phone = shift @original_phones) { + if ($phone =~ m:^\_\S+:) { + # It is a tag; save it for later + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ($phone =~ m:^[\"\%]$:) { + # It is a stress marker; save it like a tag + $phone = "_$phone"; + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ( $phone =~ m:_:) { + # It is a phone containing "_" (underscore) + $new_phone=$phone; + $new_phone=~ s/\_//g; + if (( $is_original_phone{$phone} ) and not defined( $substituted_phones{phone}) ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } else { + print STDERR "WARNING, phone $phone was replaced with $new_phone\n" unless $substituted_phones{$phone}; + } + $is_original_phone{$new_phone} = "$new_phone"; + $substituted_phones{$phone} = $new_phone; + $new_phones .= " $new_phone"; + } else { + # It is a phone + if ( $substituted_phones{phone} ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } + $is_original_phone{$phone} = "$phone"; + $new_phones .= " $phone"; + } + } + $new_phones =~ s:(\S+):$1${sylTag}:g; + $new_pron .= $new_phones . "\t"; # the tab added by Dan, to keep track of + # syllable boundaries. + $is_compound_tag{$sylTag} = 1; + while ($new_phones =~ s:^\s*(\S+)::) { $is_new_phone{$1} = 1; } + } + $new_pron =~ s:^\s+::; + print OUTLEX ("$xform_word\t$new_pron\n"); + $numProns++; + } + @pron = (); + $numWords++; + } else { + print STDERR ("$0 WARNING: Skipping unparsable line $. in $inDict\n"); + } +} +close(INLEX) + && print STDERR ("$0: Read $numWords entries from $inDict\n"); + +############################################################################### +# Read a list of non-speech words if given, and write their "pronunciations" +# - Such lexicon entries are typically created for , etc. +# - If provided explicitly, they each get their own private phone models +# - Otherwise, they are mapped to an OOV symbol with a shared phone +# - All such phones are grouped with the $sil phone for clustering purposes, +# which means that they remain context-independent and form a question set. 
+###############################################################################
+
+if ($nsWordsFile) {
+  open (NSW, $nsWordsFile)
+    || die "Unable to open non-speech words file $nsWordsFile";
+  $numNSWords = 0;
+  while ($line=<NSW>) {
+    next unless ($line =~ m:^\s*([^\s]+)\s*:); # Take the first word if present
+    print OUTLEX ("$1\t$1\n");   # The word itself is its pronunciation
+    $is_silence_phone{$1} = 1;   # Add it to the list of silence phones
+    $numProns++;
+    $numNSWords++;
+  }
+  close(NSW)
+    && print STDERR ("$0: Adding $numNSWords non-speech words from $nsWordsFile to $outLex\n");
+}
+
+# Add the OOV symbol to the lexicon
+print OUTLEX ("$OOV_symbol\t<oov>\n"); # The <oov> symbol is assumed not to be
+$is_silence_phone{"<oov>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Add the vocal noise symbol to the lexicon
+print OUTLEX ("$vocalNoise\t<vns>\n"); # The <vns> symbol is assumed not to be
+$is_silence_phone{"<vns>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Add the nonvocal noise symbol to the lexicon
+print OUTLEX ("$nVoclNoise\t<sss>\n"); # The <sss> symbol is assumed not to be
+$is_silence_phone{"<sss>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Finally, add the silence symbol to the lexicon
+print OUTLEX ("$silence\t$sil\n");
+$is_silence_phone{$sil} = 1;
+$numProns++;
+
+close(OUTLEX)
+  && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n");
+
+###############################################################################
+# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line
+###############################################################################
+
+foreach $phone (sort keys %is_new_phone) {
+  $tagLess_phone = $phone;
+  $tagLess_phone =~ s:^([^_]+).*:$1:; # underscore marks tag beginnings
+  if ($is_original_phone{$tagLess_phone}) {
+    # save $phone for writing later to the NSP file
+    $is_original_phone{$tagLess_phone} .= " $phone";
+  } else {
+    print STDERR ("$0 WARNING: Skipping unexpected tagged phone $phone.\n");
+    print STDERR ("\tCheck if original lexicon has phones containing \"\_\"\n");
+    die "Cannot continue";
+  }
+}
+
+open (NSP, "| sort > $nspFile")
+  || die "Unable to write nonsilence phones to $nspFile";
+$p = 0;
+foreach $phone (sort keys %is_original_phone) {
+  $tagged_phones = $is_original_phone{$phone};
+  $tagged_phones =~ s:^\S+\s*::; # Remove the original (untagged) phone
+  unless ($phone eq "") {
+    print NSP ("$tagged_phones\n"); # Write out the remaining (tagged) phones
+    $p++;
+  }
+}
+
+close(NSP)
+  && print STDERR ("$0: Wrote $p (sets of) nonsilence phones to $nspFile\n");
+
+if ( $p > (0.5*$numWords) ) {
+  print STDERR ("$0 WARNING: Original dictionary had $numWords words, and\n");
+  print STDERR ("\t\t$p nonsilence phones were found! 
This is highly unusual.\n"); + print STDERR ("\t\tCheck if the dictionary contains other tab-separated values\n"); + print STDERR ("\t\tthat are being mistaken for pronunciations by this script.\n"); + print STDERR ("$0 ADVICE: Use --romanized for omitting romanized word forms\n") unless ($romanized); +} + +############################################################################### +# - silence_phones.txt: $sil and special phones for non-speech "words" +############################################################################### + +open (SPF, "| sort > $spFile") + || die "Unable to write silence phones to $spFile"; +$p = 0; +foreach $phone (keys %is_silence_phone) { + print SPF ("$phone\n"); + $p++; +} +close(SPF) + && print STDERR ("$0: Wrote $p silence phones to $spFile\n"); + +############################################################################### +# - optional_silence.txt: the reserved (?) phone +############################################################################### + +$is_optional_silence{$sil} = 1; +open (OSF, "| sort > $osFile") + || die "Unable to write optional silence phones to $osFile"; +$p = 0; +foreach $phone (keys %is_optional_silence) { + print OSF ("$phone\n"); + $p++; +} +close(OSF) + && print STDERR ("$0: Wrote $p optional silence phones to $osFile\n"); + +############################################################################### +# - extra_questions.txt: sets of phones of the form *_TAG, one set per line +############################################################################### + +open (EXQ, "| sort > $exqFile") + || die "Unable to write the extra questions file $exqFile"; + +# First make sets of all tagged phones that share the (single) original tags + +$numExtraQs = 0; +foreach $tag (sort keys %is_original_tag) { + $question = ""; + foreach $phone (sort keys %is_new_phone) { + $question .= " $phone" if ($phone =~ m:$tag:); + } + $question =~ s:^\s+::; + print EXQ ("$question\n") unless ($question eq ""); + $numExtraQs++; +} +print STDERR ("$0: Found $numExtraQs unique individual tags in $inDict\n"); + +# It is possible to go overboard by creating questions with all 2^K possible +# subsets of the original tags. E.g. ($phone=~m:$tag1:)||($phone=~m:$tag2:) +# Do this by hand if it is linguistically meaningful for some language +# It is not worth doing this generically for all languages and tag sets. + +# If each syllable has only one tag, then questions with conjunctions of tags +# such as ($phone=~m:$tag1:)&&($phone=~m:$tag2:) will yield empty questions +# However, if syllables carry multiple tags, e.g. tone and stress, then one +# could similarly go overboard with conjunctions of overlapping tags. +# This too is not worth doing generically for all languages and tag sets. 
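Both question-building loops (the per-tag loop above and the compound-tag loop that follows) reduce to the same set construction: for each tag, collect every tagged phone whose name matches that tag. A small, hypothetical Python sketch of that construction, using the same substring-match semantics as the Perl `$phone =~ m:$tag:` test:

```python
def questions_by_tag(tagged_phones, tags):
    """For each tag, collect all tagged phones containing that tag;
    one question (a space-separated phone set) per tag."""
    questions = []
    for tag in sorted(tags):
        members = sorted(p for p in tagged_phones if tag in p)
        if members:  # skip tags that match no phone
            questions.append(" ".join(members))
    return questions

print(questions_by_tag({"a_1", "a_2", "b_1"}, {"_1", "_2"}))
# -> ['a_1 b_1', 'a_2']
```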
+
+# Instead, just make sets of all tagged phones with the same new (compound) tag
+
+foreach $tag (sort keys %is_compound_tag) {
+  next if ($is_original_tag{$tag});
+  $question = "";
+  foreach $phone (sort keys %is_new_phone) {
+    $question .= " $phone" if ($phone =~ m:$tag:);
+  }
+  $question =~ s:^\s+::;
+  print EXQ ("$question\n") unless ($question eq "");
+  $numExtraQs++;
+}
+
+# Finally, add the silence phones as a set for use as a clustering question
+
+$question = "";
+foreach $phone (sort keys %is_silence_phone) {
+  $question .= " $phone";
+}
+$question =~ s:^\s+::;
+print EXQ ("$question\n") unless ($question eq "");
+$numExtraQs++;
+
+close(EXQ)
+  && print STDERR ("$0: Wrote $numExtraQs extra questions (incl compound tags and sil) to $exqFile\n");
diff --git a/egs/babel/s5d/local/prepare_stm.pl b/egs/babel/s5d/local/prepare_stm.pl
new file mode 100755
index 00000000000..b4daec585e3
--- /dev/null
+++ b/egs/babel/s5d/local/prepare_stm.pl
@@ -0,0 +1,345 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+use Encode;
+
+################################################################################
+#
+# Script to prepare a NIST .stm file for scoring ASR output. Based on the files
+# that are naturally created for Kaldi acoustic training:
+#
+# - data/segments: contains segmentID, recordingID, start-time & end-time
+#
+# - data/wav.scp: contains recordingID & waveform-name (or sph2pipe command)
+#
+# - data/utt2spk: contains segmentID & speakerID
+#
+# - data/text: contains segment ID and transcription
+#
+# The .stm file has lines of the form
+#
+# waveform-name channel speakerID start-time end-time [<attributes>] transcription
+#
+# Clearly, most of the information needed for creating the STM file is present
+# in the four Kaldi files mentioned above, except channel --- its value will be
+# obtained from the sph2pipe command if present, or will default to "1" --- and
+# from a separate demographics.tsv file. (A feature to add later?)
+#
+# Note: Some text filtering is done by this script, such as removing non-speech
+# tokens from the transcription, e.g. <breath>, <cough>, etc.
+
+ $fragMarkers = ""; # If given by the user, they are stripped from words
+
+# But two types of tokens are retained as is, if present.
+#
+ $Hesitation = "<hes>"; # which captures hesitations, filled pauses, etc.
+ $OOV_symbol = "<unk>"; # which our system outputs occasionally.
+#
+# Note: The .stm file must be sorted by filename and channel in ASCII order and
+# by the start-time in numerical order. NIST recommends the unix command
+# "sort +0 -1 +1 -2 +3nb -4"
+#
+# This script will also produce an auxiliary file named reco2file_and_channel
+# which is used by Kaldi scripts to produce output in .ctm format for scoring.
+# So any channel ID assigned here will be consistent between ref and output.
+#
+# If the training text is Viterbi-aligned to the speech to obtain time marks,
+# it should be straightforward to modify this script to produce a .ctm file:
+#
+# waveform-file channel start-time duration word
+#
+# which lists the transcriptions with word-level time marks.
+#
+# Note: A .ctm file must be sorted via "sort +0 -1 +1 -2 +2nb -3"
+#
+################################################################################
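To make the file contract above concrete, here is an illustrative Python sketch (made-up IDs, not part of the patch) of how one stm line is assembled from the four Kaldi files, with the channel defaulting to "1" as described:

```python
# segmentID -> (recordingID, start, end), plus the other three Kaldi maps
segments = {"utt1": ("rec1", 0.50, 2.25)}
utt2spk = {"utt1": "spkA"}
text = {"utt1": "hello world"}
waveform = {"rec1": "BABEL_rec1"}  # wav.scp name with path/extension stripped
channel = {"rec1": "1"}            # default when no sph2pipe -c option is seen

rec, start, end = segments["utt1"]
stm_line = "%s %s %s %.2f %.2f %s" % (
    waveform[rec], channel[rec], utt2spk["utt1"], start, end, text["utt1"])
print(stm_line)  # BABEL_rec1 1 spkA 0.50 2.25 hello world
```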
+GetOptions("fragmentMarkers=s" => \$fragMarkers, "hesitationToken=s" => \$Hesitation, "oovToken=s" => \$OOV_symbol);
+
+if ($#ARGV == 0) {
+  $inDir = $ARGV[0];
+  print STDERR ("$0: Making stm file from information in $inDir\n");
+  print STDERR ("\tRemoving [$fragMarkers]+ from ends of tokens\n") if ($fragMarkers);
+  print STDERR ("\tPreserving hesitation tokens $Hesitation\n") unless ($Hesitation eq "");
+  print STDERR ("\tUsing $OOV_symbol as the OOV symbol\n") unless ($OOV_symbol eq "");
+} else {
+  print STDERR ("Usage: $0 [--options] DataDir\n");
+  print STDERR ("\t--fragmentMarkers Strip these from ends of each token (default: none)\n");
+  print STDERR ("\t--hesitationToken Preserve this token when deleting non-speech tokens (default: <hes>)\n");
+  print STDERR ("\t--oovToken Use this token to replace hard-coded OOVs (default: <unk>)\n");
+  exit(1);
+}
+
+$segmentsFile = "$inDir/segments";
+$scpFile = "$inDir/wav.scp";
+$utt2spkFile = "$inDir/utt2spk";
+$textFile = "$inDir/text";
+$stmFile = "$inDir/stm";
+$charStmFile = "$inDir/char.stm";
+$reco2ctmFile = "$inDir/reco2file_and_channel";
+
+################################################################################
+# Read the segmentIDs, file-IDs, start- and end-times from the segments file
+################################################################################
+
+my $num_failed_parses=0;
+my $num_failed_parses_max=10;
+
+die "Current version of script requires a segments file" unless (-e $segmentsFile);
+
+open(SEGMENTS, $segmentsFile)
+  || die "Unable to read segments file $segmentsFile";
+$numSegments = 0;
+while ($line=<SEGMENTS>) {
+  @tokens = split(/\s+/, $line);
+  unless ($#tokens == 3) {
+    $num_failed_parses+=1;
+    print STDERR "$0: Couldn't parse line $. in $segmentsFile\n"
+      if ($num_failed_parses == 1);
+    print STDERR ("\tLine: $line")
+      if ($num_failed_parses <= $num_failed_parses_max);
+    print STDERR "$0: Maximal threshold for failed line parses reached. Not warning anymore\n"
+      if ($num_failed_parses == $num_failed_parses_max);
+    next;
+  }
+  $segmentID = shift @tokens;
+  if (exists $fileID{$segmentID}) {
+    print STDERR ("$0: Skipping duplicate segment ID $segmentID in $segmentsFile\n");
+    next;
+  }
+  $fileID{$segmentID} = shift @tokens;
+  $startTime{$segmentID} = shift @tokens;
+  $endTime{$segmentID} = shift @tokens;
+  ++$numSegments;
+}
+close(SEGMENTS);
+print STDERR ("$0: Read info about $numSegments segment IDs from $segmentsFile\n");
+print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n");
+
+################################################################################
+# Read the waveform filenames from the wav.scp file. (Parse sph2pipe command.)
+################################################################################
+
+open(SCP, $scpFile)
+  || die "Unable to open scp file $scpFile\n";
+$numRecordings = 0;
+$num_failed_parses=0;
+while ($line=<SCP>) {
+  chomp;
+  if ($line =~ m:^\s*(\S+)\s+(.+)$:) {
+    $recordingID = $1;
+    $waveformFile = $2;
+  } else {
+    $num_failed_parses+=1;
+    print STDERR ("$0: Couldn't parse line $. in $scpFile\n")
+      if ($num_failed_parses == 1);
+    print STDERR ("\tLine: $line")
+      if ($num_failed_parses <= $num_failed_parses_max);
+    print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n" + if ($num_failed_parses eq $num_failed_parses_max); + next; + } + if (exists $waveform{$recordingID}) { + print STDERR ("$0: Skipping duplicate recording ID $recordingID in $scpFile\n"); + # BUG ALERT: This check may need to be turned off for multi-channel recordings, + # since the same recording may appear with with different channels? + next; + } + if ($waveformFile =~ m:^\S+$:) { + # This is a single filename, no shp2pipe or gunzip for reading waveforms + $waveform{$recordingID} = $waveformFile; + } elsif (($waveformFile =~ m:(sph2pipe|gunzip|gzip|cat|zcat)\s+:) && + ($waveformFile =~ m:\s+(\S+)\s*\|$:)) { + # HACK ALERT: the filename is *assumed* to be at the END of the command + $waveform{$recordingID} = $1; + $channel{$recordingID} = $1 if ($waveformFile =~ m:sph2pipe\s+.*\-c\s+(\S+)\s+.+:); + } elsif (($waveformFile =~ m:(sox)\s+:) && + ($waveformFile =~ m:\s+(\S+)\s*\|$:)) { + # HACK ALERT: the first element that does ends with '.wav' is assumed to + # be the original filename + @elems=split(/\s+/, $waveformFile); + foreach $elem (@elems) { + if ($elem =~ m/.*\.wav/) { + $filename=$elem; + last; + } + } + die ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n") if not defined $filename; + die ("$0: Filename $filename does not exist: in $scpFile\n\t$line\n") unless (-e $filename); + + $waveform{$recordingID} = $filename; + #$channel{$recordingID} = $filename; + } else { + print STDERR ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n"); + next; + } + $waveform{$recordingID} =~ s:.+/::; # remove path prefix + $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension + $channel{$recordingID} = 1 # Default + unless (exists $channel{$recordingID}); + ++$numRecordings; +} +close(SCP); +print STDERR ("$0: Read filenames for $numRecordings recording IDs from $scpFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Read speaker information from the utt2spk file +################################################################################ + +open(UTT2SPK, $utt2spkFile) + || die "Unable to read utt2spk file $utt2spkFile"; +$numSegments = 0; +$num_failed_parses = 0; +while ($line=) { + @tokens = split(/\s+/, $line); + if (! ($#tokens == 1)) { + $num_failed_parses+=1; + print STDERR ("$0: Couldn't parse line $. in $utt2spkFile\n") + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses le $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n" + if ($num_failed_parses eq $num_failed_parses_max); + next; + } + $segmentID = shift @tokens; + if (exists $speakerID{$segmentID}) { + print STDERR ("$0: Skipping duplicate segment ID $segmentID in $utt2spkFile\n"); + next; + } + $speakerID{$segmentID} = shift @tokens; + ++$numSegments; +} +close(UTT2SPK); +print STDERR ("$0: Read speaker IDs for $numSegments segments from $utt2spkFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Read the transcriptions from the text file +################################################################################ + +open(TEXT, $textFile) + || die "Unable to read text file $textFile"; +$numSegments = $numWords = 0; +$num_failed_parses = 0; +while ($line=) { + chomp; + if ($line =~ m:^(\S+)\s+(.+)$:) { + $segmentID = $1; + $text = $2; + } else { + $num_failed_parses+=1; + print STDERR ("$0: Couldn't parse line $. in $textFile\n") + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses <= $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached ($num_failed_parses/$num_failed_parses_max). Not warning anymore\n" + if ($num_failed_parses == $num_failed_parses_max); + next; + } + if (exists $transcription{$segmentID}) { + print STDERR ("$0: Skipping duplicate segment ID $segmentID in $segmentsFile\n"); + next; + } + $transcription{$segmentID} = ""; + @tokens = split(/\s+/, $text); + # This is where one could filter the transcription as necessary. + # E.g. remove noise tokens, mark non-scoring segments, etc. + # HACK ALERT: Current version does this is an ad hoc manner! + while ($w = shift(@tokens)) { + # Substitute OOV tokens specific to the Babel data + $w = $OOV_symbol if ($w eq "(())"); + # Remove fragMarkers, if provided, from either end of the word + $w =~ s:(^[$fragMarkers]|[$fragMarkers]$)::g if ($fragMarkers); + # Omit non-speech symbols such as , , etc. 
+ $w =~ s:^<[^>]+>$:: unless (($w eq $OOV_symbol) || ($w eq $Hesitation)); + next if ($w eq ""); + $transcription{$segmentID} .= " $w"; + $numWords++; + } + $transcription{$segmentID} =~ s:^\s+::; # Remove leading white space + $transcription{$segmentID} =~ s:\s+$::; # Remove training white space + $transcription{$segmentID} =~ s:\s+: :g; # Normalize remaining white space + # Transcriptions containing no words, or only OOVs and hesitations are not scored + $transcription{$segmentID} = "IGNORE_TIME_SEGMENT_IN_SCORING" + if (($transcription{$segmentID} eq "") || + ($transcription{$segmentID} =~ m:^(($OOV_symbol|$Hesitation)\s*)+$:)); + ++$numSegments; +} +close(TEXT); +print STDERR ("$0: Read transcriptions for $numSegments segments ($numWords words) from $textFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Write the transcriptions in stm format to a file named stm +################################################################################ + +print STDERR ("$0: Overwriting existing stm file $stmFile\n") + if (-s $stmFile); +open(STM, "| sort +0 -1 +1 -2 +3nb -4 > $stmFile") + || die "Unable to write to stm file $stmFile"; +$numSegments = 0; +foreach $segmentID (sort keys %fileID) { + if (exists $waveform{$fileID{$segmentID}}) { + printf STM ("%s %s %s %.2f %.2f", + $waveform{$fileID{$segmentID}}, + $channel{$fileID{$segmentID}}, + $speakerID{$segmentID}, + $startTime{$segmentID}, + $endTime{$segmentID}); + printf STM (" <%s>", $attributes{$segmentID}) if (exists $attributes{$segmentID}); + printf STM (" %s\n", $transcription{$segmentID}); + ++$numSegments; + } else { + print STDERR ("$0: No waveform found for segment $segmentID, file $fileID{$segmentID}\n"); + } +} +close(STM); +print STDERR ("$0: Wrote reference transcriptions for $numSegments segments to $stmFile\n"); + +################################################################################ +# Write a character-separated stm file as well, for CER computation +################################################################################ + +print STDERR ("$0: Overwriting existing stm file $charStmFile\n") + if (-s $charStmFile); +open(STM, "$stmFile") + || die "Unable to read back stm file $stmFile"; +binmode STM,":encoding(utf8)"; +open(CHARSTM, "> $charStmFile") + || die "Unable to write to char.stm file $charStmFile"; +binmode CHARSTM,":encoding(utf8)"; +while ($line=) { + @tokens = split(/\s+/, $line); + # The first 5 tokens are filename, channel, speaker, start- and end-time + for ($n=0; $n<5; $n++) { + $w = shift @tokens; + print CHARSTM ("$w "); + } + # CER is used only for some scripts, e.g. CJK. So only non-ASCII characters + # in the remaining tokens should be split into individual tokens. 
+ $w = join (" ", @tokens); + $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars + print CHARSTM ("$w\n"); +} +close(CHARSTM); +close(STM); +print STDERR ("$0: Wrote char.stm file $charStmFile\n"); + +################################################################################ +# Write the reco2file_and_channel file for use by Kaldi scripts +################################################################################ + +print STDERR ("$0: Overwriting existing reco2file_and_channel file $reco2ctmFile\n") + if (-s $reco2ctmFile); +open(RECO2CTM, "| sort > $reco2ctmFile") + || die "Unable to write to reco2file_and_channel file $reco2ctmFile"; +$numRecordings = 0; +foreach $recordingID (sort keys %waveform) { + printf RECO2CTM ("%s %s %s\n", $recordingID, $waveform{$recordingID}, $channel{$recordingID}); + ++$numRecordings; +} +close(RECO2CTM); +print STDERR ("$0: Wrote file_and_channel info for $numRecordings recordings to $reco2ctmFile\n"); + +print STDERR ("$0: Done!\n"); +exit(0); diff --git a/egs/babel/s5d/local/prepare_unicode_lexicon.py b/egs/babel/s5d/local/prepare_unicode_lexicon.py new file mode 100755 index 00000000000..ec2d9e64c37 --- /dev/null +++ b/egs/babel/s5d/local/prepare_unicode_lexicon.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python + +# Copyright 2016 Johns Hopkins University (Author: Matthew Wiesner) +# Apache 2.0 + +# ======= Prepare data/local directory for babel data with unicode tags ====== +# This script creates all files in the data/local directory for babel formats, +# except for the filtered_lexicon.txt file which is created by the +# make_lexicon_subset.sh script. +# +# This script basically takes the place of the prepare_lexicon.pl script. It +# creates the following files. +# +# 1. lexicon.txt (via local/lexicon/make_unicode_lexicon.py which happens prior +# to running this script. +# 2. nonsilence_phones.txt +# 3. silence_phones.txt +# 4. optional_silence.txt +# 5. extra_questions.txt +# ============================================================================ + +from __future__ import print_function +import codecs +import sys +import os +import argparse + +SKIP = ("", "''", "<", ">", "#") + + +# Extract a sorted set of distinct unicode graphemes from the lexicon +def extract_graphemes(table): + ''' + Extract a sorted set of distinct unicode graphemes from the lexicon. + + Usage: extract_graphemes(PATH_TO_LEXICON_TABLE) + + Arguments: + table -- path to the lexicon table output by make_unicode_lexicon.py + + Output: + unicode_graphemes -- the sorted set of distinct unicode graphemes + that occurred in the lexicon. + ''' + with codecs.open(table, "r", "utf-8") as fp: + + # Get relevant header columns for extracting graphemes used in lexicon + # -------------------------------------------------------------------- + header = fp.readline() + idx = [] + for i, j in enumerate(header.strip().split('\t')): + if j.startswith("MAP"): + idx.append(i) + + # -------------------------------------------------------------------- + # Extract all unique graphemes. Place into formats ... + # 1. unicode_graphemes = [g1, g2, g3, ... , gN] + # + # 2. Grapheme dict as keys for each base (without tags) grapheme along + # with all distinct graphmes starting with the base grapheme. + # phones_dict = {p1: p1_with_tags_1, p1_with_tags_2, ... , p2: ... 
} + # -------------------------------------------------------------------- + unicode_graphemes = [] + graphemes_dict = {} + for line in fp: + for i in idx: + grapheme = line.strip().split('\t')[i] + if grapheme not in SKIP: + unicode_graphemes.append(grapheme) + + # Create the sorted set of distinct unicode graphemes in the lexicon + unicode_graphemes = sorted(set(unicode_graphemes)) + for g in unicode_graphemes: + base_graph = g.split("_")[0] + if(base_graph not in graphemes_dict.keys()): + graphemes_dict[base_graph] = [] + + graphemes_dict[base_graph].append(g) + + return unicode_graphemes, graphemes_dict + + +def write_nonsilence_phones(graphemes_dict, nonsilence_phones, + extraspeech=None): + with codecs.open(nonsilence_phones, "w", "utf-8") as fp: + try: + with codecs.open(extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s\n" % line_vals[1]) + except (IOError, TypeError): + pass + + # Write each base grapheme with all tags on the same line + for base_grapheme in sorted(graphemes_dict.keys()): + line = "" + for grapheme in graphemes_dict[base_grapheme]: + line += grapheme + " " + fp.write("%s\n" % line.strip()) + + +def write_extra_questions(unicode_graphemes, graphemes_dict, tags, + extra_questions, nonspeech=None, extraspeech=None): + with codecs.open(extra_questions, "w", "utf-8") as fp: + # Write all unique "phones" but graphemes in this case, plus to a + # single line. + + # Write the extraspeech + try: + with codecs.open(extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s " % line_vals[1]) + except (IOError, TypeError): + pass + + for g in unicode_graphemes: + fp.write("%s " % g) + fp.write("\n") + + # Write the nonspeech + try: + with codecs.open(nonspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s " % line_vals[1]) + fp.write("\n") + except (IOError, TypeError): + pass + + # Write all possible phone_tag combinations that occur in the lexicon + for tag in tags: + for g in graphemes_dict.keys(): + tagged_grapheme = "_".join([g, tag]) + if(tagged_grapheme in graphemes_dict[g]): + fp.write("%s " % tagged_grapheme) + fp.write("\n") + + +def main(): + # --------------- Extract unicode_graphemes from the table -------------- + if(len(sys.argv[1:]) == 0): + print("Usage: local/prepare_unicode_lexicon.txt " + " " ) + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("table", help="Table containing all information about" + " how to map unicode graphemes to unicode descriptors") + parser.add_argument("lex_dir", help="Directory to which all files" + " should be written") + parser.add_argument("--nonspeech", help="File with map of nonspeech words", + action="store", default=None) + parser.add_argument("--extraspeech", help="File with map of extraspeech" + " words", action="store", default=None) + args = parser.parse_args() + unicode_graphemes, graphemes_dict = extract_graphemes(args.table) + + # ---------------- Prepare the directory data/local and a few files ------ + # Create the data/local directory if it does not yet exist + if not os.path.exists(args.lex_dir): + os.makedirs(args.lex_dir) + + # Write the slience_phones.txt file + with open(os.path.join(args.lex_dir, "silence_phones.txt"), "w") as fo: + with open(args.nonspeech, "r") as fi: + for line in fi: + line_vals = line.strip().split() + fo.write("%s\n" % line_vals[1]) + + # Write the optional_silence.txt file + with open(os.path.join(args.lex_dir, 
"optional_silence.txt"), "w") as fp: + fp.write("SIL\n") + + # --------------- Write the nonsilence_phones.txt file ------------------- + write_nonsilence_phones(graphemes_dict, + os.path.join(args.lex_dir, "nonsilence_phones.txt"), + extraspeech=args.extraspeech) + + # ------------------------- Extract tags --------------------------------- + tags = [] + for g in unicode_graphemes: + # Only consider graphemes with tags + g_tags = g.split("_") + if(len(g_tags) > 1): + tag = "_".join(g_tags[1:]) + if(tag not in tags): + tags.append(tag) + + # --------------- Write the extra questions file ------------------------- + write_extra_questions(unicode_graphemes, graphemes_dict, tags, + os.path.join(args.lex_dir, "extra_questions.txt"), + nonspeech=args.nonspeech, + extraspeech=args.extraspeech) + + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh new file mode 100755 index 00000000000..ae70b6a8f46 --- /dev/null +++ b/egs/babel/s5d/local/reestimate_langp.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +unk="" +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +datadir=$1 +langdir=$2 +idict=$3 +amdir=$4 +odict=$5 +olocallang=$6 +olang=$7 + + +mkdir -p $odict +mkdir -p $olang +mkdir -p $olocallang +steps/get_prons.sh --cmd "$train_cmd" $datadir $langdir $amdir +utils/dict_dir_add_pronprobs.sh --max-normalize true $idict \ + $amdir/pron_counts_nowb.txt $amdir/sil_counts_nowb.txt \ + $amdir/pron_bigram_counts_nowb.txt $odict + +utils/prepare_lang.sh --phone-symbol-table $langdir/phones.txt \ + --share-silence-phones true \ + $odict "$unk" $olocallang $olang + diff --git a/egs/babel/s5d/local/resegment/evaluate_segmentation.pl b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl new file mode 100755 index 00000000000..9d865cca8c9 --- /dev/null +++ b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl @@ -0,0 +1,198 @@ +#!/usr/bin/env perl + +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Apache 2.0 + +################################################################################ +# +# This script was written to check the goodness of automatic segmentation tools +# It assumes input in the form of two Kaldi segments files, i.e. a file each of +# whose lines contain four space-separated values: +# +# UtteranceID FileID StartTime EndTime +# +# It computes # missed frames, # false positives and # overlapping frames. +# +################################################################################ + +if ($#ARGV == 1) { + $ReferenceSegmentation = $ARGV[0]; + $HypothesizedSegmentation = $ARGV[1]; + printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n", + $ReferenceSegmentation, + $HypothesizedSegmentation); +} else { + printf STDERR "This program compares the reference segmenation with the proposted segmentation\n"; + printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n"; + printf STDERR "e.g. $0 data/dev10h/segments data/dev10h.seg/segments\n"; + exit (0); +} + +################################################################################ +# First read the reference segmentation, and +# store the start- and end-times of all segments in each file. 
+################################################################################ + +open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |") + || die "Unable to open $ReferenceSegmentation"; +$numLines = 0; +while ($line=) { + chomp $line; + @field = split("[ \t]+", $line); + unless ($#field == 3) { + exit (1); + printf STDERR "Skipping unparseable line in file $ReferenceSegmentation\n\t$line\n"; + next; + } + $fileID = $field[1]; + unless (exists $firstSeg{$fileID}) { + $firstSeg{$fileID} = $numLines; + $actualSpeech{$fileID} = 0.0; + $hypothesizedSpeech{$fileID} = 0.0; + $foundSpeech{$fileID} = 0.0; + $falseAlarm{$fileID} = 0.0; + $minStartTime{$fileID} = 0.0; + $maxEndTime{$fileID} = 0.0; + } + $refSegName[$numLines] = $field[0]; + $refSegStart[$numLines] = $field[2]; + $refSegEnd[$numLines] = $field[3]; + $actualSpeech{$fileID} += ($field[3]-$field[2]); + $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]); + $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]); + $lastSeg{$fileID} = $numLines; + ++$numLines; +} +close(SEGMENTS); +print STDERR "Read $numLines segments from $ReferenceSegmentation\n"; + +################################################################################ +# Process hypothesized segments sequentially, and gather speech/nonspeech stats +################################################################################ + +open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |") + # Kaldi segments files are sorted by UtteranceID, but we re-sort them here + # so that all segments of a file are read together, sorted by start-time. + || die "Unable to open $HypothesizedSegmentation"; +$numLines = 0; +$totalHypSpeech = 0.0; +$totalFoundSpeech = 0.0; +$totalFalseAlarm = 0.0; +$numShortSegs = 0; +$numLongSegs = 0; +while ($line=) { + chomp $line; + @field = split("[ \t]+", $line); + unless ($#field == 3) { + exit (1); + printf STDERR "Skipping unparseable line in file $HypothesizedSegmentation\n\t$line\n"; + next; + } + $fileID = $field[1]; + $segStart = $field[2]; + $segEnd = $field[3]; + if (exists $firstSeg{$fileID}) { + # This FileID exists in the reference segmentation + # So gather statistics for this UtteranceID + $hypothesizedSpeech{$fileID} += ($segEnd-$segStart); + $totalHypSpeech += ($segEnd-$segStart); + if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) { + # This entire segment is a false alarm + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } else { + # This segment may overlap one or more reference segments + $p = $firstSeg{$fileID}; + while ($refSegEnd[$p]<=$segStart) { + ++$p; + } + # The overlap, if any, begins at the reference segment p + $q = $lastSeg{$fileID}; + while ($refSegStart[$q]>=$segEnd) { + --$q; + } + # The overlap, if any, ends at the reference segment q + if ($q<$p) { + # This segment sits entirely in the nonspeech region + # between the two reference speech segments q and p + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } else { + if (($segEnd-$segStart)<0.20) { + # For diagnosing Pascal's VAD segmentation + print STDOUT "Found short speech region $line\n"; + ++$numShortSegs; + } elsif (($segEnd-$segStart)>60.0) { + ++$numLongSegs; + # For diagnosing Pascal's VAD segmentation + print STDOUT "Found long speech region $line\n"; + } + # There is some overlap with segments p through q + for ($s=$p; $s<=$q; ++$s) { + if ($segStart<$refSegStart[$s]) { + # There 
is a leading false alarm portion before s + $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart); + $totalFalseAlarm += ($refSegStart[$s]-$segStart); + $segStart=$refSegStart[$s]; + } + $speechPortion = ($refSegEnd[$s]<$segEnd) ? + ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart); + $foundSpeech{$fileID} += $speechPortion; + $totalFoundSpeech += $speechPortion; + $segStart=$refSegEnd[$s]; + } + if ($segEnd>$segStart) { + # There is a trailing false alarm portion after q + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } + } + } + } else { + # This FileID does not exist in the reference segmentation + # So all this speech counts as a false alarm + exit (1); + printf STDERR ("Unexpected fileID in hypothesized segments: %s", $fileID); + $totalFalseAlarm += ($segEnd-$segStart); + } + ++$numLines; +} +close(SEGMENTS); +print STDERR "Read $numLines segments from $HypothesizedSegmentation\n"; + +################################################################################ +# Now that all hypothesized segments have been processed, compute needed stats +################################################################################ + +$totalActualSpeech = 0.0; +$totalNonSpeechEst = 0.0; # This is just a crude estimate of total nonspeech. +foreach $fileID (sort keys %actualSpeech) { + $totalActualSpeech += $actualSpeech{$fileID}; + $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID}; + ####################################################################### + # Print file-wise statistics to STDOUT; can pipe to /dev/null is needed + ####################################################################### + printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d\%), %.2f min false alarm (~%d\%)\n", + $fileID, + ($actualSpeech{$fileID}/60.0), + ($hypothesizedSpeech{$fileID}/60.0), + ($foundSpeech{$fileID}/60.0), + ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)), + ($falseAlarm{$fileID}/60.0), + ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01))); +} + +################################################################################ +# Finally, we have everything needed to report the segmentation statistics. 
+################################################################################ + +printf STDERR ("------------------------------------------------------------------------\n"); +printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d\%), %.2f hrs false alarm (~%d\%)\n", + ($totalActualSpeech/3600.0), + ($totalHypSpeech/3600.0), + ($totalFoundSpeech/3600.0), + ($totalFoundSpeech*100/($totalActualSpeech+0.000001)), + ($totalFalseAlarm/3600.0), + ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001))); +printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n"); +printf STDERR ("------------------------------------------------------------------------\n"); diff --git a/egs/babel/s5d/local/resegment/generate_segments.sh b/egs/babel/s5d/local/resegment/generate_segments.sh new file mode 100755 index 00000000000..95e88deb87d --- /dev/null +++ b/egs/babel/s5d/local/resegment/generate_segments.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal) +# Apache 2.0 + +set -o pipefail +set -e + +nj=8 +cmd=run.pl +stage=0 +segmentation_opts="--isolated-resegmentation --min-inter-utt-silence-length 1.0 --silence-proportion 0.05" +decoder_extra_opts="" +reference_rttm= +get_text=false # Get text corresponding to new segments in ${output_dir} + # Assuming text is in $data/$type directory. + # Does not work very well because the data does not get aligned to many training transcriptions. +noise_oov=false # Treat as noise instead of speech +beam=7.0 +max_active=1000 + +#debugging stuff +echo $0 $@ + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +set -u + +if [ $# -ne 5 ]; then + echo "Usage: $0 [options] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --nj # Number of parallel jobs. " + echo " For the standard data directories of dev10h, dev2h and eval" + echo " this is taken from the lang.conf file" + echo " --segmentation-opts '--opt1 opt1val --opt2 opt2val' # options for segmentation.py" + echo " --reference-rttm # Reference RTTM file that will be used for analysis of the segmentation" + echo " --get-text (true|false) # Convert text from base data directory to correspond to the new segments" + echo + echo "e.g.:" + echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h" + exit 1 +fi + +datadir=$1 # The base data directory that contains at least the files wav.scp and reco2file_and_channel +lang=$2 +model_dir=$3 # Segmentation model directory created using local/resegment/run_segmentation_train.sh +temp_dir=$4 # Temporary directory to store some intermediate files during segmentation +output_dir=$5 # The target directory + +############################################################################### +# +# Phone Decoder +# +############################################################################### + +mkdir -p $temp_dir +dirid=`basename $datadir` +total_time=0 +t1=$(date +%s) + +if [ $stage -le 0 ] ; then + steps/decode_nolats.sh ${decode_extra_opts+} --write-words false --write-alignments true \ + --cmd "$cmd" --nj $nj --beam $beam --max-active $max_active \ + $model_dir/phone_graph $datadir $model_dir/decode_${dirid} || exit 1 +fi + +if [ $stage -le 1 ]; then + [ ! -f $model_dir/decode_${dirid}/ali.1.gz ] && echo "File $model_dir/decode_${dirid}/ali.1.gz does not exist!" 
&& exit 1 + $cmd JOB=1:$nj $model_dir/decode_${dirid}/log/predict.JOB.log \ + gunzip -c $model_dir/decode_${dirid}/ali.JOB.gz \| \ + ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $lang/phones.txt \| \ + gzip -c '>' $temp_dir/pred.JOB.gz || exit 1 + + mkdir -p $temp_dir/pred + gunzip -c $temp_dir/pred.*.gz | \ + perl -ne '($file, $phones)=split / /, $_, 2; + open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; + print {$fh} "$file $phones"; + close($fh);' || exit 1 + +fi +t2=$(date +%s) +total_time=$((total_time + t2 - t1)) +echo "SI decoding done in $((t2-t1)) seconds" + + +############################################################################### +# +# Resegmenter +# +############################################################################### + +if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then + echo "Error: this script only works if $lang/phones/optional_silence.txt contains exactly one entry."; + echo "You'd have to modify the script to handle other cases." + exit 1; +fi + +silphone=`cat $lang/phones/optional_silence.txt` +# silphone will typically be "sil" or "SIL". + +# 3 sets of phones: 0 is silence, 1 is noise, 2 is speech., +( +echo "$silphone 0" +if ! $noise_oov; then + grep -v -w $silphone $lang/phones/silence.txt \ + | awk '{print $1, 1;}' \ + | sed 's/SIL\(.*\)1/SIL\10/' \ + | sed 's/\(.*\)1/\12/' +else + grep -v -w $silphone $lang/phones/silence.txt \ + | awk '{print $1, 1;}' \ + | sed 's/SIL\(.*\)1/SIL\10/' +fi +cat $lang/phones/nonsilence.txt | awk '{print $1, 2;}' | sed 's/\(<.*>.*\)2/\11/' | sed 's/\(.*\)1/\12/' +) > $temp_dir/phone_map.txt + +mkdir -p $output_dir +mkdir -p $temp_dir/log + +local/resegment/segmentation.py --verbose 2 $segmentation_opts \ + $temp_dir/pred $temp_dir/phone_map.txt 2>$temp_dir/log/resegment.log | \ + sort > $output_dir/segments || exit 1 + +if [ ! -s $output_dir/segments ] ; then + echo "Zero segments created during segmentation process." + echo "That means something failed. Try the cause and re-run!" + exit 1 +fi + +t2=$(date +%s) +total_time=$((total_time + t2 - t1)) +echo "Resegment data done in $((t2-t1)) seconds" + +for file in reco2file_and_channel wav.scp ; do + [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1 + cp $datadir/$file $output_dir/$file +done + +# We'll make the speaker-ids be the same as the recording-ids (e.g. conversation +# sides). This will normally be OK for telephone data. +cat $output_dir/segments | awk '{print $1, $2}' > $output_dir/utt2spk || exit 1 +utils/utt2spk_to_spk2utt.pl ${output_dir}/utt2spk > $output_dir/spk2utt || exit 1 + + +dur_hours=`cat ${output_dir}/segments | awk '{num_secs += $4 - $3;} END{print (num_secs/3600);}'` +echo "Extracted segments of total length of $dur_hours hours audio" + +echo --------------------------------------------------------------------- +echo "Resegment data Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/local/resegment/segmentation.py b/egs/babel/s5d/local/resegment/segmentation.py new file mode 100755 index 00000000000..7c5c8665a16 --- /dev/null +++ b/egs/babel/s5d/local/resegment/segmentation.py @@ -0,0 +1,1508 @@ +#! 
/usr/bin/env python + +# Copyright 2014 Vimal Manohar +# Apache 2.0 + +import os, glob, argparse, sys, re, time +from argparse import ArgumentParser + +use_numpy = True +try: + import numpy as np +except ImportError: + use_numpy = False + +# Global stats for analysis taking RTTM file as reference +global_analysis_get_initial_segments = None +global_analysis_set_nonspeech_proportion = None +global_analysis_final = None + +def mean(l): + if len(l) > 0: + return float(sum(l)) / len(l) + return 0 + +# Analysis class +# Stores statistics like the confusion matrix, length of the segments etc. +class Analysis: + def __init__(self, file_id, frame_shift, prefix): + self.confusion_matrix = [0] * 9 + self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ] + self.state_count = [ [] for i in range(0,9) ] + self.markers = [ [] for i in range(0,9) ] + self.phones = [ [] for i in range(0,9) ] + self.min_length = [0] * 9 + self.max_length = [0] * 9 + self.mean_length = [0] * 9 + self.percentile25 = [0] * 9 + self.percentile50 = [0] * 9 + self.percentile75 = [0] * 9 + self.file_id = file_id + self.frame_shift = frame_shift + self.prefix = prefix + + # Add the statistics of this object to another object a + # Typically used in a global object to accumulate stats + # from local objects + def add(self, a): + for i in range(0,9): + self.confusion_matrix[i] += a.confusion_matrix[i] + self.state_count[i] += a.state_count[i] + + # Print the confusion matrix + # The interpretation of 'speech', 'noise' and 'silence' are bound to change + # through the different post-processing stages. e.g at the end, speech and silence + # correspond respectively to 'in segment' and 'out of segment' + def write_confusion_matrix(self, write_hours = False, file_handle = sys.stderr): + sys.stderr.write("Total counts: \n") + + name = ['Silence as silence', \ + 'Silence as noise', \ + 'Silence as speech', \ + 'Noise as silence', \ + 'Noise as noise', \ + 'Noise as speech', \ + 'Speech as silence', \ + 'Speech as noise', \ + 'Speech as speech'] + + for j in range(0,9): + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. + # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + self.confusion_matrix[j] * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + self.confusion_matrix[j] * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : Confusion: Type %d : %8.3f counts\n" % + (self.file_id, self.prefix, j, self.confusion_matrix[j])) + # End if + # End for loop over 9 cells of confusion matrix + + # Print the total stats that are just row and column sums of + # 3x3 confusion matrix + def write_total_stats(self, write_hours = True, file_handle = sys.stderr): + sys.stderr.write("Total Stats: \n") + + name = ['Actual Silence', \ + 'Actual Noise', \ + 'Actual Speech'] + + for j in [0,1,2]: + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. 
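The 9-entry confusion_matrix used throughout this class is a flattened 3x3 table of (reference class, hypothesis class) counts, indexed as 3*ref + hyp with classes 0 = silence, 1 = noise, 2 = speech; this is the same indexing that get_initial_segments applies further down. A minimal standalone sketch of that bookkeeping (the example data is hypothetical):

def flat_confusion(ref, hyp):
    """ref, hyp: equal-length sequences of class ids in {0, 1, 2}."""
    counts = [0] * 9
    for r, h in zip(ref, hyp):
        counts[3 * r + h] += 1   # row = reference, column = hypothesis
    return counts

# e.g. flat_confusion([2, 2, 0], [2, 0, 0]) yields one count at index 8
# ("speech as speech"), one at index 6 ("speech as silence") and one at
# index 0 ("silence as silence").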
+ # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : %s : %8.3f counts\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]))) + # End if + # End for loop over 3 rows of confusion matrix + + name = ['Predicted Silence', \ + 'Predicted Noise', \ + 'Predicted Speech'] + + for j in [0,1,2]: + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. + # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : %s : %8.3f counts\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]))) + # End if + # End for loop over 3 columns of confusion matrix + + # Print detailed stats of lengths of each of the 3 types of frames + # in 8 kinds of segments + def write_type_stats(self, file_handle = sys.stderr): + for j in range(0,3): + # 3 types of frames. Silence, noise, speech. + # Typically, we store the number of frames of each type here. + for i in range(0,9): + # 2^3 = 8 kinds of segments like 'segment contains only silence', + # 'segment contains only noise', 'segment contains noise and speech'. + # For compatibility with the rest of the analysis code, + # the for loop is over 9 kinds. + max_length = max([0]+self.type_counts[j][i]) + min_length = min([10000]+self.type_counts[j][i]) + mean_length = mean(self.type_counts[j][i]) + if use_numpy: + try: + percentile25 = np.percentile(self.type_counts[j][i], 25) + except ValueError: + percentile25 = 0 + try: + percentile50 = np.percentile(self.type_counts[j][i], 50) + except ValueError: + percentile50 = 0 + try: + percentile75 = np.percentile(self.type_counts[j][i], 75) + except ValueError: + percentile75 = 0 + + file_handle.write("File %s: %s : TypeStats: Type %d %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, j, i, min_length, max_length, mean_length, percentile25, percentile50, percentile75)) + # End for loop over 9 different kinds of segments + # End for loop over 3 types of frames + + # Print detailed stats of each cell of the confusion matrix. 
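The "Actual" and "Predicted" totals printed by write_total_stats are just row and column sums of the flat matrix: row j is the contiguous slice [3*j : 3*j+3], while column j is the strided slice [j : j+7 : 3]. A standalone illustration with made-up counts:

def marginals(counts):
    actual = [sum(counts[3 * j:3 * j + 3]) for j in range(3)]   # row sums
    predicted = [sum(counts[j:j + 7:3]) for j in range(3)]      # column sums
    return actual, predicted

# With counts = [5, 0, 1,  0, 2, 1,  2, 0, 9]:
#   actual    -> [6, 3, 11]  (reference silence, noise, speech)
#   predicted -> [7, 2, 11]  (hypothesized silence, noise, speech)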
+    # The stats include different statistical measures like mean, max, min
+    # and median of the length of continuous regions of frames in
+    # each of the 9 cells of the confusion matrix
+    def write_length_stats(self, file_handle = sys.stderr):
+        for i in range(0,9):
+            self.max_length[i] = max([0]+self.state_count[i])
+            self.min_length[i] = min([10000]+self.state_count[i])
+            self.mean_length[i] = mean(self.state_count[i])
+            if use_numpy:
+                try:
+                    self.percentile25[i] = np.percentile(self.state_count[i], 25)
+                except ValueError:
+                    self.percentile25[i] = 0
+                try:
+                    self.percentile50[i] = np.percentile(self.state_count[i], 50)
+                except ValueError:
+                    self.percentile50[i] = 0
+                try:
+                    self.percentile75[i] = np.percentile(self.state_count[i], 75)
+                except ValueError:
+                    self.percentile75[i] = 0
+
+            file_handle.write("File %s: %s : Length: Type %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, i, self.min_length[i], self.max_length[i], self.mean_length[i], self.percentile25[i], self.percentile50[i], self.percentile75[i]))
+        # End for loop over 9 cells
+
+    # Print detailed stats of each cell of the confusion matrix.
+    # Similar structure to the above function, but this also prints additional
+    # details. The format is:
+    # Markers: Type <type>: <start_frame> (<num_of_frames>) (<hypothesized_phones>)
+    # The hypothesized_phones can be looked at to see what phones are
+    # present in the hypothesis from start_frame for num_of_frames frames.
+    def write_markers(self, file_handle = sys.stderr):
+        file_handle.write("Start frames of different segments:\n")
+        for j in range(0,9):
+            if self.phones[j] == []:
+                file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+ ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0])))))
+            else:
+                file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+') ( ' + str(self.phones[j][i]) + ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0])))))
+        # End for loop over 9 cells
+
+# Function to read a standard IARPA Babel RTTM file
+# as structured on Jan 16, 2014
+def read_rttm_file(rttm_file, temp_dir, frame_shift):
+    file_id = None
+    this_file = []
+    ref_file_handle = None
+    reference = {}
+    for line in open(rttm_file).readlines():
+        splits = line.strip().split()
+        type1 = splits[0]
+        if type1 == "SPEAKER":
+            continue
+        if splits[1] != file_id:
+            # A different file_id. Need to open a different file to write
+            if this_file != []:
+                # If this_file is empty, no reference RTTM corresponding to the file_id
+                # has been read. This will happen at the start of the file_id. Otherwise it
+                # means a contiguous segment of the previous file_id has been processed, so
+                # write it to the file corresponding to the previous file_id
+                try:
+                    ref_file_handle.write(' '.join(this_file))
+                    # Close the previous file if any
+                    ref_file_handle.close()
+                    this_file = []
+                except AttributeError:
+                    # Ignore AttributeError. It is expected.
+                    pass
+            # End if
+
+            file_id = splits[1]
+            if (file_id not in reference):
+                # First time seeing this file_id. Open a new file for writing.
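read_rttm_file converts RTTM times in seconds into per-frame class labels by rounding to the nearest frame and padding any unlabeled gap with "0" (silence), as the code below this point shows. The same conversion as an isolated, runnable sketch (function name hypothetical):

def rttm_span_to_frames(labels, start_sec, dur_sec, cls, frame_shift=0.01):
    """Append one RTTM span to a per-frame label list (a sketch)."""
    start = int(start_sec / frame_shift + 0.5)
    dur = int(dur_sec / frame_shift + 0.5)
    if len(labels) < start:
        labels.extend(["0"] * (start - len(labels)))  # unlabeled gap -> silence
    labels.extend([cls] * dur)                        # "1" = noise, "2" = speech
    return labels

# e.g. rttm_span_to_frames([], 1.00, 0.50, "2") gives 100 frames of "0"
# followed by 50 frames of "2".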
+ reference[file_id] = 1 + try: + ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'w') + except IOError: + sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for writing\n") + sys.exit(1) + ref_file_handle.write(file_id + "\t") + else: + # This file has been seen before but not in the previous iteration. + # The file has already been closed. So open it for append. + try: + this_file = open(temp_dir+"/"+file_id+".ref").readline().strip().split()[1:] + ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'a') + except IOError: + sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for appending\n") + sys.exit(1) + # End if + # End if + + i = len(this_file) + category = splits[6] + word = splits[5] + start_time = int(float(splits[3])/frame_shift + 0.5) + duration = int(float(splits[4])/frame_shift + 0.5) + if i < start_time: + this_file.extend(["0"]*(start_time - i)) + if type1 == "NON-LEX": + if category == "other": + # is taken as Silence + this_file.extend(["0"]*duration) + else: + this_file.extend(["1"]*duration) + if type1 == "LEXEME": + this_file.extend(["2"]*duration) + if type1 == "NON-SPEECH": + this_file.extend(["1"]*duration) + + ref_file_handle.write(' '.join(this_file)) + ref_file_handle.close() + +# Stats class to store some basic stats about the number of +# times the post-processor goes through particular loops or blocks +# of code in the algorithm. This is just for debugging. +class Stats: + def __init__(self): + self.inter_utt_nonspeech = 0 + self.merge_nonspeech_segment = 0 + self.merge_segments = 0 + self.split_segments = 0 + self.silence_only = 0 + self.noise_only = 0 + + def print_stats(self): + sys.stderr.write("Inter-utt nonspeech: %d\n" % self.inter_utt_nonspeech) + sys.stderr.write("Merge nonspeech segment: %d\n" % self.merge_nonspeech_segment) + sys.stderr.write("Merge segment: %d\n" % self.merge_segments) + sys.stderr.write("Split segments: %d\n" % self.split_segments) + sys.stderr.write("Noise only: %d\n" % self.noise_only) + sys.stderr.write("Silence only: %d\n" % self.silence_only) + + def reset(self): + self.inter_utt_nonspeech = 0 + self.merge_nonspeech_segment = 0 + self.merge_segments = 0 + self.split_segments = 0 + self.silence_only = 0 + self.noise_only = 0 + +# Timer class to time functions +class Timer: + def __enter__(self): + self.start = time.clock() + return self + def __exit__(self, *args): + self.end = time.clock() + self.interval = self.end - self.start + +# The main class for post-processing a file. 
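The Timer above is used as a context manager throughout the resegmenter; a usage sketch is below (assuming the Timer class above is in scope; the summed range is a stand-in workload). Note that time.clock() measures CPU time and was the idiom of the Python 2 era this script targets; time.perf_counter() is the modern replacement.

with Timer() as t:
    total = sum(range(10 ** 6))  # stand-in for real work
sys.stderr.write("timed block took %f sec\n" % t.interval)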
+# This does the segmentation either looking at the file isolated +# or by looking at both classes simultaneously +class JointResegmenter: + def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): + + # Pointers to prediction arrays and Initialization + self.P = P # Predicted phones + self.B = [ i for i in A ] # Original predicted classes + self.A = A # Predicted classes + self.file_id = f # File name + self.N = len(A) # Length of the prediction (= Num of frames in the audio file) + self.S = [False] * self.N # Array of Start boundary markers + self.E = [False] * (self.N+1) # Array of End boundary markers + + self.phone_map = phone_map + self.options = options + + # Configuration + + self.frame_shift = options.frame_shift + # Convert length in seconds to frames + self.max_frames = int(options.max_segment_length / options.frame_shift) + self.hard_max_frames = int(options.hard_max_segment_length / options.frame_shift) + self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length / options.frame_shift) + if ( options.remove_noise_only_segments == "false" ): + self.remove_noise_segments = False + elif ( options.remove_noise_only_segments == "true" ): + self.remove_noise_segments = True + + # End of Configuration + + # Define Frame Type Constants + self.THIS_SILENCE = ("0","1","2") + self.THIS_NOISE = ("3","4","5") + self.THIS_SPEECH = ("6", "7", "8") + self.THIS_SPEECH_THAT_SIL = ("6",) + self.THIS_SPEECH_THAT_NOISE = ("7",) + self.THIS_SIL_CONVERT_THAT_SIL = ("9",) + self.THIS_SIL_CONVERT_THAT_NOISE = ("10",) + self.THIS_SIL_CONVERT = ("9","10","11") + self.THIS_SILENCE_CONVERT = ("9","10","11") + self.THIS_NOISE_CONVERT_THAT_SIL = ("12",) + self.THIS_NOISE_CONVERT_THAT_NOISE = ("13",) + self.THIS_NOISE_CONVERT = ("12","13","14") + self.THIS_NOISE_OR_SILENCE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_SILENCE_OR_NOISE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_CONVERT = self.THIS_SILENCE_CONVERT + self.THIS_NOISE_CONVERT + self.THIS_SILENCE_PLUS = self.THIS_SILENCE + self.THIS_SILENCE_CONVERT + self.THIS_NOISE_PLUS = self.THIS_NOISE + self.THIS_NOISE_CONVERT + self.THIS_SPEECH_PLUS = self.THIS_SPEECH + self.THIS_CONVERT + + if stats != None: + self.stats = stats + + self.reference = None + if reference != None: + if len(reference) < self.N: + self.reference = reference + ["0"] * (self.N - len(reference)) + assert (len(self.reference) == self.N) + else: + self.reference = reference + + # This function restricts the output to length N + def restrict(self, N): + self.B = self.B[0:N] + self.A = self.A[0:N] + self.S = self.S[0:N] + self.E = self.E[0:N+1] + if sum(self.S) == sum(self.E) + 1: + self.E[N] = True + self.N = N + + # Main resegment function that calls other functions + def resegment(self): + with Timer() as t: + self.get_initial_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: get_initial_segments took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.set_nonspeech_proportion() + if self.options.verbose > 1: + sys.stderr.write("For %s: set_nonspeech_proportion took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.merge_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: merge took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.split_long_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: split took %f sec\n" % (self.file_id, t.interval)) + if self.remove_noise_segments: + with Timer() as t: + 
self.remove_noise_only_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: remove took %f sec\n" % (self.file_id, t.interval)) + elif self.min_inter_utt_nonspeech_length > 0.0: + # This is the typical one with augmented training setup + self.remove_silence_only_segments() + + if self.options.verbose > 1: + sys.stderr.write("For file %s\n" % self.file_id) + self.stats.print_stats() + sys.stderr.write("\n") + self.stats.reset() + + def get_initial_segments(self): + for i in range(0, self.N): + if (i > 0) and self.A[i-1] != self.A[i]: + # This frame is different from the previous frame. + if self.A[i] in self.THIS_SPEECH: + # This frame is speech. + if self.A[i-1] in self.THIS_SPEECH: + # Both this and the previous frames are speech + # But they are different. e.g. "8 7" + # So this is the end of the previous region and + # the beginning of the next region + self.S[i] = True + self.E[i] = True + else: + # The previous frame is non-speech, but not this one. + # So this frame is the beginning of a new segment + self.S[i] = True + else: + # This frame is non-speech + if self.A[i-1] in self.THIS_SPEECH: + # Previous frame is speech, but this one is not. + # So this frame is the end of the previous segment + self.E[i] = True + elif i == 0 and self.A[i] in self.THIS_SPEECH: + # The frame is speech. So this is the start of a new segment. + self.S[i] = True + if self.A[self.N-1] in self.THIS_SPEECH: + # Handle the special case where the last frame of file is not nonspeech + self.E[self.N] = True + assert(sum(self.S) == sum(self.E)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after get_initial_segments") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in self.THIS_SILENCE: + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_NOISE: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SILENCE: + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_NOISE: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SILENCE: + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_NOISE: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_get_initial_segments.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def set_nonspeech_proportion(self): + num_speech_frames = 0 + in_segment = False + + # Active frames are the frames that are either segment starts + # or segment ends + active_frames = [] + for n in range(0, self.N + 1): + if self.E[n]: + assert(in_segment) + in_segment = False + active_frames.append(n) + if n < self.N and self.S[n]: + 
assert(not in_segment)
+                in_segment = True
+                active_frames.append(n)
+            if n < self.N:
+                if in_segment:
+                    # Count the number of speech frames
+                    num_speech_frames += 1
+        assert (not in_segment)
+        if num_speech_frames == 0:
+            sys.stderr.write("%s: Warning: no speech found for recording %s\n" % (sys.argv[0], self.file_id))
+
+        # Set the number of non-speech frames to be added depending on the
+        # silence proportion. The target number of frames in the segments
+        # is computed as below:
+        target_segment_frames = int(num_speech_frames / (1.0 - self.options.silence_proportion))
+
+        # The number of frames currently in the segments
+        num_segment_frames = num_speech_frames
+
+        count = 0
+        while num_segment_frames < target_segment_frames:
+            count += 1
+            changed = False
+            for i in range(0, len(active_frames)):
+                # At each active frame, try to include a nonspeech frame into
+                # the segment, thus padding the speech segments with some
+                # non-speech frames. These converted non-speech frames are
+                # labelled 9...14 depending on whether they were originally
+                # 0...5 respectively
+                n = active_frames[i]
+                if self.E[n] and n < self.N and not self.S[n]:
+                    # This must be the beginning of a non-speech region.
+                    # Include some of this non-speech in the segments
+                    assert (self.A[n] not in self.THIS_SPEECH)
+
+                    # Convert the non-speech frame to be included in segment
+                    self.A[n] = str(int(self.B[n]) + 9)
+                    if self.B[n-1] != self.B[n]:
+                        # In this frame there is a transition from
+                        # one type of non-speech (0, 1 ... 5) to another,
+                        # so it's the start of a segment. Also add it to the
+                        # end of the active frames list
+                        self.S[n] = True
+                        active_frames.append(n+1)
+                    else:
+                        # We need to extend the segment end since we have
+                        # included a non-speech frame. Remove the current segment
+                        # end mark and add one at the next frame
+                        self.E[n] = False
+                        active_frames[i] = n + 1
+                        self.E[n+1] = True
+                    # Increment the number of frames in the segments
+                    num_segment_frames += 1
+                    changed = True
+                if n < self.N and self.S[n] and n > 0 and not self.E[n]:
+                    # This must be the beginning of a speech region.
+                    # Include some non-speech before it into the segments
+                    assert (self.A[n-1] not in self.THIS_SPEECH)
+                    self.A[n-1] = str(int(self.B[n-1]) + 9)
+                    if self.B[n-1] != self.B[n]:
+                        self.E[n] = True
+                        active_frames.append(n-1)
+                    else:
+                        self.S[n] = False
+                        active_frames[i] = n - 1
+                        self.S[n-1] = True
+                    num_segment_frames += 1
+                    changed = True
+                if num_segment_frames >= target_segment_frames:
+                    break
+            if not changed: # avoid an infinite loop; if no changes, then break
+ break + if num_segment_frames < target_segment_frames: + proportion = float(num_segment_frames - num_speech_frames) / num_segment_frames + sys.stderr.write("%s: Warning: for recording %s, only got a proportion %f of non-speech frames, versus target %f\n" % (sys.argv[0], self.file_id, proportion, self.options.silence_proportion)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after set_nonspeech_proportion") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_CONVERT: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_CONVERT: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_CONVERT: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_set_nonspeech_proportion.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def merge_segments(self): + # Get list of frames which have segment start and segment end + # markers into separate lists + segment_starts = [i for i, val in enumerate(self.S) if val] + segment_ends = [i for i, val in enumerate(self.E) if val] + assert (sum(self.S) == sum(self.E)) + + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts before non-speech adding: %d\n" % len(segment_starts)) + + if self.min_inter_utt_nonspeech_length > 0.0: + segment_starts = list(set([0] + segment_starts + segment_ends + [self.N])) + segment_starts.sort() + segment_starts.pop() + segment_ends= list(set([0] + segment_starts + segment_ends + [self.N])) + segment_ends.sort() + segment_ends.pop(0) + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts after non-speech adding: %d\n" % len(segment_starts)) + for i in segment_starts: + self.S[i] = True + for i in segment_ends: + self.E[i] = True + + # Just a check. 
There must always be an equal number of segment starts
+        # and segment ends
+        assert (len(segment_starts) == len(segment_ends))
+
+        # A boundary is a frame which is both a segment start and a segment end.
+        # The list of boundaries is obtained in the following step along with
+        # a few statistics like the type of segment on either side of the boundary
+        # and the length of the segment on either side of it
+        boundaries = []
+        i = 0
+        j = 0
+        while i < len(segment_starts) and j < len(segment_ends):
+            if segment_ends[j] < segment_starts[i]:
+                # The segment end marker is before the segment start marker.
+                # This means that this segment end marker corresponds to a segment
+                # that is before the one indicated by the segment start marker.
+                # So advance the segment end pointer to the next segment end to
+                # check if that is a 'boundary'
+                j += 1
+            elif segment_ends[j] > segment_starts[i]:
+                # The segment end marker is after the segment start marker.
+                # This means that this segment end marker would correspond
+                # to the segment indicated by the segment start marker.
+                # So advance the segment start pointer to the next segment start to
+                # check if that is a 'boundary'
+                i += 1
+            else:
+                assert(i < len(segment_starts) and j < len(segment_ends))
+                # A boundary:
+                # Find the segment score as the min of the lengths of the segments
+                # to the left and to the right.
+                # This segment score will be used to prioritize merging of
+                # the segment with its neighbor
+                assert ((j + 1) < len(segment_ends))
+                segment_score = min(segment_starts[i] - segment_starts[i-1], \
+                    segment_ends[j+1] - segment_ends[j])
+                # Also find the type of transition of the segments at the boundary.
+                # This is also used to prioritize the merging of the segment
+                boundaries.append((segment_ends[j], segment_score, \
+                    self.transition_type(segment_ends[j])))
+
+                # Sort the boundaries based on segment score.
+                # Then sort based on the type of transition; since the sort is
+                # stable, the boundaries stay ordered by segment score within
+                # each transition type
+                boundaries.sort(key = lambda x: x[1])
+                boundaries.sort(key = lambda x: x[2])
+                i += 1
+                j += 1
+            # End if
+        # End while loop
+
+        # Begin merging of segments by removing the start and end mark
+        # at the boundary to be merged
+        count = 0
+        for b in boundaries:
+            count += 1
+            segment_length = 0
+
+            if self.min_inter_utt_nonspeech_length > 0.0 and not self.E[b[0]]:
+                # This will happen only if the boundary is at the end of
+                # a non-speech region that has already been merged or removed.
+                # b[0] will then not be an end mark.
+                continue
+
+            # Count the number of frames in the segment to the
+            # left of the boundary
+            p = b[0] - 1
+            while p >= 0:
+                if self.S[p]:
+                    break
+                p -= 1
+                # End if
+            # End while loop
+            p_left = p
+            segment_length += b[0] - p
+
+            # Count the number of frames in the segment to the
+            # right of the boundary
+            p = b[0] + 1
+            while p <= self.N:
+                if self.E[p]:
+                    break
+                p += 1
+            assert (self.min_inter_utt_nonspeech_length == 0 or p == self.N or self.S[p] or self.A[p] in self.THIS_SILENCE_OR_NOISE)
+
+            if self.min_inter_utt_nonspeech_length > 0 and self.A[b[0]] in self.THIS_SILENCE_OR_NOISE:
+                assert(b[2] == 6 or b[2] == 7)
+                if (p - b[0]) > self.min_inter_utt_nonspeech_length:
+                    # This is a non-speech segment that is longer than the minimum
+                    # inter-utterance non-speech length.
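The decision at this point, dropping a long pause from the segments or merging a short one into its neighbors, reduces to comparing the pause length in frames against min_inter_utt_silence_length / frame_shift. A toy sketch of that rule (function name is hypothetical; the defaults mirror the script's options):

def pause_action(pause_frames, min_inter_utt_sec=1.0, frame_shift=0.01):
    """Return what merge_segments does with a run of non-speech (a sketch)."""
    if pause_frames > min_inter_utt_sec / frame_shift:
        return "treat as inter-utterance gap (remove from segments)"
    return "candidate for merging with neighbors (subject to max length)"

# e.g. with the defaults, a 120-frame (1.2 s) pause separates utterances,
# while an 80-frame (0.8 s) pause is merged if the result stays short enough.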
+                    # Therefore treat this non-speech as inter-utterance non-speech and
+                    # remove it from the segments
+                    self.S[b[0]] = False
+                    self.E[p] = False
+
+                    # Count the number of times the inter-utterance non-speech
+                    # length is greater than the set threshold.
+                    # This is the number of times the silence is
+                    # not merged with adjacent speech
+                    self.stats.inter_utt_nonspeech += 1
+
+                    # This boundary is no longer valid,
+                    # so we can continue to the next boundary
+                    continue
+                # End if
+
+                # This non-speech segment is shorter than the minimum inter-utterance
+                # non-speech length. It is possible to merge this segment
+                # with the adjacent ones as long as the length of the
+                # segment after merging stays within limits.
+                p_temp = p
+                p += 1
+                while p <= self.N:
+                    if self.E[p]:
+                        break
+                    p += 1
+                # End while loop
+                segment_length += p - b[0]
+                if segment_length < self.max_frames:
+                    # Merge the non-speech segment with the segments
+                    # on either side
+
+                    # Count the number of times a segment merge happens
+                    self.stats.merge_nonspeech_segment += 1
+
+                    if p_temp < self.N:
+                        self.S[p_temp] = False
+                        self.E[p_temp] = False
+                    self.S[b[0]] = False
+                    self.E[b[0]] = False
+                    continue
+                else:
+                    # The merged segment length is longer than max_frames.
+                    # Therefore treat this non-speech as inter-utterance non-speech and
+                    # remove it from the segments
+                    self.S[b[0]] = False
+                    self.E[p_temp] = False
+                    continue
+                # End if
+            elif self.min_inter_utt_nonspeech_length > 0 and (b[2] == 8 or b[2] == 9):
+                assert(p_left == 0)
+                if b[0] - p_left > self.min_inter_utt_nonspeech_length:
+                    self.S[p_left] = False
+                    self.E[b[0]] = False
+                    continue
+                # End if
+            # End if
+            segment_length += p - b[0]
+
+            if segment_length < self.max_frames:
+                self.stats.merge_segments += 1
+                self.S[b[0]] = False
+                self.E[b[0]] = False
+            # End if
+        # End for loop over boundaries
+
+        assert (sum(self.S) == sum(self.E))
+
+        ###########################################################################
+        # Analysis section
+
+        if self.reference != None and self.options.verbose > 3:
+            a = self.segmentation_analysis("Analysis after merge_segments")
+            a.write_confusion_matrix()
+
+            if self.reference != None and self.options.verbose > 4:
+                a.write_type_stats()
+            # End if
+
+            if self.reference != None and self.options.verbose > 4:
+                a.write_markers()
+            # End if
+        # End if
+        ###########################################################################
+    # End function merge_segments
+
+    def split_long_segments(self):
+        assert (sum(self.S) == sum(self.E))
+        for n in range(0, self.N):
+            if self.S[n]:
+                p = n + 1
+                while p <= self.N:
+                    if self.E[p]:
+                        break
+                    p += 1
+                segment_length = p - n
+                if segment_length > self.hard_max_frames:
+                    # Count the number of times long segments are split
+                    self.stats.split_segments += 1
+
+                    num_pieces = int((float(segment_length) / self.hard_max_frames) + 0.99999)
+                    sys.stderr.write("%s: Warning: for recording %s, " \
+                        % (sys.argv[0], self.file_id) \
+                        + "splitting segment of length %f seconds into %d pieces " \
+                        % (segment_length * self.frame_shift, num_pieces) \
+                        + "(--hard-max-segment-length %f)\n" \
+                        % self.options.hard_max_segment_length)
+                    frames_per_piece = int(segment_length / num_pieces)
+                    for i in range(1,num_pieces):
+                        q = n + i * frames_per_piece
+                        self.S[q] = True
+                        self.E[q] = True
+                if p - 1 > n:
+                    n = p - 1
+        assert (sum(self.S) == sum(self.E))
+    # End function split_long_segments
+
+    def remove_silence_only_segments(self):
+        for n in range(0, self.N):
+            # Run through to find a segment start
+            if self.S[n]:
+                p = n
+
saw_nonsilence = False + # From the segment start, go till the segment end to see + # if there is speech in it + while p <= self.N: + if self.E[p] and p != n: + break + if p < self.N and self.A[p] not in self.THIS_SILENCE: + saw_nonsilence = True + p += 1 + # End of while loop through the segment + assert (p > self.N or self.E[p]) + if not saw_nonsilence: + # Count the number of silence only segments + self.stats.silence_only += 1 + + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + # Go to the end of the segment since that segment is + # already processed + n = p - 1 + # End if + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_silence_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + # End function remove_silence_only_segments + + def remove_noise_only_segments(self): + for n in range(0, self.N): + if self.S[n]: + p = n + saw_speech = False + while p <= self.N: + if self.E[p] and p != n: + break + if self.A[p] in self.THIS_SPEECH: + saw_speech = True + p += 1 + assert (self.E[p]) + if not saw_speech: + # Count the number of segments with no speech + self.stats.noise_only += 1 + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + n = p - 1 + # End if + # End if + # End for loop over frames + + ########################################################################### + # Analysis section + + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_noise_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + ########################################################################### + # End function remove_noise_only_segments + + # Return the transition type from frame j-1 to frame j + def transition_type(self, j): + assert (j > 0) + assert (self.A[j-1] != self.A[j] or self.A[j] in self.THIS_CONVERT) + if self.A[j-1] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL) and self.A[j] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL): + return 0 + if self.A[j-1] in self.THIS_SPEECH and self.A[j] in self.THIS_SPEECH: + return 1 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE): + return 2 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT): + return 3 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE): + return 4 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_CONVERT): + return 5 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_NOISE): + return 6 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE): + return 7 + if self.A[j-1] in (self.THIS_SPEECH_PLUS 
+ self.THIS_NOISE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 8 + if self.A[j-1] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 9 + assert (False) + + # Output the final segments + def print_segments(self, out_file_handle = sys.stdout): + # We also do some sanity checking here. + segments = [] + + assert (self.N == len(self.S)) + assert (self.N + 1 == len(self.E)) + + max_end_time = 0 + n = 0 + while n < self.N: + if self.E[n] and not self.S[n]: + sys.stderr.write("%s: Error: Ending segment before starting it: n=%d\n" % (sys.argv[0], n)) + if self.S[n]: + p = n + 1 + while p < self.N and not self.E[p]: + assert (not self.S[p]) + p += 1 + assert (p == self.N or self.E[p]) + segments.append((n,p)) + max_end_time = p + if p < self.N and self.S[p]: + n = p - 1 + else: + n = p + n += 1 + + if len(segments) == 0: + sys.stderr.write("%s: Warning: no segments for recording %s\n" % (sys.argv[0], self.file_id)) + sys.exit(1) + + ############################################################################ + # Analysis section + + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis final") + + if self.reference != None: + count = 0 + in_seg = False + for i in range(0,self.N): + if in_seg and self.E[i]: + in_seg = False + if i == 0 and self.S[i]: + in_seg = True + if not in_seg and self.S[i]: + in_seg = True + if self.reference[i] == "0" and not in_seg: + C[i] = "0" + elif self.reference[i] == "0" and in_seg: + C[i] = "2" + elif self.reference[i] == "1" and not in_seg: + C[i] = "3" + elif self.reference[i] == "1" and in_seg: + C[i] = "5" + elif self.reference[i] == "2" and not in_seg: + C[i] = "6" + elif self.reference[i] == "2" and in_seg: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + if self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.options.verbose > 1: + a.write_markers() + + global_analysis_final.add(a) + ############################################################################ + + # we'll be printing the times out in hundredths of a second (regardless of the + # value of $frame_shift), and first need to know how many digits we need (we'll be + # printing with "%05d" or similar, for zero-padding. + max_end_time_hundredths_second = int(100.0 * self.frame_shift * max_end_time) + num_digits = 1 + i = 1 + while i < max_end_time_hundredths_second: + i *= 10 + num_digits += 1 + format_str = r"%0" + "%d" % num_digits + "d" # e.g. 
"%05d" + + for start, end in segments: + assert (end > start) + start_seconds = "%.2f" % (self.frame_shift * start) + end_seconds = "%.2f" % (self.frame_shift * end) + start_str = format_str % (start * self.frame_shift * 100.0) + end_str = format_str % (end * self.frame_shift * 100.0) + utterance_id = "%s%s%s%s%s" % (self.file_id, self.options.first_separator, start_str, self.options.second_separator, end_str) + # Output: + out_file_handle.write("%s %s %s %s\n" % (utterance_id, self.file_id, start_seconds, end_seconds)) + + # Some intermediate stage analysis of the segmentation + def segmentation_analysis(self, title = "Analysis"): + # In this analysis, we are trying to find in each segment, + # the number of frames that are speech, noise and silence + # in the reference RTTM + + # First get the segment start and segment ends + # Note that they are in sync by construction + segment_starts = [i for i in range(0,self.N) if self.S[i]] + segment_ends = [i for i in range(0,self.N+1) if self.E[i]] + + D = {} + for i,st in enumerate(segment_starts): + en = segment_ends[i] + types = {} + for val in self.reference[st:en]: + # The segment is defined by the indices st:en + # Count the number of frames in the segment that + # are silence, speech and noise in the reference. + types[val] = types.get(val,0) + 1 + # End for loop over a particular segment + # Make a tuple out of the counts of the types of frames + D[st] = (en, types.get("0",0), types.get("1", 0), types.get("2", 0)) + # End for loop over all segments + + a = Analysis(self.file_id, None, title) + for st, info in D.items(): + en = info[0] + + if info[1] > 0 and info[2] == 0 and info[3] == 0: + # All frames silence + a.confusion_matrix[0] += 1 + a.state_count[0].append((en-st,)+info[1:]) + a.type_counts[0][0].append(info[1]) + a.type_counts[1][0].append(info[2]) + a.type_counts[2][0].append(info[3]) + a.markers[0].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] == 0: + # All frames noise + a.confusion_matrix[1] += 1 + a.state_count[1].append((en-st,)+info[1:]) + a.type_counts[0][1].append(info[1]) + a.type_counts[1][1].append(info[2]) + a.type_counts[2][1].append(info[3]) + a.markers[1].append(st) + elif info[1] == 0 and info[2] == 0 and info[3] > 0: + # All frames speech + a.confusion_matrix[2] += 1 + a.state_count[2].append((en-st,)+info[1:]) + a.type_counts[0][2].append(info[1]) + a.type_counts[1][2].append(info[2]) + a.type_counts[2][2].append(info[3]) + a.markers[2].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] == 0: + # Segment contains both silence and noise + a.confusion_matrix[3] += 1 + a.state_count[3].append((en-st,)+info[1:]) + a.type_counts[0][3].append(info[1]) + a.type_counts[1][3].append(info[2]) + a.type_counts[2][3].append(info[3]) + a.markers[3].append(st) + elif info[1] > 0 and info[2] == 0 and info[3] > 0: + # Segment contains both silence and speech + a.confusion_matrix[4] += 1 + a.type_counts[0][4].append(info[1]) + a.type_counts[1][4].append(info[2]) + a.type_counts[2][4].append(info[3]) + a.state_count[4].append((en-st,)+info[1:]) + a.markers[4].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] > 0: + # Segment contains both noise and speech + a.confusion_matrix[5] += 1 + a.state_count[5].append((en-st,)+info[1:]) + a.type_counts[0][5].append(info[1]) + a.type_counts[1][5].append(info[2]) + a.type_counts[2][5].append(info[3]) + a.markers[5].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] > 0: + # Segment contains silence, noise and speech + a.confusion_matrix[6] += 1 + 
a.state_count[6].append((en-st,)+info[1:]) + a.type_counts[0][6].append(info[1]) + a.type_counts[1][6].append(info[2]) + a.type_counts[2][6].append(info[3]) + a.markers[6].append(st) + else: + # Should never be here + assert (False) + # End if + # End for loop over all stats + return a + # End function segmentation_analysis + +def map_prediction(A1, A2, phone_map, speech_cap = None, f = None): + if A2 == None: + B = [] + # Isolated segmentation + prev_x = None + len_x = 0 + i = 0 + for x in A1: + if prev_x == None or x == prev_x: + len_x += 1 + else: + assert (len_x > 0) + #sys.stderr.write("PHONE_LENGTH %s %d %s %d\n" % (prev_x, len_x, f, i - len_x)) + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + len_x = 1 + # End if + prev_x = x + i += 1 + # End for + try: + assert (len_x > 0) + except AssertionError as e: + repr(e) + sys.stderr.write("In file %s\n" % f) + sys.exit(1) + + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + return B + # End if (isolated segmentation) + + # Assuming len(A1) > len(A2) + # Otherwise A1 and A2 must be interchanged before + # passing to this function + B1 = [] + B2 = [] + for i in range(0, len(A2)): + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "1": + B1.append("1") + B2.append("3") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "2": + B1.append("2") + B2.append("6") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "0": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "1": + B1.append("4") + B2.append("4") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "2": + B1.append("5") + B2.append("7") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "0": + B1.append("6") + B2.append("2") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "1": + B1.append("7") + B2.append("5") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "2": + B1.append("8") + B2.append("8") + for i in range(len(A2), len(A1)): + if phone_map[A1[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "1": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "2": + B1.append("6") + B2.append("2") + return (B1, B2) + +def main(): + parser = ArgumentParser(description='Get segmentation arguments') + parser.add_argument('--verbose', type=int, \ + dest='verbose', default=0, \ + help='Give higher verbose for more logging (default: %(default)s)') + parser.add_argument('--silence-proportion', type=float, \ + dest='silence_proportion', default=0.05, \ + help="The amount of silence at the sides of segments is " \ + + "tuned to give this proportion of silence. 
(default: %(default)s)")
+    parser.add_argument('--frame-shift', type=float, \
+        dest='frame_shift', default=0.01, \
+        help="Time difference in seconds between adjacent frames (default: %(default)s)")
+    parser.add_argument('--max-segment-length', type=float, \
+        dest='max_segment_length', default=10.0, \
+        help="Maximum segment length while we are merging segments (default: %(default)s)")
+    parser.add_argument('--hard-max-segment-length', type=float, \
+        dest='hard_max_segment_length', default=15.0, \
+        help="Hard maximum on the segment length above which the segment " \
+            + "will be broken even if in the middle of speech (default: %(default)s)")
+    parser.add_argument('--first-separator', type=str, \
+        dest='first_separator', default="-", \
+        help="Separator between recording-id and start-time (default: %(default)s)")
+    parser.add_argument('--second-separator', type=str, \
+        dest='second_separator', default="-", \
+        help="Separator between start-time and end-time (default: %(default)s)")
+    parser.add_argument('--remove-noise-only-segments', type=str, \
+        dest='remove_noise_only_segments', default="true", choices=("true", "false"), \
+        help="Remove segments that have only noise. (default: %(default)s)")
+    parser.add_argument('--min-inter-utt-silence-length', type=float, \
+        dest='min_inter_utt_silence_length', default=1.0, \
+        help="Minimum silence that must exist between two separate utterances (default: %(default)s)");
+    parser.add_argument('--channel1-file', type=str, \
+        dest='channel1_file', default="inLine", \
+        help="String that matches the channel 1 file (default: %(default)s)")
+    parser.add_argument('--channel2-file', type=str, \
+        dest='channel2_file', default="outLine", \
+        help="String that matches the channel 2 file (default: %(default)s)")
+    parser.add_argument('--isolated-resegmentation', \
+        dest='isolated_resegmentation', \
+        action='store_true', help="Do not do joint segmentation (default: %(default)s)")
+    parser.add_argument('--max-length-diff', type=float, \
+        dest='max_length_diff', default=1.0, \
+        help="Maximum difference in the lengths of the two channels for joint " \
+            + "segmentation to be done (default: %(default)s)")
+    parser.add_argument('--reference-rttm', dest='reference_rttm', \
+        help="RTTM file to compare against and get statistics from (default: %(default)s)")
+    parser.add_argument('--speech-cap-length', type=float, default=None, \
+        help="Maximum length in seconds of a particular speech phone prediction." \
+            + "\nAny length above this will be considered as noise")
+    parser.add_argument('prediction_dir', \
+        help='Directory where the predicted phones (.pred files) are found')
+    parser.add_argument('phone_map', \
+        help='Phone map file that maps from phones to classes')
+    parser.add_argument('output_segments', nargs='?', default="-", \
+        help='Output segments file')
+    parser.usage=':'.join(parser.format_usage().split(':')[1:]) \
+        + 'e.g. : %(prog)s exp/tri4b_whole_resegment_dev10h/pred exp/tri4b_whole_resegment_dev10h/phone_map.txt data/dev10h.seg/segments'
+    options = parser.parse_args()
+
+    sys.stderr.write(' '.join(sys.argv) + "\n")
+    if not ( options.silence_proportion \
+            > 0.01 and options.silence_proportion < 0.99 ):
+        sys.stderr.write("%s: Error: Invalid silence-proportion value %f\n" \
+            % (sys.argv[0], options.silence_proportion))
+        sys.exit(1)
+
+    if not ( options.remove_noise_only_segments == "false" or options.remove_noise_only_segments == "true" ):
+        sys.stderr.write("%s: Error: Invalid value for remove-noise-only-segments %s. Must be true or false.\n" \
+            % (sys.argv[0], options.remove_noise_only_segments))
+        sys.exit(1)
+
+    if options.output_segments == '-':
+        out_file = sys.stdout
+    else:
+        try:
+            out_file = open(options.output_segments, 'w')
+        except IOError as e:
+            sys.stderr.write("%s: %s: Unable to open file %s\n" % (sys.argv[0], e, options.output_segments))
+            sys.exit(1)
+    # End if
+
+    phone_map = {}
+    try:
+        for line in open(options.phone_map).readlines():
+            phone, cls = line.strip().split()
+            phone_map[phone] = cls
+    except IOError as e:
+        repr(e)
+        sys.exit(1)
+
+    prediction_dir = options.prediction_dir
+    channel1_file = options.channel1_file
+    channel2_file = options.channel2_file
+
+    temp_dir = prediction_dir + "/../rttm_classes"
+    os.system("mkdir -p %s" % temp_dir)
+    if options.reference_rttm != None:
+        read_rttm_file(options.reference_rttm, temp_dir, options.frame_shift)
+    else:
+        temp_dir = None
+
+    stats = Stats()
+
+    pred_files = dict([ (f.split('/')[-1][0:-5], False) \
+        for f in glob.glob(os.path.join(prediction_dir, "*.pred")) ])
+
+    global global_analysis_get_initial_segments
+    global_analysis_get_initial_segments = Analysis("TOTAL_Get_Initial_Segments", options.frame_shift, "Global Analysis after get_initial_segments")
+
+    global global_analysis_set_nonspeech_proportion
+    global_analysis_set_nonspeech_proportion = Analysis("TOTAL_set_nonspeech_proportion", options.frame_shift, "Global Analysis after set_nonspeech_proportion")
+
+    global global_analysis_final
+    global_analysis_final = Analysis("TOTAL_Final", options.frame_shift, "Global Analysis Final")
+
+    speech_cap = None
+    if options.speech_cap_length != None:
+        speech_cap = int( options.speech_cap_length / options.frame_shift )
+    # End if
+
+    for f in pred_files:
+        if pred_files[f]:
+            continue
+        if re.match(".*_"+channel1_file, f) is None:
+            if re.match(".*_"+channel2_file, f) is None:
+                sys.stderr.write("%s does not match pattern .*_%s or .*_%s\n" \
+                    % (f, channel1_file, channel2_file))
+                sys.exit(1)
+            else:
+                f1 = f
+                f2 = f
+                f1 = re.sub("(.*_)"+channel2_file, r"\1"+channel1_file, f1)
+        else:
+            f1 = f
+            f2 = f
+            f2 = re.sub("(.*_)"+channel1_file, r"\1"+channel2_file, f2)
+
+        if options.isolated_resegmentation or f2 not in pred_files or f1 not in pred_files:
+            pred_files[f] = True
+            try:
+                A = open(os.path.join(prediction_dir, f+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f))
+                sys.exit(1)
+
+            B = map_prediction(A, None, phone_map, speech_cap, f)
+
+            if temp_dir != None:
+                try:
+                    reference = open(os.path.join(temp_dir, f+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference = None
+            else:
+                reference = None
+            r = JointResegmenter(A, B, f, options, phone_map, stats, reference)
+            r.resegment()
+            r.print_segments(out_file)
+        else:
+            if pred_files[f1] and pred_files[f2]:
+                continue
+            pred_files[f1] = True
+            pred_files[f2] = True
+            try:
+                A1 = open(os.path.join(prediction_dir, f1+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f1))
+                sys.exit(1)
+            try:
+                A2 = open(os.path.join(prediction_dir, f2+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f2))
+                sys.exit(1)
+
+            if len(A1) < len(A2):
+                A3 = A1
+                A1 = A2
+                A2 = A3
+
+                f3 = f1
+                f1 = f2
+                f2 = f3
+            # End if
+
+            if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift:
+                sys.stderr.write( \
+                    "%s: Warning: Lengths of %s and %s differ by more than %f. " \
+                    % (sys.argv[0], f1, f2, options.max_length_diff) \
+                    + "So using isolated resegmentation\n")
+                B1 = map_prediction(A1, None, phone_map, speech_cap)
+                B2 = map_prediction(A2, None, phone_map, speech_cap)
+            else:
+                B1,B2 = map_prediction(A1, A2, phone_map, speech_cap)
+            # End if
+
+            if temp_dir != None:
+                try:
+                    reference1 = open(os.path.join(temp_dir, f1+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference1 = None
+            else:
+                reference1 = None
+            r1 = JointResegmenter(A1, B1, f1, options, phone_map, stats, reference1)
+            r1.resegment()
+            r1.print_segments(out_file)
+
+            if temp_dir != None:
+                try:
+                    reference2 = open(os.path.join(temp_dir, f2+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference2 = None
+            else:
+                reference2 = None
+            r2 = JointResegmenter(A1, B2, f2, options, phone_map, stats, reference2)
+            r2.resegment()
+            r2.restrict(len(A2))
+            r2.print_segments(out_file)
+        # End if
+    # End for loop over files
+
+    if options.reference_rttm != None:
+        global_analysis_get_initial_segments.write_confusion_matrix(True)
+        global_analysis_get_initial_segments.write_total_stats(True)
+        global_analysis_get_initial_segments.write_length_stats()
+        global_analysis_set_nonspeech_proportion.write_confusion_matrix(True)
+        global_analysis_set_nonspeech_proportion.write_total_stats(True)
+        global_analysis_set_nonspeech_proportion.write_length_stats()
+        global_analysis_final.write_confusion_matrix(True)
+        global_analysis_final.write_total_stats(True)
+        global_analysis_final.write_length_stats()
+
+if __name__ == '__main__':
+    with Timer() as t:
+        main()
+    sys.stderr.write("\nSegmentation done!\nTook %f sec\n" % t.interval)
+
diff --git a/egs/babel/s5d/local/resegment/train_segmentation.sh b/egs/babel/s5d/local/resegment/train_segmentation.sh
new file mode 100755
index 00000000000..511c451993e
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/train_segmentation.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright 2014  Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0
+
+nj=16       # Number of jobs for aligning the training subset of the whole data
+cmd=run.pl  # How to run the parallel tasks
+boost_sil=1.0
+ext_alidir= # Use this alignment directory instead of generating a new one
+
+# End of configuration
+
+. utils/parse_options.sh
+
+set -o pipefail
+set -e
+set -u
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 [options] <in-model-dir> <train-data-dir> <lang-dir> <out-model-dir>"
+  echo " e.g.:"
+  echo "$0 exp/tri4 data/train data/lang exp/tri4b_seg"
+  echo " Options (a selection; for the full set, see the script itself):"
+  echo "    --nj <nj>    # Number of parallel jobs"
+  echo "    --cmd <cmd>  # How to run the parallel tasks"
+  exit 1
fi
+
+in_model_dir=$1  # Model used for alignment
+train_data_dir=$2
+lang=$3
+out_model_dir=$4
+
+if [ ! -d $train_data_dir ] ; then
+  echo "$0: Unable to find directory $train_data_dir."
+  echo "$0: Run run-0-fillers.sh or run-1-main.sh first to prepare the data directory"
+  exit 1
+fi
+
+# Align the training data using the input models and train an
+# LDA + MLLT model on it.
+alidir=${in_model_dir}_train_seg_ali
+
+if [ ! -z "$ext_alidir" ] && [ -s $ext_alidir/ali.1.gz ]; then
+  alidir=$ext_alidir
+elif [ ! -f $alidir/.done ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$cmd" --boost-silence $boost_sil \
+    $train_data_dir $lang $in_model_dir $alidir || exit 1;
+  touch $alidir/.done
+fi
+
+if [ ! -f $out_model_dir/.done ]; then
+  steps/train_lda_mllt.sh --cmd "$cmd" --realign-iters "" --boost-silence $boost_sil \
+    1000 10000 $train_data_dir $lang $alidir $out_model_dir || exit 1;
+  touch $out_model_dir/.done
+fi
+
+if [ ! -f $out_model_dir/graph.done ]; then
+  # Make the phone decoding-graph.
+  steps/make_phone_graph.sh $lang $alidir $out_model_dir || exit 1;
+  utils/mkgraph.sh $lang $out_model_dir $out_model_dir/graph | \
+    tee $out_model_dir/mkgraph.log || exit 1
+  touch $out_model_dir/graph.done
+fi
diff --git a/egs/babel/s5d/local/rttm_to_text.pl b/egs/babel/s5d/local/rttm_to_text.pl
new file mode 100755
index 00000000000..d33c71e2f17
--- /dev/null
+++ b/egs/babel/s5d/local/rttm_to_text.pl
@@ -0,0 +1,151 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use utf8;
+
+use Data::Dumper;
+
+sub float_gt {
+  my ($A, $B) = @_;
+  #print Dumper(\@_);
+
+  if ( ($A - $B) < 1e-12 ) {
+    return 0;
+  } elsif ($A > $B ) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+binmode(STDOUT, ":utf8");
+binmode(STDERR, ":utf8");
+
+my $datadir=$ARGV[0];
+my $rttm_filename=$ARGV[1];
+
+
+my $filename="";
+my %rttm;
+my @times;
+
+
+open(rttm_f, "<:utf8", $rttm_filename) or die "Cannot open the RTTM file";
+while ( <rttm_f> ) {
+  chop;
+  my @elems = split;
+  my $_filename= $elems[1];
+  my $_time=$elems[3];
+  my $_dur=$elems[4];
+  my $_text=$elems[5];
+
+  #We could simply pull out the vector of times
+  #from the hash, but in case the RTTM is not sorted
+  #there might be some other problem somewhere
+  #(as the RTTMs are normally sorted). So instead of being
+  #"smart", let's make the user notice!
+  if ( exists($rttm{$_filename}) ) {
+    die "The RTTM file is not sorted!";
+  }
+
+  if ( $filename ne $_filename ) {
+    if ( $filename ne "" ) {
+      #print $filename . "\n";
+      my @tmp = @times;
+      $rttm{$filename} = \@tmp;
+      #if ($filename eq "BABEL_BP_101_10470_20111118_172644_inLine" ) {
+      #  print "$filename\n";
+      #  print Dumper($rttm{$filename});
+      #}
+      #print Dumper($rttm{"BABEL_BP_101_10470_20111118_172644_inLine"});
+    }
+
+    @times = ();
+    $filename = $_filename;
+  }
+
+  #I don't really know what the distinction between all
+  #of these is. Let's throw away the SPEAKER entries, as they do not
+  #really contain information that is to be found in the transcript,
+  #and keep the others.
+  if ( $elems[0] eq "LEXEME") {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-SPEECH" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-LEX" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "SPEAKER") {
+    ;
+  } else {
+    #This is just a safety precaution in case a new flag/type appears.
+    die "Unknown first element $elems[0] of line '" . join(" ", @elems) . "'\n";
+  }
+
+  #We compare the two last entries of the @times vector to check that they
+  #are ordered properly. Again, this is just a safety precaution.
+  #In a well-formed RTTM, this is normal.
+  if ( (@times > 1) && float_gt($times[-2][1], $times[-1][0]) ) {
+    #print Dumper(\@times);
+    my $A = $times[-2][0];
+    my $B = $times[-1][0];
+    my $Aend = $times[-2][1];
+    my $Bend = $times[-1][1];
+
+    #print "WARNING: Elements in the RTTM file are not sorted for FILENAME $filename!\n";
+    #print $times[-2][0] . " " . $times[-2][1] - $times[-2][0]. " " . $times[-2][2] . "\n";
+    #print $times[-1][0] . " " . $times[-1][1] - $times[-1][0]. " " . $times[-1][2] . "\n";
"\n"; + #print "\n"; + + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); + #print Dumper(\@sorted); + $times[-1][0] = $sorted[0]; + $times[-1][1] = $sorted[2]; #We omit the gap between these two words + $times[-2][0] = $sorted[2]; + $times[-2][1] = $sorted[3]; + + } +} +if ( $filename ne "" ) { + #print $filename . "\n"; + $rttm{$filename} = \@times; +} +close(rttm_f); + +open(segments_f, "<:utf8", "$datadir/segments") or die "Cannot open file $datadir/segments"; +while ( ) { + chop; + my ($segmentname, $filename, $start, $end) = split; + + if (! exists $rttm{$filename} ) { + print "Filename $filename does not exists in the RTTM file\n"; + die; + } + my @times = @{$rttm{$filename}}; + my $i; + my $j; + + + #if ($segmentname ne "10470_A_20111118_172644_000000" ) { + # next; + #} + + #print $filename . "\n"; + + #print Dumper(\@times); + $i = 0; + #print $start . " " . $times[$i][0] . " " . $times[$i][1] . "\n"; + while (($i < @times) && ( $times[$i][1] < $start ) ) { $i += 1; }; + $j = $i; + while (($j < @times) && ( $times[$j][0] < $end ) ) { $j += 1; }; + + print $segmentname . " "; + while ( $i < $j ) { + #print Dumper($times[$i]); + print $times[$i][2] . " "; + $i += 1; + } + print "\n"; + #die +} +close(segments_f); diff --git a/egs/babel/s5d/local/run_cleanup_segmentation.sh b/egs/babel/s5d/local/run_cleanup_segmentation.sh new file mode 100755 index 00000000000..324d796b1b1 --- /dev/null +++ b/egs/babel/s5d/local/run_cleanup_segmentation.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# The basic idea is to decode with an existing in-domain acoustic model, and a +# biased language model built from the reference, and then work out the +# segmentation from a ctm like file. + +# For nnet3 and chain results after cleanup, see the scripts in +# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh + +# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets +# [will add these later]. + +set -e +set -o pipefail +set -u + +stage=0 +cleanup_stage=0 +data=data/train +cleanup_affix=cleaned +srcdir=exp/tri5 +langdir=data/langp/tri5 +nj=100 +decode_nj=16 +decode_num_threads=4 + +. ./path.sh +. ./cmd.sh +. utils/parse_options.sh + +cleaned_data=${data}_${cleanup_affix} + +dir=${srcdir}_${cleanup_affix}_work +cleaned_dir=${srcdir}_${cleanup_affix} + +if [ $stage -le 1 ]; then + # This does the actual data cleanup. + steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ + $data $langdir $srcdir $dir $cleaned_data +fi + +if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix} +fi + +if [ $stage -le 3 ]; then + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} +fi diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh new file mode 100755 index 00000000000..71981a5641b --- /dev/null +++ b/egs/babel/s5d/local/run_kws_stt_task.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh
new file mode 100755
index 00000000000..71981a5641b
--- /dev/null
+++ b/egs/babel/s5d/local/run_kws_stt_task.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)

+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.

+#Simple BABEL-only script to be run on generated lattices (to produce the
+#files for scoring and for NIST submission)

+set -e
+set -o pipefail
+set -u

+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5  #Word insertion penalty
+#End of options

+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi

+echo $0 "$@"
+. utils/parse_options.sh

+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi

+data_dir=$1;
+lang_dir=$2;
+decode_dir=$3;

+##NB: The first ".done" files are used for backward compatibility only
+##NB: and should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir

+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi

+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0

+  for extraid in `cat $data_dir/extra_kws_tasks` ; do
+    if [ ! -f $decode_dir/.done.kws.$extraid ] ; then
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring true \
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.$extraid
+    fi
+    if [[ ! $extraid =~ .*oov.* ]] && [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi

+    if ! $skip_scoring ; then
+      [ -f $decode_dir/.done.kws.${extraid}.scored ] && continue;
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid --stage 4 \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring false \
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.${extraid}.scored
+    fi
+  done
+fi
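+
+# Note on the ".done" marker convention above (a usage hint, not part of the
+# original pipeline): each expensive stage touches a marker file and is then
+# skipped on re-runs, so to force scoring or a KWS task to be redone, one
+# would remove the corresponding markers, e.g.:
+#   rm -f exp/tri6/decode_dev10h/.done.score exp/tri6/decode_dev10h/.done.kws.*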
diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh
new file mode 100755
index 00000000000..9c10bfe6da5
--- /dev/null
+++ b/egs/babel/s5d/local/run_kws_stt_task2.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)

+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.

+#Simple BABEL-only script to be run on generated lattices (to produce the
+#files for scoring and for NIST submission)

+set -e
+set -o pipefail
+set -u

+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5  #Word insertion penalty
+#End of options

+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi

+echo $0 "$@"
+. utils/parse_options.sh

+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi

+data_dir=$1;
+lang_dir=$(echo "$2" | perl -pe 's/\/$//g')
+decode_dir=$3;

+##NB: The first ".done" files are used for backward compatibility only
+##NB: and should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir

+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi

+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0

+  idata=$(basename $data_dir)
+  idir=$(dirname $data_dir)

+  idataset=${idata%%.*}
+  idatatype=${idata#*.}

+  if [ "$idata" == "$idataset" ]; then
+    syll_data_dir=$idir/${idataset}.syll
+    phn_data_dir=$idir/${idataset}.phn
+  else
+    syll_data_dir=$idir/${idataset}.syll.${idatatype}
+    phn_data_dir=$idir/${idataset}.phn.${idatatype}
+  fi

+  if [ -d ${syll_data_dir} ] && [ ! -f ${decode_dir}/syllabs/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.syll $decode_dir ${decode_dir}/syllabs
+    touch ${decode_dir}/syllabs/.done
+  fi

+  if [ -d ${phn_data_dir} ] && [ ! -f ${decode_dir}/phones/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.phn $decode_dir ${decode_dir}/phones
+    touch ${decode_dir}/phones/.done
+  fi


+  for extraid in `cat $data_dir/extra_kws_tasks | grep -v oov` ; do
+    if [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi

+    if [ -f ${decode_dir}/syllabs/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/syllabs/.done.kwset.$extraid
+    fi

+    if [ -f ${decode_dir}/phones/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/phones/.done.kwset.$extraid
+    fi

+    if [ -f ${decode_dir}/syllabs/.done ] && [ !
-f $decode_dir/syllabs/.done.kwset.$extraid ] ; then + local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\ + --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ + --indices-dir $decode_dir/syllabs/kws_indices --skip-scoring $skip_scoring \ + ${lang_dir}.syll $syll_data_dir $decode_dir/syllabs + touch $decode_dir/syllabs/.done.kwset.$extraid + fi + + + if [ -f ${decode_dir}/phones/.done ] && [ ! -f $decode_dir/phones/.done.kwset.$extraid ] ; then + local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\ + --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ + --indices-dir $decode_dir/phones/kws_indices --skip-scoring $skip_scoring \ + ${lang_dir}.phn $phn_data_dir $decode_dir/phones + touch $decode_dir/phones/.done.kwset.$extraid + fi + done +fi diff --git a/egs/babel/s5d/local/score.sh b/egs/babel/s5d/local/score.sh new file mode 120000 index 00000000000..7a34ba5b0d7 --- /dev/null +++ b/egs/babel/s5d/local/score.sh @@ -0,0 +1 @@ +run_kws_stt_task.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/score_combine.sh b/egs/babel/s5d/local/score_combine.sh new file mode 100755 index 00000000000..7e8af85b2d8 --- /dev/null +++ b/egs/babel/s5d/local/score_combine.sh @@ -0,0 +1,181 @@ +#!/bin/bash + +# Copyright 2012-2013 Arnab Ghoshal +# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Script for system combination using minimum Bayes risk decoding. +# This calls lattice-combine to create a union of lattices that have been +# normalized by removing the total forward cost from them. The resulting lattice +# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# utils/ since the scores on the combined lattice must not be scaled. + +# begin configuration section. +cmd=run.pl +beam=4 # prune the lattices prior to MBR decoding, for speed. +stage=0 +cer=0 +decode_mbr=true +lat_weights= +word_ins_penalty=0.0 +min_lmwt=7 +max_lmwt=17 +parallel_opts="-pe smp 3" +skip_scoring=false +ctm_name= +#end configuration section. + +help_message="Usage: "$(basename $0)" [options] [:lmwt-bias] [:lmwt-bias] [[:lmwt-bias] ... ] + E.g. "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode exp/tri3/decode exp/combine + or: "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine +Options: + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. + --min-lmwt INT # minumum LM-weight for lattice rescoring + --max-lmwt INT # maximum LM-weight for lattice rescoring + --lat-weights STR # colon-separated string of lattice weights + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. + --stage (0|1|2) # (createCTM | filterCTM | runSclite). 
+ --parallel-opts # extra options to command for combination stage, + # default '-pe smp 3' + --cer (0|1) # compute CER in addition to WER +"; + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + + +if [ $# -lt 5 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +#Let the user to set the CTM file name +#use the data-dir name in case the user doesn't care +if [ -z ${ctm_name} ] ; then + ctm_name=`basename $data` +fi + + +for f in $lang/words.txt $lang/phones/word_boundary.int ; do + [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1; +done +if ! $skip_scoring ; then + for f in $data/stm; do + [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1; + done +fi + + +mkdir -p $dir/log + +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. + decode_dir=`echo $decode_dir | cut -d: -f1` + [ -z "$offset" ] && offset=0 + + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir + for f in $model $decode_dir/lat.1.gz ; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + if [ $i -eq 0 ]; then + nj=`cat $decode_dir/num_jobs` || exit 1; + else + if [ $nj != `cat $decode_dir/num_jobs` ]; then + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + exit 1; + fi + fi + file_list="" + # I want to get the files in the correct order so we can use ",s,cs" to avoid + # memory blowup. I first tried a pattern like file.{1,2,3,4}.gz, but if the + # system default shell is not bash (e.g. dash, in debian) this will not work, + # so we enumerate all the input files. This tends to make the command lines + # very long. + for j in `seq $nj`; do file_list="$file_list $decode_dir/lat.$j.gz"; done + + lats[$i]="ark,s,cs:lattice-scale --inv-acoustic-scale=\$[$offset+LMWT] 'ark:gunzip -c $file_list|' ark:- | \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- | \ + lattice-prune --beam=$beam ark:- ark:- | \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- |" +done + +mkdir -p $dir/scoring/log + +if [ -z "$lat_weights" ]; then + lat_weights=1.0 + for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done +fi + +if [ $stage -le 0 ]; then + $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \ + lattice-to-ctm-conf --decode-mbr=true ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/${ctm_name}.ctm || exit 1; +fi + + +if [ $stage -le 1 ]; then + # Remove some stuff we don't want to score, from the ctm. + for lmwt in `seq $min_lmwt $max_lmwt`; do + x=$dir/score_${lmwt}/${ctm_name}.ctm + [ ! -f $x ] && echo "File $x does not exist! Exiting... 
" && exit 1 + cp $x $x.bkup1; + cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ + grep -v -E '|%HESITATION|\(\(\)\)' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + perl -e '@list = (); %list = (); + while(<>) { + chomp; + @col = split(" ", $_); + push(@list, $_); + $key = "$col[0]" . " $col[1]"; + $list{$key} = 1; + } + foreach(sort keys %list) { + $key = $_; + foreach(grep(/$key/, @list)) { + print "$_\n"; + } + }' > $x; + cp $x $x.bkup2; + done +fi + +if ! $skip_scoring ; then + if [ $stage -le 2 ]; then + local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt $data $lang $dir || exit 1 + fi +fi + + +exit 0 diff --git a/egs/babel/s5d/local/score_map.sh b/egs/babel/s5d/local/score_map.sh new file mode 100755 index 00000000000..ecc528ec909 --- /dev/null +++ b/egs/babel/s5d/local/score_map.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +reverse=false +min_lmwt=9 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + echo " --reverse (true/false) # score with time reversed features " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \ + "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; + +if $reverse; then + for lmwt in `seq $min_lmwt $max_lmwt`; do + mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig + awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ + <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra + done +fi + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/babel/s5d/local/score_mbr.sh b/egs/babel/s5d/local/score_mbr.sh new file mode 100755 index 00000000000..b2fcaf5cdf9 --- /dev/null +++ b/egs/babel/s5d/local/score_mbr.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Script for minimum bayes risk decoding. + +[ -f ./path.sh ] && . ./path.sh; + +# begin configuration section. +cmd=run.pl +min_lmwt=9 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) 
# specify how to run the sub-processes."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  exit 1;
+fi

+data=$1
+lang_or_graph=$2
+dir=$3

+symtab=$lang_or_graph/words.txt

+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
+done

+mkdir -p $dir/scoring/log

+cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt

+# We submit the jobs separately, not as an array, because it's hard
+# to get the inverse of the LM scales.
+rm $dir/.error 2>/dev/null
+for inv_acwt in `seq $min_lmwt $max_lmwt`; do
+  acwt=`perl -e "print (1.0/$inv_acwt);"`
+  $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
+    lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
+    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
+    || touch $dir/.error &
+done
+wait;
+[ -f $dir/.error ] && echo "score_mbr.sh: error getting MBR output.";


+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+  cat $dir/scoring/LMWT.tra \| \
+  utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \
+  compute-wer --text --mode=present \
+  ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1;

diff --git a/egs/babel/s5d/local/score_sctk_prune.sh b/egs/babel/s5d/local/score_sctk_prune.sh
new file mode 100755
index 00000000000..09662af57c8
--- /dev/null
+++ b/egs/babel/s5d/local/score_sctk_prune.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur)  2012-2013.  Apache 2.0.

+# begin configuration section.
+cmd=run.pl
+stage=0
+cer=0
+decode_mbr=true
+beam=5
+word_ins_penalty=0
+min_lmwt=7
+max_lmwt=17
+model=
+#end configuration section.

+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;

+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1|2)                 # (createCTM | filterCTM | runSclite)."
+  echo "    --cer (0|1)                     # compute CER in addition to WER"
+  exit 1;
+fi

+data=$1
+lang=$2 # Note: may be a graph directory rather than a lang directory, but has the necessary stuff copied.
+dir=$3

+if [ -z "$model" ] ; then
+  model=$dir/../final.mdl # assume model one level up from decoding dir.
+fi


+ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite
+[ ! -f $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1;

+for f in $data/char.stm $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \
+    $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done

+name=`basename $data`; # e.g.
eval2000 + +mkdir -p $dir/scoring/log + +if [ $stage -le 0 ]; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; +fi + +if [ $stage -le 1 ]; then +# Remove some stuff we don't want to score, from the ctm. + for x in $dir/score_*/$name.ctm; do + cp $x $x.bkup1; + cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ + grep -v -E '|%HESITATION|\(\(\)\)' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + perl -e '@list = (); %list = (); + while(<>) { + chomp; + @col = split(" ", $_); + push(@list, $_); + $key = "$col[0]" . " $col[1]"; + $list{$key} = 1; + } + foreach(sort keys %list) { + $key = $_; + foreach(grep(/$key/, @list)) { + print "$_\n"; + } + }' > $x; + cp $x $x.bkup2; + y=${x%.ctm}; + cat $x.bkup2 | \ + perl -e ' + use Encode; + while(<>) { + chomp; + @col = split(" ", $_); + @col == 6 || die "Bad number of columns!"; + if ($col[4] =~ m/[\x80-\xff]{2}/) { + $word = decode("UTF8", $col[4]); + @char = split(//, $word); + $start = $col[2]; + $dur = $col[3]/@char; + $start -= $dur; + foreach (@char) { + $char = encode("UTF8", $_); + $start += $dur; + # printf "$col[0] $col[1] $start $dur $char\n"; + printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; + } + } + }' > $y.char.ctm + cp $y.char.ctm $y.char.ctm.bkup1 + done +fi + +if [ $stage -le 2 ]; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cp $data/stm $dir/score_LMWT/ '&&' cp $data/glm $dir/score_LMWT/ '&&'\ + $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm -o all -o dtl; + + if [ $cer -eq 1 ]; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \ + cp $data/char.stm $dir/score_LMWT/'&&'\ + $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl; + fi + +# for x in $dir/score_*/*.ctm; do +# mv $x.filt $x; +# rm -f $x.filt*; +# done + +# for x in $dir/score_*/*stm; do +# mv $x.filt $x; +# rm -f $x.filt*; +# done +fi + +echo "Finished scoring on" `date` +exit 0 diff --git a/egs/babel/s5d/local/score_stm.sh b/egs/babel/s5d/local/score_stm.sh new file mode 100755 index 00000000000..56835109722 --- /dev/null +++ b/egs/babel/s5d/local/score_stm.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
+ +# This is a scoring script for the CTMS in /score_/${name}.ctm +# it tries to mimic the NIST scoring setup as much as possible (and usually does a good job) + +# begin configuration section. +cmd=run.pl +cer=0 +min_lmwt=7 +max_lmwt=17 +model= +stage=0 +ctm_name= +case_insensitive=true +use_icu=true +icu_transform='Any-Lower' +#end configuration section. + +echo $0 $@ + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: $0 [options] " && exit; + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --cer (0|1) # compute CER in addition to WER" + exit 1; +fi + +data=$1 +lang=$2 # This parameter is not used -- kept only for backwards compatibility +dir=$3 + +set -e +set -o pipefail +set -u + +ScoringProgram=`which sclite` || ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite +[ ! -x $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1; +SortingProgram=`which hubscr.pl` || SortingProgram=$KALDI_ROOT/tools/sctk/bin/hubscr.pl +[ ! -x $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1; + + +for f in $data/stm ; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; +done + + +if [ -z $ctm_name ] ; then + name=`basename $data`; # e.g. eval2000 +else + name=$ctm_name +fi + +mkdir -p $dir/scoring/log +if [ $stage -le 0 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + set -e';' set -o pipefail';' \ + cp -f $data/stm $dir/score_LMWT/stm.unsorted '&&' \ + cp -f $dir/score_LMWT/${name}.ctm $dir/score_LMWT/${name}.ctm.unsorted '&&'\ + $SortingProgram sortSTM \<$dir/score_LMWT/stm.unsorted \>$dir/score_LMWT/stm.sorted '&&' \ + $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.unsorted \>$dir/score_LMWT/${name}.ctm.sorted '&&' \ + paste -d ' ' \<\(cut -f 1-5 -d ' ' $dir/score_LMWT/stm.sorted \) \ + \<\(cut -f 6- -d ' ' $dir/score_LMWT/stm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \ + \> $dir/score_LMWT/stm '&&' \ + paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \ + \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \ + \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \ + $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \ + -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1 +fi + +if [ $stage -le 1 ]; then + if [ $cer -eq 1 ]; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \ + $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \ + -n "$name.char.ctm" -o sum rsum prf dtl sgml -f 0 -D -F -c NOASCII DH -e utf-8 || exit 1 + fi +fi + + +echo "Finished scoring on" `date` +exit 0 + diff --git a/egs/babel/s5d/local/search/analyze_stats.pl b/egs/babel/s5d/local/search/analyze_stats.pl new file mode 100755 index 00000000000..fd09f9c92a7 --- /dev/null +++ b/egs/babel/s5d/local/search/analyze_stats.pl @@ -0,0 +1,219 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================


+my $Usage = <<EOU;
+Usage: gunzip -c <stats-files> | $0 [options] <data-dir> <alignment-csv> <keywords-out>
+ e.g.: gunzip -c exp/tri5/decode_dev10h.pem/kws/stats.*.gz | \
+       $0 --trials 36000 data/dev10h.pem alignment.csv keywords_stats

+Allowed options:
+  --trials      : number of trials (length of the search collection) for ATWV computation
+EOU

+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;

+my $T = 36212.6725;

+GetOptions ("trials=i" => \$T) or do
+  {
+    print STDERR "Cannot parse the command-line parameters.\n";
+    print STDERR "$Usage\n";
+    die "Cannot continue\n"
+  };

+if (@ARGV != 3) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}

+my $data = $ARGV[0];
+my $align = $ARGV[1];
+my $keywords = $ARGV[2];

+my %SEGMENTS;
+open(my $seg_file, "$data/segments") or
+  die "Cannot open the segments file in $data/segments";

+while (my $line = <$seg_file>) {
+  (my $seg_id, my $file_id, my $tstart, my $tend) = split(" ", $line);
+  $SEGMENTS{$seg_id} = [$file_id, $tstart, $tend];
+}


+my %ALIGNMENT;
+my %TWVSTATS;
+open(my $align_file, $align) or
+  die "Cannot open the alignment file in $align";

+print "Reading alignment...\n";
+my $dummy=<$align_file>;
+while (my $line = <$align_file>) {
+  chomp $line;
+  my @entries = split(/\s*,\s*/, $line);
+  my $kw_id = $entries[3];
+  my $file_id = $entries[1];
+  my $kw_time = $entries[7];
+  my $op_id = join(",", @entries[10 .. 11]); # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET' | ',MISS'

+  $TWVSTATS{$kw_id}{$op_id} += 1;
+  next if $op_id eq ",MISS";

+  my $key = sprintf "%s,%s", $kw_id, $file_id;

+  if ( grep { abs($_ - $kw_time) <= 0.5 } @{$ALIGNMENT{$key}} ) {
+    die "The key $key is not unique\n";
+  }
+  push @{$ALIGNMENT{$key}}, \@entries;
+}

+#print Dumper(\%TWVSTATS);
+print "Done reading alignment...\n";


+my %HITCACHE;

+print "Reading stats\n";
+while (my $line = <STDIN>) {
+  my @entries = split(" ", $line);

+  my $wav = $SEGMENTS{$entries[1]}[0];
+  my $seg_start = $SEGMENTS{$entries[1]}[1];
+  my $seg_end = $SEGMENTS{$entries[1]}[2];

+  my $kw = $entries[0];
+  my $kw_start = $seg_start + $entries[2]/100.00000;
+  my $kw_stop = $seg_start + $entries[3]/100.00000;
+  my $kw_center = ($kw_start + $kw_stop) / 2.0;
+  #print Dumper($kw_start, $kw_stop, $kw_center);
+  my $kw_wav = $wav;

+  my $key = sprintf "%s,%s", $kw, $kw_wav;

+  if ( not grep { abs( (@{$_}[7] + @{$_}[8])/2.0 - $kw_center) <= 0.1 } @{$ALIGNMENT{$key}} ) {
+    ##print "The key $key, $kw_center does not exist in the alignment\n";
+    ##print join(" ", @entries) . "\n";
+    #print Dumper($ALIGNMENT{$key});
+    #die;
+  } else {
+    my @tmp = @{$ALIGNMENT{$key}};
+    my ($index) = grep { abs( (@{$tmp[$_]}[7] + @{$tmp[$_]}[8]) / 2.0 - $kw_center) <= 0.1 } (0 .. @{$ALIGNMENT{$key}}-1);
+    die unless defined $index;
+    my @ali = @{@{$ALIGNMENT{$key}}[$index]};
+    my $diff = abs($ali[7] - $kw_start);

+    #die "Weird hit " . Dumper(\@entries) if $entries[5] != 0;

+    my $hit_id = join(" ", @entries[5 ..
@entries-1]); + $hit_id =~ s/\b0\b//g; + $hit_id =~ s/^\s+//g; + $hit_id =~ s/\s+/ /g; + $hit_id =~ s/\s+$//g; + #print $hit_id . "\n"; + #print Dumper(\@ali, $kw_wav, $diff) if $diff > 0.1; + #print Dumper(\@entries); + + my $op_id = join(",", @ali[10 .. 11]); # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET' + $HITCACHE{$kw}{$hit_id}{$op_id} += 1; + #push @{$HITCACHE{$hit_id}{join(",", @ali[10 .. 11])}}, $entries[4]; + } + #print Dumper(\@entries, $kw_start, $kw_wav); + #exit +} +#print Dumper(\%HITCACHE); +print "Done reading stats\n"; + +open(my $KW, "> $keywords"); + +print "Analyzing\n"; +my $TWV = 0; +my $NEW_TWV = 0; +my $N_KW = 0; +foreach my $kwid (sort keys %HITCACHE) { + my %old_stats = %{$TWVSTATS{$kwid}}; + #print Dumper($kwid, \%old_stats); + # + $old_stats{"YES,CORR"} = 0 unless defined $old_stats{"YES,CORR"}; + $old_stats{",MISS"} = 0 unless defined $old_stats{",MISS"}; + $old_stats{"NO,MISS"} = 0 unless defined $old_stats{"NO,MISS"}; + $old_stats{"YES,FA"} = 0 unless defined $old_stats{"YES,FA"}; + + my $n_kw = $old_stats{"YES,CORR"} + + $old_stats{",MISS"} + + $old_stats{"NO,MISS"}; + + my $n_trials = $T - $n_kw; + + next if $n_kw == 0; + + my $p_miss = 0; + $p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw unless $n_kw == 0; + my $p_fa = $old_stats{"YES,FA"} / $n_trials; + + my $twv = 1 - $p_miss - 999.9 * $p_fa; + print "$kwid $n_kw $p_miss $p_fa $twv\n"; + + foreach my $kwpath (sort keys $HITCACHE{$kwid}) { + my $weight = 0; + + my %new_stats = %{$HITCACHE{$kwid}{$kwpath}}; + $new_stats{"YES,CORR"} = 0 unless defined $new_stats{"YES,CORR"}; + $new_stats{"YES,FA"} = 0 unless defined $new_stats{"YES,FA"}; + + my $new_p_miss = 1 - ($old_stats{"YES,CORR"} - $new_stats{"YES,CORR"})/ $n_kw; + my $new_p_fa = ($old_stats{"YES,FA"} - $new_stats{"YES,FA"}) / $n_trials; + my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa; + if ($new_twv > $twv) { + #print "keep: $kwid $kwpath $twv - $new_twv\n"; + if ((defined $HITCACHE{$kwid}{$kwpath}->{"YES,FA"}) || + (defined $HITCACHE{$kwid}{$kwpath}->{"NO,MISS"}) || + (defined $HITCACHE{$kwid}{$kwpath}->{"YES,CORR"})) { + print Dumper($kwid, $kwpath, $HITCACHE{$kwid}{$kwpath}); + } + $old_stats{"YES,CORR"} -= $new_stats{"YES,CORR"}; + $old_stats{"YES,FA"} -= $new_stats{"YES,FA"} ; + } else { + print $KW "$kwid $kwpath\n"; + #print "remove: $kwid $kwpath $twv - $new_twv\n"; + + } + # print $W "$kwid $weight\n"; + + } + + + my $new_p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw; + my $new_p_fa = $old_stats{"YES,FA"} / $n_trials; + + my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa; + + $NEW_TWV = $N_KW/($N_KW+1) * $NEW_TWV + $new_twv / ($N_KW+1); + $TWV = $N_KW/($N_KW+1) * $TWV + $twv / ($N_KW+1); + $N_KW += 1; +} +close($KW); +#print "ATWV: $TWV $NEW_TWV\n"; diff --git a/egs/babel/s5d/local/search/annotate_kwlist.pl b/egs/babel/s5d/local/search/annotate_kwlist.pl new file mode 100755 index 00000000000..fbbdc0c119e --- /dev/null +++ b/egs/babel/s5d/local/search/annotate_kwlist.pl @@ -0,0 +1,166 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2016 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage = < > output.kwlist.xml + e.g.: cat kwlist.xml | $0 data/dev10h.pem/kwset_kwlist/categories > output.kwlist.xml + +EOU +use strict; +use warnings "FATAL"; +use utf8; +use XML::Parser; +use Data::Dumper; + +binmode STDERR, ":utf8"; +binmode STDOUT, ":utf8"; + +my $IN_KWTEXT=0; +my $KWTEXT=''; +my $KWID=''; +my %CATEGORIES; + +sub kwlist { + my @entries = @_; + shift @entries; + shift @entries; + + my $header=""; + while (@entries) { + my $k = shift @entries; + my $w = shift @entries; + + $header .= " $k=\"$w\" "; + } + print "\n"; +} + +sub kwlist_ { + print "\n"; +} + +sub kw { + my @entries = @_; + shift @entries; + shift @entries; + #print Dumper(@entries); + my %params = @entries; + $KWID = $params{kwid}; +} + +sub kwtext { + my @entries = @_; + shift @entries; + $IN_KWTEXT=1; + #print Dumper(@entries); +} +sub char { + my @entries = @_; + shift @entries; + $KWTEXT=$entries[0] if $IN_KWTEXT eq 1; +} + +sub kwtext_ { + my @entries = @_; + shift @entries; + $IN_KWTEXT=0; + if ($KWTEXT) { + if (exists $CATEGORIES{$KWID}) { + print " \n"; + print " $KWTEXT\n"; + print " \n"; + print " \n"; + print " ALL\n"; + print " 1\n"; + print " \n"; + foreach my $cat (sort keys %{$CATEGORIES{$KWID}} ) { + my @entries = split("=", $cat); + my $name; + my $value; + + if (scalar @entries == 2) { + $name = $entries[0]; + $value = $entries[1]; + } else { + $name = $cat; + $value = 1; + } + print " \n"; + print " $name\n"; + print " $value\n"; + print " \n"; + } + print " \n"; + print " \n"; + } else { + my $n = scalar split " ", $KWTEXT; + my $l=length join("", split($KWTEXT)); + + $n = sprintf "%02d", $n; + $l = sprintf "%02d", $l; + + print " \n"; + print " $KWTEXT\n"; + print " \n"; + print " \n"; + print " Characters\n"; + print " $l\n"; + print " \n"; + print " \n"; + print " NGramOrder\n"; + print " $n\n"; + print " \n"; + print " \n"; + print " NGram Order\n"; + print " $n\n"; + print " \n"; + print " \n"; + print " \n"; + } + } +} + +if (@ARGV != 1) { + print STDERR "Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + + +#Read the categories table +open(G, $ARGV[0]) or die "Cannot open the categories table $ARGV[0]"; +while (my $line = ) { + my @entries = split(" ", $line); + my $kwid = shift @entries; + + foreach my $group (@entries) { + $CATEGORIES{$kwid}->{$group} = 1; + } +} +close(G); + +my $p1 = new XML::Parser(Style => 'Subs'); +$p1->setHandlers(Char => \&char); +$p1->parse(*STDIN); + diff --git a/egs/babel/s5d/local/search/combine.sh b/egs/babel/s5d/local/search/combine.sh new file mode 100755 index 00000000000..4f77c0f0f7c --- /dev/null +++ b/egs/babel/s5d/local/search/combine.sh @@ -0,0 +1,258 @@ +#!/bin/bash +# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey) +# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# begin configuration section. 
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section

+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;

+set -e -o pipefail
+set -o nounset # Treat unset variables as an error

+help_message="Usage: $0 [options] <data-dir> <lang-dir> <decode-dir1> <decode-dir2> [<decode-dir3> ... ] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi


+data=$1; shift;
+lang=$1; shift;
+output=${@: -1} # last argument to the script
+decode_dirs=( $@ ) # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir
+num_sys=${#decode_dirs[@]} # number of systems to combine

+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi

+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi

+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A files_reduced

+mkdir -p $output
+mkdir -p $output/log

+echo "$0: Combination config (id, weight, results) -- initial"

+i=1
+nsystems=0
+for elem in ${decode_dirs[@]} ; do
+  params[W$i]="0.5"
+  if [ -f $elem ] ; then
+    f=$(echo $elem | cut -d: -f1)
+    w=$(echo $elem | cut -d: -s -f2)

+    [ ! -z "$w" ] && params[W$i]="$w"
+    files[W$i]=$f
+    files_reduced[W$i]=$output/results.reduced.$i

+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    mtwv=$(cat $elem/details/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$elem/details/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  elif [ -d $elem ] ; then
+    best_dir=$(find ${elem}_* -name "score.txt" \
+                    -path "*$extraid*" \
+                    -path "*/details/*" |\
+               xargs grep "MTWV *=" | \
+               sort -k2,2g -t '=' |
+               tail -n 1 | \
+               cut -f 1 -d ':' | \
+               xargs dirname \
+              )
+    mtwv=$(cat $best_dir/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$best_dir/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  else
+    echo >&2 "$0: The parameter \"$elem\" is neither a file nor a directory"
+  fi

+  echo "  $i W$i=${params[W$i]} ${files[W$i]}"
+  echo "${files[W$i]}" > $output/results_W$i

+  cat ${files[W$i]} | \
+    local/search/filter_kws_results.pl --probs --nbest $nbest_small > ${files_reduced[W$i]}

+  nsystems=$i
+  i=$(($i+1))

+done

+if [ $nsystems -le 0 ] ; then
+  echo >&2 "No acoustic system found"
+  exit 1
+fi

+trials=$(cat $data/trials)

+if $optimize ; then
+  cmdline=


+  declare -A params
+  opt_vars=""
+  opt_task_params=""
+  for w in "${!params[@]}" ; do
+    opt_vars="$opt_vars --var $w=${params[$w]}"

+    if [ ${files_reduced[$w]+isset} ] ; then
+      opt_task_params="$opt_task_params $w ${files_reduced[$w]}"
+    fi
+  done

+  echo "$0: Optimization -- first stage (reduced size results)"
+  mkdir -p $output/opt
+  local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' --ftol 0.01 --iftol 0.01 \
+    --output-dir $output/opt $opt_vars \
+    local/search/combine_results.pl --probs --power PWR $opt_task_params - \| \
+    local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE \| \
+    local/search/filter_kws_results.pl --nbest 100 \| \
+    compute-atwv $trials ark:$data/hitlist ark:- | \
+    tee $output/log/optimize.log | grep -i "Iter" || {
+      echo >&2 "$0: Optimization failed (see $output/log/optimize.log for errors)"; exit 1
} + + # override the default parameters + if [ -f $output/opt/params.sh ] ; then + . $output/opt/params.sh + else + echo >&2 "$0: Optimization output in $output/opt/params.sh not found"; + exit 1; + fi + + # Second round of optimization -- this time, only the NTRUE + comb_task_params="" + for w in "${!params[@]}" ; do + if [ ${files[$w]+isset} ] ; then + comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}" + fi + done + + echo "$0: Optimization -- second stage (full size results)" + mkdir -p $output/opt_ntrue + local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' \ + --output-dir $output/opt_ntrue --var NTRUE=${params[NTRUE]} \ + local/search/combine_results.pl --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - \| \ + local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE\| \ + local/search/filter_kws_results.pl --probs --duptime $duptime \| \ + compute-atwv $trials ark:$data/hitlist ark:- | \ + tee $output/log/optimize_ntrue.log | grep -i "Iteration" || { + echo >&2 "$0: Optimization failed (see $output/log/optimize_ntrue.log for errors)"; exit 1 + } + # override the default parameters + if [ -f $output/opt_ntrue/params.sh ] ; then + . $output/opt_ntrue/params.sh + else + echo >&2 "$0: Optimization output in $output/opt_ntrue/params.sh not found"; + exit 1; + fi +fi + +echo "$0: Combination config (final)" +echo -n "$0: params=[" +comb_task_params="" +for w in "${!params[@]}" ; do + echo -n " $w=${params[$w]}" + if [ ${files[$w]+isset} ] ; then + comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}" + fi +done +echo "]" + +mkdir -p $output/details + + +echo "$0: Doing final combination" +local/search/combine_results.pl \ + --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \ + local/search/normalize_results_kst.pl \ + --duration $trials --ntrue-scale ${params[NTRUE]} |\ + local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results + +#Write the parapeters +echo "declare -A params" > $output/details/params.sh +for w in "${!params[@]}" ; do + echo "params[$w]=${params[$w]}" +done >> $output/details/params.sh +echo "${params[NTRUE]}" > $output/details/ntrue +echo "${params[PWR]}" > $output/details/power + +if ! $skip_scoring ; then + echo "$0: Scoring..." + cat $output/details/results |\ + compute-atwv $trials ark,t:$data/hitlist ark:- \ + ${output}/details/alignment.csv \ + > ${output}/details/score.txt \ + 2> ${output}/log/score.log + + cat ${output}/details/alignment.csv |\ + perl local/search/per_category_stats.pl \ + --sweep-step 0.005 $trials $data/categories \ + > ${output}/details/per-category-score.txt \ + 2> ${output}/log/per-category-score.log + + cp $output/details/score.txt $output/score.txt + +fi + +if [ $stage -le 2 ]; then + if [ -f $data/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . 
$data/f4de_attribs #override the previous variables + + ecf=$data/ecf.xml + rttm=$data/rttm + kwlist=$data/kwlist.xml + + mkdir -p ${output}/f4de/ + + cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml + kwlist=${output}/f4de/kwlist.xml + + cat ${output}/details/results | \ + utils/int2sym.pl -f 2 $data/utt.map | \ + local/search/utt_to_files.pl --flen "$flen" $data/../segments |\ + local/search/write_kwslist.pl --flen "$flen" --language "$language" \ + --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml + + KWSEval -e $ecf -r $rttm -t $kwlist -a \ + --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \ + -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \ + -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/ + + local/kws_oracle_threshold.pl --duration $trials \ + ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt + fi +fi + +echo "$0: All OK" diff --git a/egs/babel/s5d/local/search/combine_results.pl b/egs/babel/s5d/local/search/combine_results.pl new file mode 100755 index 00000000000..694ee47c2cd --- /dev/null +++ b/egs/babel/s5d/local/search/combine_results.pl @@ -0,0 +1,422 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2016 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage = < w2 ... + e.g.: $0 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml + +Allowed options: + --probs : The input scores are probabilities, not negative log-likelihoods) + --method : Use different combination method (int, default = 0) + 0 -- CombSUM + 1 -- CombMNZ + --input-norm : how the input data should be normalized (int ) + 0 -- Saturate + 1 -- NormSTO + 2 -- source-wise NormSTO + --output-norm : how the output data should be normalized (int ) + 0 -- Saturate + 1 -- NormSTO + --power : The weighted power mean p-coefficient (float, default = 0.5) + --gamma : The gamma coefficient for CombMNZ (float, default = 0.0) + --tolerance : Tolerance (in frames) for being the same hits (float, default = 50) + +EOU + +use strict; +use warnings "FATAL"; +use utf8; +use POSIX; +use Data::Dumper; +use Getopt::Long; +use File::Basename; +use Scalar::Util qw(looks_like_number); + +$Data::Dumper::Indent = 2; + +my $TOL = 50; +my $LIKES = 0; + +sub OpenResults { + my $list = shift @_; + + my $source = "STDIN"; + if ($list ne "-") { + open(my $i, "<$list") || die "Fail to open file $list.\n"; + return $i; + } + return $source +} + +sub PrintResults { + my $KWS = shift @_; + + # Start printing + my $result = ""; + foreach my $kwentry (@{$KWS}) { + my ($kwid, $file, $tbeg, $tend, $score, $dummy) = @{$kwentry}; + if ($score > 0) { + $score = -log($score); + } elsif ($score == 0) { + $score = 9999; + } else { + die "Cannot take logarithm of a negative number\n" . 
join(" ", @{$kwentry}) . "\n"; + } + $result .= "$kwid $file $tbeg $tend $score\n"; + } + + return $result; +} + +sub KwslistTimeCompare { + my ($a, $b) = @_; + + if ($a->[0] eq $b->[0]) { # KWID + if ($a->[1] eq $b->[1]) { # FILEID + if (abs($a->[2] - $b->[2]) <= $TOL) { # KW START + if (abs($a->[3] - $b->[3]) <= $TOL) { #KW END + return 0; + } else { + return ($a->[3] <=> $b->[3] ); + } + } else { + return $a->[2] <=> $b->[2]; + } + } else { + return $a->[1] cmp $b->[1]; + } + } else { + $a->[0] cmp $b->[0]; + } +} + +sub KwslistTimeSort { + my $a = shift; + my $b = shift; + return KwslistTimeCompare($a, $b); +} + +sub ReadLines { + my $kwid = shift @_; + my %files = %{shift @_}; + my @lines = (); + + foreach my $id (sort keys %files) { + my $l = readline $files{$id}; + next unless $l; + chomp $l; + my @entries = split " ", $l; + while ($kwid eq $entries[0]) { + push @entries, $id; + push @lines, [@entries]; + + $l = readline $files{$id}; + last unless $l; + chomp $l; + @entries = split " ", $l; + } + next unless defined $l; + push @entries, $id; + push @lines, [@entries]; + } + return @lines; +} + +sub ReadFirstLines { + my %files = %{shift @_}; + my @lines = (); + + foreach my $id (sort keys %files) { + my $l = readline $files{$id}; + next unless $l; + chomp $l; + + my @entries = split " ", $l; + push @entries, $id; + push @lines, [@entries]; + } + return @lines; +} + +sub MergeCombPwrSum { + my @results = @{shift @_}; + my %weights = %{shift @_}; + my $pwr = shift @_; + my @output = (); + + return @output if not @results; + + while (@results) { + my @mergelist = (); + push @mergelist, shift @results; + while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) { + push @mergelist, shift @results; + } + + my $best_score = -9999; + my $tend; + my $tbegin; + my $out_score = 0; + foreach my $elem (@mergelist) { + my $score = $elem->[4]; + my $id = $elem->[5]; + if ($score > $best_score) { + $best_score = $score; + $tend = $elem->[3]; + $tbegin = $elem->[2]; + } + #print "$out_score += $weights{$id} * $score\n"; + $out_score += $weights{$id} * ($score ** $pwr); + } + $out_score = $out_score**(1.0/$pwr); + #print "$out_score \n\n\n"; + my $KWID = $mergelist[0]->[0]; + my $UTT = $mergelist[0]->[1]; + push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, ""]; + } + + return \@output; +} + +## More generic version of the combMNZ method +sub MergeCombPwrMNZ { + my @results = @{shift @_}; + my %weights = %{shift @_}; + my $pwr = shift @_; + my $gamma = shift @_; + my @output = (); + + $gamma = 0 unless defined $gamma; + return @output if not @results; + + while (@results) { + my @mergelist = (); + push @mergelist, shift @results; + while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) { + push @mergelist, shift @results; + } + + my $best_score = -9999; + my $tend; + my $tbegin; + my $out_score = 0; + foreach my $elem (@mergelist) { + my $score = $elem->[4]; + my $id = $elem->[5]; + if ($score > $best_score) { + $best_score = $score; + $tend = $elem->[3]; + $tbegin = $elem->[2]; + } + #print "$out_score += $weights{$id} * $score\n"; + $out_score += $weights{$id} * ($score ** $pwr); + } + $out_score = (@mergelist ** $gamma) * $out_score**(1.0/$pwr); + #print "$out_score \n\n\n"; + my $KWID = $mergelist[0]->[0]; + my $UTT = $mergelist[0]->[1]; + push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, "out"]; + } + + return \@output; +} + +### Sum-to-one normalization +sub NormalizeSTO { + my @results = @{shift @_}; + my @output = (); + my $sum = 0; + foreach 
my $elem(@results) { + $sum += $elem->[4]; + } + foreach my $elem(@results) { + $elem->[4] = $elem->[4]/$sum; + push @output, $elem; + } + return \@output; +} + +### This will STO normalize all entries in the @results according +### to the id, so that entries with the same id will sum to one +sub NormalizeSTOMulti { + my @results = @{shift @_}; + my @output = (); + my $sum = 0; + my %sums = (); + foreach my $elem(@results) { + $sums{$elem->[5]} += $elem->[4]; + } + foreach my $elem(@results) { + $elem->[4] = $elem->[4]/$sums{$elem->[5]}; + push @output, $elem; + } + return \@output; +} + +### Simple normalization of probabilities/scores +### Everything larger than 1 will be set to 1 +sub NormalizeSaturate { + my @results = @{shift @_}; + my @output = (); + my $sum = 0; + foreach my $elem(@results) { + $elem->[4] = $elem->[4] > 1.0 ? 1.0 : $elem->[4]; + push @output, $elem; + } + return \@output; +} + +my $method = 1; +my $input_norm = 0; +my $output_norm = 0; +my $gamma = 0; +my $power = 0.5; +GetOptions('tolerance=f' => \$TOL, + 'method=i' => sub { shift; $method = shift; + if (($method lt 0) || ($method gt 1)) { + die "Unknown method $method\n\n$Usage\n"; + } + }, + 'input-norm=i' => sub { shift; my $n = shift; + $input_norm = $n; + if (($n lt 0) || ($n gt 2)) { + die "Unknown input-norm $n\n\n$Usage\n"; + } + }, + 'output-norm=i' => sub { shift; my $n = shift; + $output_norm = $n; + if (($n ne 0) || ($n ne 1)) { + die "Unknown output-norm $n\n\n$Usage\n"; + } + }, + 'power=f' => \$power, + 'gamma=f' => \$gamma, + 'inv-power=f' => sub { + shift; my $val = shift; + $power = 1.0/$val; + }, + 'probs' => sub { + $LIKES = 0; + } + ) || do { + print STDERR "Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +}; + +if (@ARGV % 2 != 1) { + print STDERR "Bad number of (weight, results_list) pairs.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + +# Workout the input/output source +my %results_files = (); +my %results_w = (); + +my $i = 0; +while (@ARGV != 1) { + my $w = shift @ARGV; + looks_like_number($w) || die "$0: Bad weight: $w.\n"; + $results_w{$i} = $w; + $results_files{$i} = OpenResults(shift @ARGV); + $i += 1; +} + +my $sumw=0; +foreach my $val (values %results_w ) { + $sumw += $val; +} +#foreach my $val (keys %results_w ) { +# $results_w{$val} = $results_w{$val}/$sumw; +#} + +my $output = shift @ARGV; + +my $deb = 0; +my @lines = (); +@lines = ReadFirstLines(\%results_files); +@lines = sort { KwslistTimeSort($a, $b) } @lines; +push @lines, ReadLines($lines[0]->[0], \%results_files); +@lines = sort { KwslistTimeSort($a, $b) } @lines; + +while (@lines) { + my @res = (); + + push @res, shift @lines; + while ((@lines) && ($lines[0]->[0] eq $res[0]->[0])) { + push @res, shift @lines; + } + #print PrintResults(\@res); + #print PrintResults(NormalizeSTO(MergeCombMNZ(\@res, \%results_w))); + #print PrintResults(NormalizeCutoff(MergeCombPwrSum(\@res, \%results_w, $power))); + #print PrintResults(NormalizeSaturate(MergeCombPwrMNZ(\@res, \%results_w, $power, $gamma))); + #print PrintResults(NormalizeSTO(MergeCombPwrMNZ(NormalizeSTO(\@res), \%results_w, $power, $gamma))); + + my $data = undef; + if ($input_norm == 1) { + $data = NormalizeSTO(\@res); + } elsif ($input_norm == 2) { + $data = NormalizeSTOMulti(\@res); + } else { + $data = NormalizeSaturate(\@res); + } + + if ($method == 0) { + $data = MergeCombPwrSum($data, \%results_w, $power); + } else { + $data = MergeCombPwrMNZ($data, \%results_w, $power, $gamma); + } + + if 
diff --git a/egs/babel/s5d/local/search/combine_special.sh b/egs/babel/s5d/local/search/combine_special.sh
new file mode 100755
index 00000000000..5802f49be06
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine_special.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+help_message="Usage: $0 [options] <data-dir> <lang-dir> <template-dir> <decode-dir1> [<decode-dir2> ... ] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+
+data=$1; shift;
+lang=$1; shift;
+template=$1; shift;
+output=${@: -1}  # last argument to the script
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is odir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi
+
+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi
+
+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A duced
+
+mkdir -p $output
+mkdir -p $output/log
+
+if [ -f $template/details/params.sh ] ; then
+  . $template/details/params.sh
+else
+  echo >&2 "$0: Optimization output in $template/details/params.sh not found";
+  exit 1;
+fi
+
+
+echo "$0: Combination config (id, weight, results) -- initial"
+
+i=1
+for elem in ${decode_dirs[@]} ; do
+  if [ -f $elem ] ; then
+    files[W$i]=$elem
+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    files[W$i]=$elem/details/results
+  elif [ -d $elem ] ; then
+    tmpl=`cat $template/results_W${i}`
+    echo $tmpl
+    #exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist4_10/details/results
+    if [[ "$tmpl" == */details/results ]] ; then
+      base=`echo $tmpl | sed 's:/details/results::g'`
+      base=`basename $base`
+      lmwt=${base##*_}
+      tmpl_kwset=${base%_*}
+      tmpl_kwset=${tmpl_kwset##*_}
+    else
+      echo >&2 "The template results file does not follow the naming pattern"
+      exit 1
+    fi
+    f=${elem}_${lmwt}/details/results
+    if [ !
-f $f ]; then
+      echo >&2 "The file $f does not exist (check template or $template/results_W${i})"
+      exit 1
+    fi
+    kwset=${elem##*_}
+    if [ "$kwset" != "$tmpl_kwset" ] ; then
+      echo >&2 "WARNING: The kwset and the tmpl kwset do not match! ($kwset vs $tmpl_kwset) "
+    fi
+
+    files[W$i]=$f
+  else
+    echo >&2 "$0: The parameter \"$elem\" is neither a file nor a directory"
+  fi
+  echo "  $i W$i=${params[W$i]} ${files[W$i]}"
+
+  i=$(($i+1))
+
+done
+
+
+
+trials=$(cat $data/trials)
+
+
+echo "$0: Combination config (final)"
+echo -n "$0: params=["
+comb_task_params=""
+for w in "${!params[@]}" ; do
+  echo -n " $w=${params[$w]}"
+  if [ ${files[$w]+isset} ] ; then
+    comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+  fi
+done
+echo "]"
+
+mkdir -p $output/details
+
+
+echo "$0: Doing final combination"
+local/search/combine_results.pl \
+  --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \
+  local/search/normalize_results_kst.pl \
+    --duration $trials --ntrue-scale ${params[NTRUE]} |\
+  local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results
+
+#Write the parameters
+echo "declare -A params" > $output/details/params.sh
+for w in "${!params[@]}" ; do
+  echo "params[$w]=${params[$w]}"
+done >> $output/details/params.sh
+echo "${params[NTRUE]}" > $output/details/ntrue
+echo "${params[PWR]}" > $output/details/power
+
+
+echo "DATA: $data"
+if ! $skip_scoring && [ -f $data/hitlist ] ; then
+  echo "$0: Scoring..."
+  cat $output/details/results |\
+    compute-atwv $trials ark,t:$data/hitlist ark:- \
+      ${output}/details/alignment.csv \
+      > ${output}/details/score.txt \
+      2> ${output}/log/score.log
+
+  cat ${output}/details/alignment.csv |\
+    perl local/search/per_category_stats.pl \
+      --sweep-step 0.005 $trials $data/categories \
+      > ${output}/details/per-category-score.txt \
+      2> ${output}/log/per-category-score.log
+
+  cp $output/details/score.txt $output/score.txt
+
+fi
+
+if [ $stage -le 2 ]; then
+  if [ -f $data/f4de_attribs ] ; then
+    language=""
+    flen=0.01
+    kwlist_name=""
+    . $data/f4de_attribs #override the previous variables
+
+    ecf=$data/ecf.xml
+    rttm=$data/rttm
+    kwlist=$data/kwlist.xml
+
+    mkdir -p ${output}/f4de/
+
+    cat ${output}/details/results | \
+      utils/int2sym.pl -f 2 $data/utt.map | \
+      local/search/utt_to_files.pl --flen "$flen" $data/../segments |\
+      local/search/write_kwslist.pl --flen "$flen" --language "$language" \
+        --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml
+
+    if [ -f $rttm ] ; then
+      cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml
+      kwlist=${output}/f4de/kwlist.xml
+
+      KWSEval -e $ecf -r $rttm -t $kwlist -a \
+        --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+        -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+        -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+        -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/
+
+      local/kws_oracle_threshold.pl --duration $trials \
+        ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt
+    fi
+  fi
+fi
+
+echo "$0: All OK"
diff --git a/egs/babel/s5d/local/search/compile_keywords.sh b/egs/babel/s5d/local/search/compile_keywords.sh
new file mode 100755
index 00000000000..92dc4220a8e
--- /dev/null
+++ b/egs/babel/s5d/local/search/compile_keywords.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# Begin configuration section.
+silence_word=
+filter='OOV=0'
+# End configuration section
+echo $0 "$@"
+.
parse_options.sh || exit 1; + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + + +data=$1 +lang=$2 +workdir=$3 + +mkdir -p $workdir +cat $data/categories | \ + local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories + +if [ ! -s $workdir/categories ]; then + echo "$0: WARNING: $workdir/categories is zero-size. That means no keyword" + echo "$0: WARNING: was found that fits the filter \"$filter\". That might be expected." + touch $workdir/keywords.int + touch $workdir/keywords.fsts + exit 0 +fi + +grep -w -F -f <(awk '{print $1}' $workdir/categories) \ + $data/keywords.int > $workdir/keywords.int + +if [ -s $workdir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$workdir/keywords.int \ + ark,scp,t:$workdir/keywords.fsts,- | sort -o $workdir/keywords.scp + else + silence_int=`grep -w $silence_word $lang/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$data/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{ + if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; + }' | fstcopy ark:- ark,scp,t:$workdir/keywords.fsts,- | \ + sort -o $workdir/keywords.scp + fi +else + echo "$0: WARNING: $workdir/keywords.int is zero-size. That means no keyword" + echo "$0: WARNING: was found in the dictionary. That might be expected -- or not." + touch $workdir/keywords.fsts +fi + diff --git a/egs/babel/s5d/local/search/compile_proxy_keywords.sh b/egs/babel/s5d/local/search/compile_proxy_keywords.sh new file mode 100755 index 00000000000..a28105123f3 --- /dev/null +++ b/egs/babel/s5d/local/search/compile_proxy_keywords.sh @@ -0,0 +1,271 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# 2012-2014 Guoguo Chen +# License: Apache 2.0 + +# Begin configuration section. +nj=8 +cmd=run.pl +beam=-1 # Beam for proxy FST, -1 means no prune +phone_beam=-1 # Beam for KxL2xE FST, -1 means no prune +nbest=-1 # Use top n best proxy keywords in proxy FST, -1 means all + # proxies +phone_nbest=-1 # Use top n best phone sequences in KxL2xE, -1 means all + # phone sequences +confusion_matrix= # If supplied, using corresponding E transducer +count_cutoff=1 # Minimal count to be considered in the confusion matrix; + # will ignore phone pairs that have count less than this. +pron_probs=true # If true, then lexicon looks like: + # Word Prob Phone1 Phone2... +g_beam=10 +g_alpha= +g_inv_alpha= +g2p_nbest=10 +g2p_mass=0.95 +case_insensitive=true +icu_transform="Any-Lower" +filter="OOV=1" + +# End configuration section + +echo "$0 " "$@" +. ./utils/parse_options.sh || exit 1; + +# Gets phone symbols +phone_start=2 +if $pron_probs; then + phone_start=3 +fi + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +lang=$2 +l1lex=$3 +g2p=$4 +workdir=$5 + +if [ ! -z "$g_inv_alpha" ] && [ $g_inv_alpha -ne 0 ] ; then + g_alpha=$(echo print 1.0/$g_inv_alpha | perl ) +fi + +# Checks some files. +for f in $l1lex $data/categories $data/keywords.txt ; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+mkdir -p $workdir
+cat $data/categories | \
+  local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories
+
+grep -w -F -f <(awk '{print $1}' $workdir/categories) $data/keywords.txt |\
+  sort -R > $workdir/keywords.filtered
+
+paste <(cut -f 1  $workdir/keywords.filtered) \
+      <(cut -f 2- $workdir/keywords.filtered | uconv -f utf-8 -t utf-8 -x "$icu_transform") > $workdir/keywords.txt
+
+cat $l1lex | perl -e '
+  while (<>) {
+    ($word, $prob, $pron) = split " ", $_, 3;
+    $pron =~ s/_[^\s]+//g;
+    $pron =~ s/\s+/ /g;
+    $pron =~ s/^\s+//g;
+    $pron =~ s/\s+$//g;
+    print "$word $prob $pron\n"
+  }
+' | sort -u > $workdir/L1.lex
+
+mkdir -p $workdir/lexicon
+
+cat $workdir/keywords.txt | perl -e '
+  open(f, shift @ARGV);
+  while(<f>) {
+    @F = split;
+    $lex{$F[0]} = 1;
+  }
+  close(f);
+
+  while(<STDIN>) {
+    @F = split;
+    foreach $w (@F[1..$#F]) {
+      print "$w\n" unless defined $lex{$w};
+    }
+  }
+' $workdir/L1.lex | sort -u > $workdir/lexicon/oov.txt
+
+local/apply_g2p.sh --nj $nj --cmd "$cmd" --icu-transform "$icu_transform" \
+  --var-counts $g2p_nbest --var-mass $g2p_mass \
+  $workdir/lexicon/oov.txt $g2p $workdir/lexicon || exit 1
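The next command block collapses homophones: every distinct pronunciation keeps a single head word in L1.dedup.lex, and L1.revdup.fst.txt is a one-state transducer mapping each alternative spelling back to its head word (it is composed back onto the proxies at the very end of this script). A toy, self-contained sketch of the grouping, using a hypothetical three-word lexicon:

    #!/usr/bin/env perl
    # Sketch of the homophone grouping done by the deduplication step below.
    use strict; use warnings;
    my %LEX;
    while (my $line = <DATA>) {
      chomp $line;
      my ($word, $pron) = split " ", $line, 2;
      push @{$LEX{$pron}}, $word;
    }
    for my $pron (sort keys %LEX) {
      print "head=$LEX{$pron}[0] pron=[$pron] alts=@{$LEX{$pron}}\n";
    }
    __DATA__
    ate ey t
    eight ey t
    nine n ay n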
"\n"; $i+=1;' > $workdir/phones.txt + +#--cat $workdir/L2.lex $workdir/L1.lex |\ +#-- awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\ +#-- sort -u | sed '1i\' |\ +#-- cat - $workdir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \ +#-- > $workdir/phones.txt + +cat $workdir/keywords.txt |\ + local/kwords2indices.pl --map-oov 0 $workdir/words.normalized.txt > $workdir/keywords.int + + +cat $workdir/L1.lex $workdir/lexicon/lexicon.lex | sed 's/\t/ /g' | \ + perl -ne 'chomp; + ($word, $pron) = split / /, $_, 2; + $pron =~ s/_[^ ]*//g; + print "$word $pron\n";' | \ + sort -u > $workdir/L2.lex + +cat $workdir/L1.revdup.fst.txt |\ + fstcompile --isymbols=$workdir/words.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=olabel - $workdir/L1.revdup.fst + +echo "" + +#--phone_disambig_symbol=`grep \#0 $workdir/phones.txt | awk '{print $2}'` +#--word_disambig_symbol=`grep \#0 $workdir/words.txt | awk '{print $2}'` +#--phone_disambig_symbols=`grep "^#" $workdir/phones.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#--word_disambig_symbols=`grep "^#" $workdir/words.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#-- +#--cat $workdir/L1.disambig.lex |\ +#-- utils/make_lexicon_fst.pl $pron_probs_param - |\ +#-- fstcompile --isymbols=$workdir/phones.txt \ +#-- --osymbols=$workdir/words.txt - |\ +#-- fstaddselfloops "echo $phone_disambig_symbol |" \ +#-- "echo $word_disambig_symbol |" |\ +#-- fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\ +#-- fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\ +#-- fstarcsort --sort_type=ilabel > $workdir/L1.fst + +cat $workdir/L1.dedup.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=ilabel > $workdir/L1.fst + +echo "" +cat $workdir/L2.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstinvert | fstarcsort --sort_type=olabel > $workdir/L2.fst + +# Compiles E.fst +conf_mat_param="" +if [ ! 
-z $confusion_matrix ]; then
+  echo "$0: Using confusion matrix, normalizing"
+  local/count_to_logprob.pl --cutoff $count_cutoff \
+    $confusion_matrix $workdir/confusion.txt
+  conf_mat_param="--confusion-matrix $workdir/confusion.txt"
+fi
+
+cat $workdir/phones.txt | \
+  grep -v -F -f $lang/phones/silence.txt | awk '{print $1;}' |\
+  local/build_edit_distance_fst.pl --boundary-off=true $conf_mat_param - - |\
+  fstcompile --isymbols=$workdir/phones.txt \
+    --osymbols=$workdir/phones.txt - $workdir/E.fst
+
+# Pre-composes L2 and E, for the sake of efficiency
+fstcompose $workdir/L2.fst $workdir/E.fst |\
+  fstarcsort --sort_type=ilabel > $workdir/L2xE.fst
+
+nof_keywords=`cat $workdir/keywords.txt |wc -l`
+if [ $nj -gt $nof_keywords ]; then
+  nj=$nof_keywords
+  echo "$0: Too many jobs requested, using $nj instead"
+fi
+
+# Generates the proxy keywords
+mkdir -p $workdir/split/log
+if [ -z "$g_alpha" ] || [ "${g_inv_alpha:-1}" -eq 0 ] ; then
+  echo "$0: Generating proxies without G.fst"
+  $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \
+    split -n r/JOB/$nj $workdir/keywords.int \| \
+    generate-proxy-keywords --verbose=1 \
+      --proxy-beam=$beam --proxy-nbest=$nbest \
+      --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+      $workdir/L2xE.fst $workdir/L1.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts
+else
+  echo "$0: Generating proxies with G.fst"
+  $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \
+    split -n r/JOB/$nj $workdir/keywords.int \| \
+    generate-proxy-keywords-ex --verbose=1 --g-beam=$g_beam --g-alpha=$g_alpha \
+      --proxy-beam=$beam --proxy-nbest=$nbest \
+      --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+      $workdir/L2xE.fst $workdir/L1.fst $lang/G.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts
+fi
+
+
+proxy_fsts=""
+for j in `seq 1 $nj`; do
+  proxy_fsts="$proxy_fsts $workdir/split/proxy.$j.fsts"
+done
+cat $proxy_fsts | fsttablecompose $workdir/L1.revdup.fst ark:- ark:- |\
+  fsts-project ark:- ark,scp:$workdir/keywords.fsts,- |\
+  sort -o $workdir/keywords.scp
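Conceptually, for each keyword FST K the proxies are obtained roughly as proxies(K) = prune(project-output(K o L2 o E o L1)): L2 maps the (possibly OOV) keyword words to phones, E substitutes confusable phones, and L1 maps phone strings back to in-vocabulary words. A rough, hypothetical single-keyword sketch with plain OpenFst tools (the actual binary additionally applies the per-stage --proxy-beam/--proxy-nbest and --phone-beam/--phone-nbest pruning):

    # Sketch only: K.fst is a hypothetical linear FST for one keyword.
    fstcompose K.fst L2xE.fst | fstcompose - L1.fst |\
      fstproject --project_output | fstrmepsilon > proxies.fst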
diff --git a/egs/babel/s5d/local/search/create_categories.pl b/egs/babel/s5d/local/search/create_categories.pl
new file mode 100755
index 00000000000..27703af20ca
--- /dev/null
+++ b/egs/babel/s5d/local/search/create_categories.pl
@@ -0,0 +1,112 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+my $Usage = <<EOU;
+Usage: $0 [options] <keywords-file/results-file>
+ e.g.: $0 keywords.txt
+ or    $0 --results results
+
+Allowed options:
+  --results  : instead of the keyword specification format, the keyword search
+               results format is assumed.
+
+NOTE:
+  If you need both kinds of information, you can call the script twice (with
+  different parameters) and call local/search/normalize_categories.pl to merge
+  (and normalize) these two tables together.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+use open qw(:std :utf8);
+
+binmode STDIN,  ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+my $result_format;
+GetOptions("results", \$result_format) or do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if ( @ARGV > 1 ) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+sub QuantizeCount {
+  my $count = shift @_;
+
+  if ($count <= 0) {
+    return "0";
+  } elsif ($count == 1) {
+    return "000-001";
+  } elsif ($count <= 5) {
+    return "002-005";
+  } elsif ($count <= 10) {
+    return "006-010";
+  } elsif ($count <= 20) {
+    return "011-020";
+  } elsif ($count <= 100) {
+    return "021-100";
+  } else {
+    return "101-inf";
+  }
+}
+
+if (not $result_format ) {
+  my $kwlist_name=$ARGV[0];
+  while (my $line = <>) {
+    chomp $line;
+    my ($kwid, $text) = split " ", $line, 2;
+
+    my @words = split " ", $text;
+    printf "$kwid NGramOrder=%03d\n", scalar @words;
+    printf "$kwid Characters=%03d\n", length(join("", @words));
+    print "$kwid $kwid\n";
+  }
+} else {
+  my $prev_kwid = "";
+  my $count = 0;
+
+  while (my $line = <>) {
+    chomp $line;
+    my @entries = split " ", $line;
+    next unless @entries;
+
+    if ($prev_kwid ne $entries[0]) {
+      if ($prev_kwid) {
+        print "$prev_kwid ResCount=$count\n";
+        print "$prev_kwid ResCountQuant=" . QuantizeCount($count) . "\n";
+      }
+      $count = 0;
+      $prev_kwid = $entries[0];
+    }
+    $count += 1;
+  }
+}
+
+
diff --git a/egs/babel/s5d/local/search/filter_by_category.pl b/egs/babel/s5d/local/search/filter_by_category.pl
new file mode 100755
index 00000000000..baef4f6ac2b
--- /dev/null
+++ b/egs/babel/s5d/local/search/filter_by_category.pl
@@ -0,0 +1,360 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat <keywords.int/results> | $0 [options] <categories-file> <expression>
+ e.g.: cat data/dev10h.pem/kws/keywords.int | \
+       $0 data/dev10h.pem/kws/categories "Characters>10&&NGramOrder=2"
+
+Allowed options:
+  -f  : assume the KWID (for which the filter expression is
+        evaluated) on the k-th column (int, default 0)
+
+NOTE:
+  When the expression is empty (or missing), it is evaluated as always true,
+  i.e. no entry will be removed from the input
+
+CAVEATS:
+  The operator '=' is equivalent to '=='.
+
+  Do not use the '-' character in the categories file if you want to use that
+  category in the filter expression. For example, the default setup adds
+  the KWID itself as a category. If you use Babel-style KWIDs,
+  for example KW304-0008, you won't be able to use the KWID in
+  the expression itself (but you can still filter according to other categories),
+  i.e.
for example + KW306-0008&&OOV=1 might be a valid expression but most probably wont do + what you want (it will get parsed as + KW306 - (8 && (OOV == 1)) which is most probably not + what you wanted. + Currently, there is no way how to make it work -- unless you rename + the categories (i.e. for example substitute '-' by '_'. While this might be + probably solved by taking the categories into account during parsing, it's + probably not that important. + +EOU + +use strict; +use warnings 'FATAL'; +use utf8; +use Switch; +use Data::Dumper; +use Scalar::Util qw(looks_like_number); +use Getopt::Long; +use POSIX; + +my $debug = ''; +my $field = 0; + +GetOptions("debug" => \$debug, + "f" => \$field) || do { + print STDERR "Cannot parse the command line parameters.\n\n"; + print $Usage . "\n"; + die "Cannot continue"; +}; + +if ((@ARGV < 1) || (@ARGV>2)) { + print STDERR "Incorrect number of parameters.\n\n"; + print $Usage . "\n"; + die "Cannot continue"; +} + +my $group_file = $ARGV[0]; +my $str_expr=""; +$str_expr=$ARGV[1] if defined($ARGV[1]); + +# Split the expression into tokens (might need some more attention +# to make it really correct +sub tokenize_string { + my $s = shift; + $s =~ s/^\s+|\s+$//g; + my @tokens = split(/ *(\&\&|\|\||\>\=|\<\=|==|!=|[\+\-\=\(\)\<\>\*\/^!]) */, $s); + #print STDERR join(", ", @tokens) . "\n"; + return @tokens; +} + + + +# precedence table should reflect the precedence of the operators in C +my %precedence = ( + #unary operators + 'u+' => 11, + 'u-' => 11, + 'u!' => 11, + + '^' => 10, + #'(' => 10, + #')' => 10, + + + #arithmetic operators + '*' => 8, + '/' => 8, + '%' => 8, + + '+' => 7, + '-' => 7, + + # logical operators + '<' => 5, + '>' => 5, + '>=' => 5, + '<=' => 5, + '=' => 4, + '==' => 4, + '!=' => 4, + '&&' => 3, + '||' => 2, +); + +my %right=( + #unary operators + 'u+' => 1, + 'u-' => 1, + 'u!' => 1, + + # this contradicts matlab, but it's what the mathematician's + # interpretation is: 2^3^4 = 2^(3^4), instead of matlabs + # left associativity 2^3^4 = (2^3)^4 + # as always -- if the order is important, use parentheses + '^' => 1, +); + +sub assoc { + my $op = $_[0]; + return (exists $right{$op}) ? $right{$op} : -1; +} + +sub looks_like_variable { + return $_[0] =~ /^[A-Za-z_][A-Za-z_0-9]*$/; +} + +sub unary_op { + my $token = shift; + my $op = shift; + my $res; + + switch( $token ) { + case 'u+' {$res = $op} + case 'u-' {$res = -$op} + case 'u!' 
{$res = !$op} + else {die "Unknown operator $token"} + } + + return $res; +} + +sub binary_op { + my $token = shift; + my $op2 = shift; + my $op1 = shift; + my $res; + + $op2 += 0.0; + $op1 += 0.0; + switch( $token ) { + case '^' {$res = $op1 ** $op2} + case '*' {$res = $op1 * $op2} + case '/' {$res = $op1 / $op2} + case '%' {$res = $op1 % $op2} + case '+' {$res = $op1 + $op2} + case '-' {$res = $op1 - $op2} + case '<' {$res = $op1 < $op2} + case '>' {$res = $op1 > $op2} + case '>=' {$res = $op1 >= $op2} + case '<=' {$res = $op1 <= $op2} + case '=' {$res = $op1 == $op2} + case '==' {$res = $op1 == $op2} + case '!=' {$res = $op1 != $op2} + case '&&' {$res = $op1 && $op2} + case '||' {$res = $op1 || $op2} + else {die "Unknown operator $token"} + } + + return $res; +} + +# refer to https://en.wikipedia.org/wiki/Shunting-yard_algorithm +# plus perl implementation in http://en.literateprograms.org/Shunting_yard_algorithm_(Perl) +sub to_postfix { + my @stack; + my @output = (); + my $last = ""; + + my @tokens=tokenize_string(shift); + + foreach my $token (@tokens) { + next unless $token ne ''; + + # detection of an unary operators + # not sure if this heuristics is complete + if (($token =~ /^[-+!]$/) && + (defined($precedence{$last}) || ($last eq '') || ($last eq ')'))) { + #print "Unary op: $token\n"; + $token="u$token"; + } + + if (looks_like_number($token)) { + if (looks_like_number($last) || looks_like_variable($last)) { + die "Value tokens must be separated by an operator"; + } + push @output, $token; + } elsif (looks_like_variable($token)) { + if (looks_like_number($last) || looks_like_variable($last)) { + die "Value tokens must be separated by an operator"; + } + push @output, $token; + } elsif (defined $precedence{$token}) { + my $p = $precedence{$token}; + + while (@stack) { + my $old_p = $precedence{$stack[-1]}; + last if $p > $old_p; + last if $p == $old_p and (assoc($token) >= 0); + push @output, pop @stack; + } + push @stack, $token; + } elsif ($token eq '(') { + push @stack, $token; + } elsif ($token eq ')') { + my $t; + do { + $t=pop @stack; + push @output, $t unless $t eq '(' + } while ($t && ($t ne '(')); + die "No matching (" unless $t eq '('; + #print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ; + } else { + print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ; + die "Unknown token \"$token\" during parsing the expression"; + } + $last=$token; + } + + # dump the rest of the operators + while (@stack) { + my $t = pop @stack; + die "No matching )" if $t eq '('; + push @output, $t; + } + + # final postfix expression + return @output; +} + +# this follows the standard RPM (postfix) expression evaluation +# the only possibly slightly confusing part is that when we encounter +# a variable, we lookup it's value in %vars. By default, (i.e. 
if the variable
+# is not present in the dict), the variable evaluates to 0 (false)
+sub evaluate_postfix {
+  my @expression = @{$_[0]};
+  my %vars = %{$_[1]};
+
+  my @stack = ();
+  foreach my $token (@expression) {
+    if (looks_like_number($token)) {
+      push @stack, $token;
+    } elsif (looks_like_variable($token)) {
+      my $val = 0;
+      if (defined $vars{$token}) {
+        $val = $vars{$token};
+      }
+      push @stack, $val;
+    } elsif (defined $precedence{$token}) {
+      my $res;
+      if ( $token =~ /^u.*$/) {
+        my $op = pop @stack;
+        $res = unary_op($token, $op);
+      } else {
+        my $op1 = pop @stack;
+        my $op2 = pop @stack;
+        $res = binary_op($token, $op1, $op2);
+      }
+      push @stack, $res;
+    } else {
+      die "Unknown token: $token, expression=[" . join(" ", @expression) . "]\n";
+    }
+    #print STDERR "token = $token; stack = [" . join(' ', @stack) . "]\n";
+
+  }
+  if (@stack != 1) {
+    my $expr = join(" ", @expression);
+    print STDERR "expression = [$expr]; stack = [" . join(' ', @stack) . "]\n";
+    die "The operators did not reduce the stack completely!" if @stack != 1;
+  }
+  return pop @stack;
+}
+
+
+#--print "infix = [" . join(' ', @tokens) . "]\n";
+#--my @exp = to_postfix(@tokens);
+#--my %vals = (A=>50, C => -3);
+#--print "output = [" . join(' ', @exp) . "]\n";
+#--
+#--print evaluate_postfix(\@exp, \%vals);
+
+
+my @expression = to_postfix($str_expr);
+
+my %GROUPS;
+#Read the groups table
+open(G, $ARGV[0]) or die "Cannot open the group table $ARGV[0]";
+while (my $line = <G>) {
+  my @entries = split(" ", $line);
+  my $kwid = shift @entries;
+
+  foreach my $group (@entries) {
+    my @entries = split "=", $group;
+    if (@entries == 2) {
+      $GROUPS{$kwid}->{$entries[0]} = $entries[1];
+    } elsif (@entries == 1 ) {
+      $GROUPS{$kwid}->{$group} = 1;
+    } else {
+      die "Unknown format of the category $group";
+    }
+  }
+}
+close(G);
+
+my $let_all_pass=0;
+if (not @expression) {
+  $let_all_pass=1;
+}
+
+while (my $line = <STDIN>) {
+  #shortcut if the "ALL" group is used
+  if ($let_all_pass == 1) {
+    print $line;
+    next;
+  }
+
+  my @entries = split(" ", $line);
+  my $kwid = $entries[$field];
+
+  my $res = evaluate_postfix(\@expression, $GROUPS{$kwid});
+  if ($res) {
+    print $line;
+  } else {
+    print STDERR "Not keeping: $line" if $debug;
+  }
+
+}
+
+
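As a worked example of to_postfix() and evaluate_postfix() above, take the filter from the usage message, "Characters>10&&NGramOrder=2", applied to a keyword whose categories carry Characters=011 and NGramOrder=002 (recall that '=' is treated as '=='):

    infix   : Characters > 10 && NGramOrder = 2
    postfix : Characters 10 > NGramOrder 2 = &&
    evaluate: (11 > 10) -> 1,  (2 == 2) -> 1,  1 && 1 -> 1   => the entry is kept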
diff --git a/egs/babel/s5d/local/search/filter_kws_results.pl b/egs/babel/s5d/local/search/filter_kws_results.pl
new file mode 100755
index 00000000000..f4e6589c50a
--- /dev/null
+++ b/egs/babel/s5d/local/search/filter_kws_results.pl
@@ -0,0 +1,189 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat <raw-results> | $0 [options] > output
+ e.g.: gunzip -c exp/tri5/kws/result.*.gz | $0 > exp/tri5/kws/results
+
+Allowed options:
+  --nbest    : how many best results (for each KWID) should be printed
+               (int, default -1, i.e. no limit)
+  --duptime  : duplicate detection -- tolerance (in frames) for two hits
+               to be considered the same (int, default = 50)
+  --likes    : the scores in the input are negated log-likelihoods (lower is better)
+  --probs    : the scores in the input are probabilities (higher is better; default)
+
+CAVEATS:
+  The script tries to be memory-efficient. The impact of this is that we
+  assume the results are sorted by KWID (i.e. all entries with the same KWID
+  are in a continuous block). The user is responsible for sorting it.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+# if the parameter nbest > 0, then filter the result list so that there are no
+# more than nbest hits in the output for each of the KWIDs
+#
+
+my $nbest = -1;
+my $duptime = 50;
+my $likes = 0;
+
+#print STDERR join(" ", $0, @ARGV) . "\n";
+GetOptions ("nbest=f" => \$nbest,
+            "likes" => \$likes,
+            "probs" => sub{ $likes = 0},
+            "duptime=i" => \$duptime) || do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+# Function for sorting
+sub KwslistOutputSort {
+  if ($a->[0] ne $b->[0]) {
+    if ($a->[0] =~ m/[0-9]+$/ && $b->[0] =~ m/[0-9]+$/) {
+      ($a->[0] =~ /([0-9]*)$/)[0] <=> ($b->[0] =~ /([0-9]*)$/)[0]
+    } else {
+      $a->[0] cmp $b->[0];
+    }
+  } elsif ($a->[5] ne $b->[5]) {
+    $b->[5] <=> $a->[5];
+  } else {
+    $a->[1] cmp $b->[1];
+  }
+}
+
+sub KwslistDupSort {
+  my ($a, $b, $duptime) = @_;
+  if ($a->[1] ne $b->[1]) {
+    #file
+    $a->[1] cmp $b->[1];
+  } elsif (abs($a->[2]-$b->[2]) >= $duptime){
+    #start
+    $a->[2] <=> $b->[2];
+  } elsif ($a->[4] ne $b->[4]) {
+    #score
+    $b->[4] <=> $a->[4];
+  } else {
+    #end time
+    $b->[3] <=> $a->[3];
+  }
+}
+
+my @RESULTS;
+my %SEEN_KWS;
+my $kw = "";
+
+while ( my $line = <STDIN> ) {
+  chomp $line;
+  my @F = split " ", $line;
+  @F == 5 || die "$0: Bad number of columns in raw results \"$line\"\n";
+
+  $F[4] = -$F[4] if $likes;
+
+  if ($F[0] eq $kw) {
+    push @RESULTS, \@F;
+  } elsif ($kw eq "" ) {
+    @RESULTS = ();
+    push @RESULTS, \@F;
+    $kw = $F[0];
+  } else {
+
+    my @results;
+    my @tmp = sort { KwslistDupSort($a, $b, $duptime) } @RESULTS;
+
+    @results = ();
+    if (@tmp >= 1) {push(@results, $tmp[0])};
+    for (my $i = 1; $i < scalar(@tmp); $i ++) {
+      my $prev = $results[-1];
+      my $curr = $tmp[$i];
+      if ((abs($prev->[2]-$curr->[2]) < $duptime ) &&
+          ($prev->[1] eq $curr->[1])) {
+        next;
+      } else {
+        push(@results, $curr);
+      }
+    }
+
+    # this is probably needed only when nbest > 0
+    @results = sort { ($b->[4] + 0.0) <=> ($a->[4] + 0.0) } @results;
+
+    my $len;
+    if( $nbest > 0) {
+      $len = scalar @results < $nbest ?
scalar @results : $nbest; + } else { + $len = scalar @results; + } + for (my $i=0; $i < $len; $i++) { + $results[$i]->[4] = -$results[$i]->[4] if $likes; + print join(" ", @{$results[$i]}) . "\n"; + } +} + + diff --git a/egs/babel/s5d/local/search/normalize.sh b/egs/babel/s5d/local/search/normalize.sh new file mode 100755 index 00000000000..38054f75879 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +# Begin configuration section. +# case_insensitive=true +extraid= +min_lmwt=8 +max_lmwt=12 +cmd=run.pl +stage=0 +ntrue_from= +# End configuration section. + +help_message="$0: score the kwslist using the F4DE scorer from NIST + Example: + $0 [additional-parameters] + where the most important additional parameters can be: + --extraid #for using, when a non-default kws tasks are setup + (using the kws_setup.sh --extraid) for a kaldi-single data-dir" + +echo $0 $@ +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: incorrect number of variables given to the script\n\n" + printf "$help_message\n" + exit 1; +fi + +set -e -o pipefail + +langdir=$1 +if [ -z $extraid ] ; then + kwsdatadir=$2/kws +else + kwsdatadir=$2/kwset_${extraid} +fi +kwsoutputdir="$3" + +trials=$(cat $kwsdatadir/trials) +mkdir -p $kwsoutputdir/log/ + +if [ $stage -le 0 ] ; then + for LMWT in $(seq $min_lmwt $max_lmwt) ; do + mkdir -p ${kwsoutputdir}_$LMWT/details/ + + cp ${ntrue_from}_$LMWT/details/ntrue ${kwsoutputdir}_$LMWT/details/ntrue + cp ${ntrue_from}_$LMWT/details/ntrue_raw ${kwsoutputdir}_$LMWT/details/ntrue_raw + echo "$ntrue_from" > ${kwsoutputdir}_$LMWT/details/ntrue_from + done +fi + +if [ $stage -le 1 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \ + cat ${kwsoutputdir}_LMWT/results \|\ + local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\ + \> ${kwsoutputdir}_LMWT/details/results + +fi + +if [ $stage -le 2 ]; then +if [ -f $kwsdatadir/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . $kwsdatadir/f4de_attribs #override the previous variables + + ecf=$kwsdatadir/ecf.xml + kwlist=$kwsdatadir/kwlist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \ + mkdir -p ${kwsoutputdir}_LMWT/f4de/\; \ + cat ${kwsoutputdir}_LMWT/details/results \| \ + utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \ + local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\ + local/search/write_kwslist.pl --flen $flen --language $language \ + --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml + +fi +fi + +echo "$0: Done" +exit 0; + + diff --git a/egs/babel/s5d/local/search/normalize_categories.pl b/egs/babel/s5d/local/search/normalize_categories.pl new file mode 100755 index 00000000000..f3354e8c4d4 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize_categories.pl @@ -0,0 +1,89 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage=<<EOU;
+Usage: cat <partial-categories-files> | $0 [options] > categories
+ e.g.: cat partial_categories.* | $0 > categories
+
+Allowed options:
+  --one-per-line  : by default, there will be only one line for each KWID.
+                    This option changes the output format so that each
+                    pair "KWID CATEGORY" will be on its own line.
+
+Note:
+  Reads the stream of categories information in the format
+
+    keyword-ID1 category category2
+    keyword-ID2 category2
+    keyword-ID1 category category2
+
+  Duplicate entries are allowed (and will be removed).
+  Multiple categories per line are allowed (and will be merged).
+
+  The purpose of the script is to be able to merge the information from
+  different scripts. Each script can generate its own information about
+  categories, and this script can then be used to merge these partial
+  tables into one global table.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+use POSIX;
+
+my $one_per_line;
+
+GetOptions("one-per-line", \$one_per_line) or
+  do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+my %GROUPS;
+
+while (my $line=<STDIN>) {
+  chomp $line;
+  my @entries = split " ", $line;
+
+  die "The line \"$line\" does not have the correct format" if @entries < 2;
+
+  my $kwid=shift @entries;
+  for my $category (@entries) {
+    $GROUPS{$kwid}->{$category} = 1;
+  }
+}
+
+for my $kwid (sort keys %GROUPS) {
+  if ($one_per_line) {
+    foreach my $category (sort keys %{$GROUPS{$kwid}} ) {
+      print $kwid . " " . $category . "\n";
+    }
+  } else {
+    print $kwid . " " . join(" ", sort keys %{$GROUPS{$kwid}}) . "\n";
+  }
+}
diff --git a/egs/babel/s5d/local/search/normalize_results_kst.pl b/egs/babel/s5d/local/search/normalize_results_kst.pl
new file mode 100755
index 00000000000..e57b947f278
--- /dev/null
+++ b/egs/babel/s5d/local/search/normalize_results_kst.pl
@@ -0,0 +1,203 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+my $Usage = <<EOU;
+Usage: cat <results> | $0 [options] > results.normalized
+
+Allowed options:
+  --probs             : the input is probabilities instead of neg-log-likelihoods
+
+  --duration|--trials : size of the searched collection in seconds (float)
+  --beta              : the FA vs MISS rate (float, default 999.9)
+  --ntrue-scale       : scale for scaling the expected count of true hits (float, default 1.0)
+  --thr|--threshold   : the decision threshold (float, default 0.5)
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+my $ntrue_scale = 1.0;
+my $global_thr = 0.5;
+my $beta = 999.9;
+my $duration = 35785.578;
+my $ntrue_table_filename;
+my $probs=0;
+my $bsum_filename;
+
+GetOptions("duration|trials=f" => \$duration,
+           "ntrue-scale=f" => \$ntrue_scale,
+           "beta=f" => \$beta,
+           "probs" => \$probs,
+           "thr|threshold=f" => \$global_thr,
+           "ntrue-table=s" => \$ntrue_table_filename,
+           "bsum-table=s" => \$bsum_filename) or do
+{
+  print STDERR "$0: Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "$0: Cannot continue\n"
+};
+
+if (@ARGV != 0) {
+  print STDERR "$0: Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "$0: Cannot continue\n"
+}
+
+sub ComputeKST {
+  my @instances = @{shift @_};
+  my $ntrue_scale = shift @_;
+  my %ntrue_table = %{shift @_};
+
+
+  my $ntrue = 0;
+  foreach my $elem (@instances) {
+    $ntrue += $elem->[4];
+  }
+  #$ntrue = $ntrue / @instances;
+  if (defined ($ntrue_table{$instances[0]->[0]})) {
+    #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n";
+    $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]};
+  } else {
+    #print STDERR "Using the default value $ntrue_scale\n";
+    $ntrue = $ntrue * $ntrue_scale;
+  }
+
+  my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1));
+  return $thr;
+}
+
+sub ComputeKSTWithExpected {
+  my @instances = @{shift @_};
+  my %expected_table = %{shift @_};
+  my $ntrue_scale = shift @_;
+  my %ntrue_table = %{shift @_};
+
+
+  my $ntrue = $expected_table{$instances[0]->[0]};
+  #$ntrue = $ntrue / @instances;
+  if (defined ($ntrue_table{$instances[0]->[0]})) {
+    #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n";
+    $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]};
+  } else {
+    #print STDERR "Using the default value $ntrue_scale\n";
+    $ntrue = $ntrue * $ntrue_scale;
+  }
+
+  my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1));
+  return $thr;
+}
+sub NormalizeScores {
+  my @instances = @{shift @_};
+  my $thr = shift @_;
+  my $global_thr = shift @_;
+
+
+  if ($thr == 0) {
+    $thr = 0.001;
+  }
+  my $q = log($global_thr)/log($thr);
+
+  foreach my $elem (@instances) {
+    $elem->[4] = pow($elem->[4], $q);
+  }
+}
+
+sub WriteResults {
+  my @instances = @{shift @_};
+
+  foreach my $elem (@instances) {
+    print join(" ", @{$elem}) . "\n";
+    die "$0: " . join(" ", @{$elem}) . "\n" if $elem->[-1] > 1.0;
+  }
+
+}
+
+my $KWID;
+my @putative_hits;
+my %NTRUE_TABLE = ();
+
+my %BSUM=();
+if (defined $bsum_filename) {
+  open(BSUMF, $bsum_filename) or die "$0: Cannot open $bsum_filename";
+  while (my $line = <BSUMF>) {
+    chomp $line;
+    next unless (($line =~ m/^\s*KW/) || ($line =~ m/^Keyword\s*KW/));
+    $line =~ s/^Keyword//g;
+    $line =~ s/^\s+|\s+$//g;
+    my @entries = split /\s*\|\s*/, $line;
+    $BSUM{$entries[0]} = $entries[12];
+  }
+  close(BSUMF);
+}
+
+if ( defined $ntrue_table_filename) {
+  open (F, $ntrue_table_filename) or die "$0: Cannot open the Ntrue-table file\n";
+  while (my $line = <F>) {
+    my @entries=split(" ", $line);
+
+    die "$0: The Ntrue-table does not have the expected format\n" if @entries != 2;
+    $NTRUE_TABLE{$entries[0]} = $entries[1] + 0.0;
+  }
+  close (F);
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line;
+
+  if ($KWID && ($kwid ne $KWID)) {
+
+    my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE );
+    if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) {
+      print STDERR "$0: $KWID $thr $BSUM{$KWID} " . log($thr)/log($global_thr) . "\n";
+      my $old_thr = $thr;
+      $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr));
+    }
+    if ($thr < 0.9999 ) {
+      NormalizeScores(\@putative_hits, $thr, $global_thr);
+      WriteResults(\@putative_hits);
+    }
+
+    $KWID = $kwid;
+    @putative_hits = ();
+  } elsif ( not $KWID ) {
+    $KWID = $kwid;
+  }
+
+  unless ($probs) {
+    $score = exp(-$score);
+  }
+  push @putative_hits, [$kwid, $file, $start, $end, $score];
+}
+
+if ($KWID) {
+  my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE );
+  if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) {
+    $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr));
+  }
+  if ($thr < 0.9999 ) {
+    NormalizeScores(\@putative_hits, $thr, $global_thr);
+    WriteResults(\@putative_hits);
+  }
+}
+
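The script implements the usual keyword-specific thresholding: Ntrue is estimated as the (scaled) sum of detection scores, the per-keyword threshold is thr = beta * Ntrue / (T + (beta - 1) * Ntrue), and every score is mapped by s -> s^q with q = log(0.5)/log(thr), which sends thr itself exactly onto the global 0.5 decision point. A worked example with made-up numbers:

    beta = 999.9, T = 36000 s, sum of scores Ntrue = 2
    thr = 999.9 * 2 / (36000 + 998.9 * 2) ~= 0.0526
    q   = log(0.5) / log(0.0526)          ~= 0.235
    s = 0.30  maps to 0.30^0.235  ~= 0.75  (above 0.5 => YES)
    s = 0.001 maps to 0.001^0.235 ~= 0.20  (below 0.5 => NO)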
"\n" if $elem->[-1] > 1.0; + } + +} + +my $KWID; +my @putative_hits; +my %NTRUE_TABLE = (); + +my %BSUM=(); +if (defined $bsum_filename) { + open(BSUMF, $bsum_filename) or die "$0: Cannot open $bsum_filename"; + while (my $line = ) { + chomp $line; + next unless (($line =~ m/^\s*KW/) || ($line =~ m/^Keyword\s*KW/)); + $line =~ s/^Keyword//g; + $line =~ s/^\s+|\s+$//g; + my @entries = split /\s*\|\s*/, $line; + $BSUM{$entries[0]} = $entries[12]; + } + close(BSUMF); +} + +if ( defined $ntrue_table_filename) { + open (F, $ntrue_table_filename) or die "$0: Cannot open the Ntrue-table file\n"; + while (my $line = ) { + my @entries=split(" ", $line); + + die "$0: The Ntrue-table does not have expected format\n" if @entries != 2; + $NTRUE_TABLE{$entries[0]} = $entries[1] + 0.0; + } + close (F); +} + +while (my $line = ) { + chomp $line; + (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line; + + if ($KWID && ($kwid ne $KWID)) { + + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + print STDERR "$0: $KWID $thr $BSUM{$KWID} " . log($thr)/log($global_thr) . "\n"; + my $old_thr = $thr; + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } + + $KWID = $kwid; + @putative_hits = (); + } elsif ( not $KWID ) { + $KWID = $kwid; + } + + unless ($probs) { + $score = exp(-$score); + } + push @putative_hits, [$kwid, $file, $start, $end, $score]; +} + +if ($KWID) { + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } +} + diff --git a/egs/babel/s5d/local/search/per_category_stats.pl b/egs/babel/s5d/local/search/per_category_stats.pl new file mode 100755 index 00000000000..d14636dcc0f --- /dev/null +++ b/egs/babel/s5d/local/search/per_category_stats.pl @@ -0,0 +1,326 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# Takes the alignment.csv and the category tables and computes the per-category +# statistics including the oracle measures (OTWV, MTWV, STWV) +# Is not particulary effective (for example, it computes the oracle measures +# for each keyword several times (once for each category the keyword is in); +# To achieve at least partial speed-up, we cache some of the partial statistics +# The caching gave us speed improvement approx. 
from 22s down to 14s
+#
+# The lines in the output starting with '#' are intended as comments only -- you
+# can filter them out using grep -v '^#'
+# The first comment line contains the header,
+# the second comment line contains column numbers (to make using cut -f easier)
+# -- you don't have to count the fields, just use the printed
+# number of the field
+#
+# Compatibility:
+# We tried to make the numbers comparable with F4DE output. If there is a large
+# difference, something is probably wrong and you should report it.
+# The column names should be compatible (to a large extent) with the F4DE output
+# files (sum.txt, bsum.txt, cond.bsum.txt). Our intention was, however,
+# to make this file easily grepable/machine-processable, so we didn't honor
+# the original F4DE file format
+#
+# Usage:
+# It reads the alignment.csv from the STDIN.
+# Moreover, it expects exactly two arguments: the number of trials and
+# the category table
+# I.e.
+#   local/search/per_category_stats.pl <trials> <categories>
+#
+# Example:
+#   cat alignment.csv | perl local/search/per_category_stats.pl `cat data/dev10h.pem/extra_kws/trials` data/dev10h.pem/extra_kws/categories
+#
+# Additional parameters
+#  --beta        # beta value (weight of FAs), default 999.9
+#  --sweep-step  # sweep step for the oracle measures
+#
+# TODO
+# Document what each field means (might be slightly tricky, as even F4DE
+# does not document the exact meaning of some of the fields).
+#
+# ATWV - actual Term-Weighted Value (TWV for the threshold 0.5)
+# MTWV - Maximum Term-Weighted Value (TWV for the threshold that maximizes
+#        the given category's TWV)
+# OTWV - Optimum Term-Weighted Value (TWV assuming the decision threshold
+#        for each Term/KW is determined optimally)
+# STWV - Supreme TWV - essentially Lattice Recall
+
+use strict;
+use warnings FATAL => 'all';
+use utf8;
+use List::Util;
+use Data::Dumper;
+use Getopt::Long;
+use Scalar::Util qw(looks_like_number);
+
+binmode STDIN,  ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+my %CATEGORIES;
+my %STATS;
+my %K;
+
+my $beta=999.9;
+my $step_size=0.005;
+my $threshold = 0.5;
+my $enable_caching = 1;
+
+my $cat_maxlen = 9;  #Must accommodate the string "#CATEGORY" in the header
+my $field_size = 9;
+
+my $L = int(1.0/$step_size) + 1;
+
+GetOptions("beta=f" => \$beta,
+           "sweep-step=f" => \$step_size,
+           "disable-caching" => sub{ $enable_caching=''; }
+  ) or die "Cannot process the input options (possibly unknown switch)";
+
+die "Unsupported number of arguments."
if @ARGV != 2;
+if ( not looks_like_number($ARGV[0])) {
+  die "The first parameter must be a float number (number of trials) -- got $ARGV[0]";
+}
+
+my $T = 0.0 + $ARGV[0];
+
+
+open(CAT, $ARGV[1]) or die("Cannot open categories file $ARGV[1]");
+while(my $line = <CAT>) {
+  my @entries = split(" ", $line);
+
+  die "Unknown format of category line: \"$line\"" if scalar @entries < 2;
+  my $kw = shift @entries;
+
+
+  if (not defined $STATS{$kw}->{fa_sweep}) {
+    $STATS{$kw}->{fa} = 0;
+    $STATS{$kw}->{corr} = 0;
+    $STATS{$kw}->{miss} = 0;
+    $STATS{$kw}->{lattice_miss} = 0;
+    $STATS{$kw}->{ntrue} = 0;
+    $STATS{$kw}->{count} = 0;
+    $STATS{$kw}->{corrndet} = 0;
+
+    my @tmp1 = (0) x ($L+1);
+    $STATS{$kw}->{fa_sweep} = \@tmp1;
+    my @tmp2 = (0) x ($L+1);
+    $STATS{$kw}->{corr_sweep} = \@tmp2;
+  }
+
+  push @entries, "ALL";
+  foreach my $cat (@entries) {
+    $cat_maxlen = length($cat) if length($cat) > $cat_maxlen;
+    push @{$CATEGORIES{$cat}}, $kw;
+    $K{$cat} += 1;
+  }
+}
+close(CAT);
+#print Dumper(\%CATEGORIES);
+
+
+#print STDERR "Reading the whole CSV\n";
+my $i = 0;
+my $dummy=<STDIN>;
+while (my $line=<STDIN>) {
+  chomp $line;
+  my @entries = split(",", $line);
+
+  die "Unknown format of alignment line: \"$line\"" if scalar @entries != 12;
+
+
+  my $termid = $entries[3];
+  my $ref_time = $entries[5];
+  my $score = $entries[9];
+  my $decision=$entries[10];
+  my $ref = $entries[11];
+
+  if (not defined($STATS{$termid}->{ntrue})) {
+    print STDERR "Term $termid not present in the category table, skipping\n";
+    next
+  }
+  #print "$termid, ref_time=$ref_time, score=$score, start=" . int($score/$step_size + 0.5) . ", L=$L\n" if $termid eq "KW303-00025";
+  if ($score) {
+    $score = 1.0 if $score > 1.0;
+    my $q = int($score/$step_size) + 1;
+    for (my $i = 0; $i < $q ; $i += 1) {
+      if ($ref_time) {
+        $STATS{$termid}->{corr_sweep}->[$i] += 1;
+      } else {
+        $STATS{$termid}->{fa_sweep}->[$i] += 1;
+      }
+    }
+  }
+
+  #print STDERR "$line ";
+  $STATS{$termid}->{count} += 1 if $score;
+
+  #print Dumper($ref_time, $score, $STATS{$termid}) if ($ref_time);
+  if (($decision eq "YES") && ($ref eq "FA")) {
+    $STATS{$termid}->{fa} += 1;
+  } elsif (($decision eq "YES") && ($ref eq "CORR")) {
+    $STATS{$termid}->{corr} += 1;
+    $STATS{$termid}->{ntrue} += 1;
+  } elsif ($ref eq "MISS") {
+    $STATS{$termid}->{lattice_miss} += 1 unless $decision;
+    $STATS{$termid}->{miss} += 1;
+    $STATS{$termid}->{ntrue} += 1;
+  } elsif ($ref eq "CORR!DET") {
+    $STATS{$termid}->{corrndet} += 1;
+  }
+  #print STDERR "Done\n";
+
+}
+
+#print STDERR "Read the whole CSV\n";
+
+# Create the header
+my $H=sprintf "%*s", $cat_maxlen-1, "CATEGORY";
+my @int_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "#KW #Targ #NTarg #Sys #CorrDet #CorrNDet #FA #MISS");
+my @float_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "ATWV MTWV OTWV STWV PFA MPFA OPFA PMISS MPMISS OPMISS THR MTHR OTHR");
+print "#" . join(" ", $H, @int_vals, @float_vals) . "\n";
+# Create a secondary header with column numbers (to make cut'ing easier)
+my @col_nrs = map { sprintf "%*d", $field_size, $_ } (2.. 1+@int_vals + @float_vals);
+print "#" . join(" ", sprintf("%*d", $cat_maxlen-1, 1), @col_nrs) .
"\n"; +# End of the header + +my %CACHE = (); + +foreach my $cat (sort keys %CATEGORIES) { + my $K = 0; + my $ATWV = 0; + my $STWV = 0; + my $PMISS = 0; + my $PFA = 0; + + my $OTWV = 0; + my $OPMISS = 0; + my $OPFA = 0; + my $OTHR = 0; + + my $NTRUE = 0; + my $CORR = 0; + my $FA = 0; + my $MISS = 0; + my $COUNT = 0; + my $CORRNDET = 0; + + my @MTWV_SWEEP = (0) x ($L+1); + my @MPMISS_SWEEP = (0) x ($L+1); + my @MPFA_SWEEP = (0) x ($L+1); + #print Dumper($cat, $CATEGORIES{$cat}); + foreach my $kw (sort @{$CATEGORIES{$cat}}) { + #print Dumper($kw, $STATS{$kw}); + next unless defined $STATS{$kw}->{ntrue}; + next if $STATS{$kw}->{ntrue} == 0; + my $pmiss = 1 - $STATS{$kw}->{corr}/$STATS{$kw}->{ntrue}; + my $pfa = $STATS{$kw}->{fa}/($T - $STATS{$kw}->{ntrue}); + my $twv = 1 - $pmiss - $beta * $pfa; + my $stwv = 1 - $STATS{$kw}->{lattice_miss}/$STATS{$kw}->{ntrue}; + + $NTRUE += $STATS{$kw}->{ntrue}; + $CORR += $STATS{$kw}->{corr}; + $CORRNDET += $STATS{$kw}->{corrndet}; + $FA += $STATS{$kw}->{fa}; + $MISS += $STATS{$kw}->{miss}; + $COUNT += $STATS{$kw}->{count} if $STATS{$kw}->{ntrue} > 0; + + $ATWV = ($K * $ATWV + $twv) / ($K + 1); + $PMISS = ($K * $PMISS + $pmiss) / ($K + 1); + $PFA = ($K * $PFA + $pfa) / ($K + 1); + + $STWV = ($K * $STWV + $stwv ) / ($K + 1); + + $pmiss = 0; + $pfa = 0; + $twv = -99999; + my $othr = -0.1; + #print Dumper($kw, $STATS{$kw}); + if (($enable_caching) && (defined $CACHE{$kw})) { + ($pfa, $pmiss, $twv, $OTHR, my $twv_sweep_cache, my $pfa_sweep_cache, my $pmiss_sweep_cache) = @{$CACHE{$kw}}; + @MTWV_SWEEP = map {($K * $MTWV_SWEEP[$_] + $twv_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPFA_SWEEP = map {($K * $MPFA_SWEEP[$_] + $pfa_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPMISS_SWEEP = map{($K * $MPMISS_SWEEP[$_] + $pmiss_sweep_cache->[$_]) / ($K + 1)} (0..$L); + } else { + my @twv_sweep_cache = (0) x ($L+1); + my @pmiss_sweep_cache = (0) x ($L+1); + my @pfa_sweep_cache = (0) x ($L+1); + + for (my $i = 0; $i <= $L; $i += 1) { + my $sweep_pmiss = 1 - $STATS{$kw}->{corr_sweep}->[$i]/$STATS{$kw}->{ntrue}; + my $sweep_pfa = $STATS{$kw}->{fa_sweep}->[$i]/($T - $STATS{$kw}->{ntrue}); + my $sweep_twv = 1 - $sweep_pmiss - $beta * $sweep_pfa; + if ($twv < $sweep_twv) { + $pfa = $sweep_pfa; + $pmiss = $sweep_pmiss; + $twv = $sweep_twv; + $OTHR = ($i - 1) * $step_size; + } + $pmiss_sweep_cache[$i] = $sweep_pmiss; + $pfa_sweep_cache[$i] = $sweep_pfa; + $twv_sweep_cache[$i] = $sweep_twv; + + #print "$i $sweep_pmiss $sweep_pfa $sweep_twv\n"; + $MTWV_SWEEP[$i] = ($K * $MTWV_SWEEP[$i] + $sweep_twv) / ($K + 1); + $MPFA_SWEEP[$i] = ($K * $MPFA_SWEEP[$i] + $sweep_pfa) / ($K + 1); + $MPMISS_SWEEP[$i] = ($K * $MPMISS_SWEEP[$i] + $sweep_pmiss) / ($K + 1); + } + $CACHE{$kw} = [$pfa, $pmiss, $twv, $OTHR, \@twv_sweep_cache, \@pfa_sweep_cache, \@pmiss_sweep_cache]; + } + + $OTWV = ($K * $OTWV + $twv) / ($K + 1); + $OPMISS = ($K * $OPMISS + $pmiss) / ($K + 1); + $OPFA = ($K * $OPFA + $pfa) / ($K + 1); + $K += 1; + } + + my $max_idx = 0; + my $MTWV = $MTWV_SWEEP[0]; + my $MPMISS = $MPMISS_SWEEP[0]; + my $MPFA = $MPFA_SWEEP[0]; + my $MTHR = 0; + for(my $i = 1; $i <= $L; $i += 1) { + if ($MTWV_SWEEP[$i] > $MTWV) { + $max_idx = $i; + $MTWV = $MTWV_SWEEP[$i]; + $MPMISS = $MPMISS_SWEEP[$i]; + $MPFA = $MPFA_SWEEP[$i]; + $MTHR = ($i - 1) * $step_size; + } + } + + if ($K > 1) { + $OTHR = "NA"; + } + + my $ntarg = $CORRNDET + $FA; + + my @abs_nrs = ($K, $NTRUE, $ntarg, $COUNT, $CORR, $CORRNDET, $FA, $MISS); + @abs_nrs = map { sprintf "%*d", $field_size, $_ } @abs_nrs; + my @flt_nrs = map { $_ eq "NA" ? 
sprintf "%6s", $_ : sprintf "% 6.3g", $_ } ($ATWV, $MTWV, $OTWV, $STWV, $PFA, $MPFA, $OPFA, $PMISS, $MPMISS, $OPMISS, 0.5, $MTHR, $OTHR); + @flt_nrs = map {sprintf "%*s", $field_size, $_} @flt_nrs; + + my $nrs = join(" ", @abs_nrs, @flt_nrs); + + $cat = sprintf("%*s", $cat_maxlen, $cat); + print "$cat $nrs \n"; +} + + diff --git a/egs/babel/s5d/local/search/rttm_to_hitlists.sh b/egs/babel/s5d/local/search/rttm_to_hitlists.sh new file mode 100755 index 00000000000..6d4af6fb916 --- /dev/null +++ b/egs/babel/s5d/local/search/rttm_to_hitlists.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +flen=0.01 +segments= +utt_table= +# End configuration section +echo $0 "$@" +. ./utils/parse_options.sh || exit 1; + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +if [ $# -ne 5 ] ; then + echo "Usage: " + exit 1 +fi + +rttm=$1 +kwlist=$2 +ecf=$3 +workdir=$4 +output=$5 + +for f in $rttm $kwlist $ecf ; do + [ ! -f $f ] && echo "File \"$f\" does not exist." && exit 1 +done + +mkdir -p $workdir + +{ + echo '' + echo '' +} > $workdir/kwslist.xml + +kwseval=`which KWSEval` +if [ -z "$kwseval" ] ; then + echo >&2 "KWSEval from F4DE tools not found" + exit 1 +fi + +bash -x $kwseval -c -r $rttm -e $ecf -t $kwlist -s $workdir/kwslist.xml -f $workdir/ +grep -E ",,MISS" $workdir/alignment.csv | \ + perl -e ' + binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + binmode STDERR, ":utf8"; + + use Data::Dumper; + $flen='$flen'; + %SEGMENTS=(); + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + open(F, $ARGV[0]) or die "Cannot open \"$ARGV[0]\""; + while() { + @entries = split(" ", $_); + $entries[2] = int($entries[2]/$flen+0.5); + $entries[3] = int($entries[3]/$flen+0.5); + push @{$SEGMENTS{$entries[1]}}, [@entries]; + } + close(F); + } + + while() { + chomp; + @entries_tmp = split(",", $_); + @entries = ($entries_tmp[3], + $entries_tmp[1], + int($entries_tmp[5]/$flen + 0.5), + int($entries_tmp[6]/$flen + 0.5), + 1.0 + ); + + $fid = $entries[1]; + $start = $entries[2]; + $end = $entries[3]; + + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + $found = 0; + foreach $entry ( @{$SEGMENTS{$fid}} ) { + if (($start >= $entry->[2]) && ($end <= $entry->[3])) { + $relstart = $start - $entry->[2]; + $relend = $end - $entry->[2]; + print join(" ", $entries[0], $entry->[0], $relstart, $relend, 1.0) . "\n"; + if ($found eq 1) { + print STDERR "WARNING: Segments file generates duplicate hits for the entry"; + print STDERR join(" ", @entries_tmp) . "\n"; + } + $found = 1; + } + } + if ($found eq 0) { + print STDERR "WARNING: Segments file does not allow for finding entry "; + print STDERR join(" ", @entries_tmp) . "\n"; + } + } else { + print join(" ", @entries) . "\n"; + } + } + ' "$segments" | sort | { + if [ -z "$utt_table" ]; then + cat - + else + utils/sym2int.pl -f 2 $utt_table + fi +} > $output diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh new file mode 100755 index 00000000000..e4dba529b3d --- /dev/null +++ b/egs/babel/s5d/local/search/run_phn_search.sh @@ -0,0 +1,139 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./conf/common_vars.sh +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. 
+
+#Example script showing how to run keyword search using the Kaldi-native pipeline
+
+
+if [ $stage -le 0 ]; then
+  local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \
+    exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix
+fi
+
+if [ $stage -le 1 ] ; then
+  local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p
+fi
+
+dataset=${dir%%.*}
+datatype=${dir#*.}
+
+lang=data/lang.phn
+if [ "$dir" == "$dataset" ]; then
+  data=data/${dataset}.phn
+else
+  data=data/${dataset}.phn.${datatype}
+fi
+
+set +o nounset
+eval kwsets=${!dataset_kwlists[@]}
+eval my_ecf_file=\$${dataset}_ecf_file
+eval my_rttm_file=\$${dataset}_rttm_file
+set -o nounset
+
+my_array_name=${dataset}_kwlists
+
+eval kwsets=\( \${!$my_array_name[@]} \)
+declare -p kwsets
+for set in ${kwsets[@]} ; do
+  eval my_kwlist=\${$my_array_name[$set]}
+  declare -p my_kwlist
+done
+declare -p my_ecf_file
+declare -p my_rttm_file
+
+if [ $stage -le 2 ] ; then
+
+  for set in ${kwsets[@]} ; do
+
+    eval my_kwlist=\${$my_array_name[$set]}
+
+    #This sets up the basic files and converts the F4DE files into Kaldi-native format
+    local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \
+      $data $lang $data/kwset_${set}
+
+    # we will search for the IV words normally (i.e. we will look for the
+    # specific sequence of the words)
+    local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\
+      $data/kwset_${set} $lang $data/kwset_${set}/tmp.2
+
+    # in addition to the direct search of the IV words, we will set up the proxy
+    # search as well -- we will use a lower nbest compared to OOV=1
+    #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \
+    #--   --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+    #--   ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \
+    #--   ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3
+
+    local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\
+      --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+      ${data}/kwset_${set} ${lang} data/local/dict.phn/lexiconp.txt exp/g2p \
+      ${data}/kwset_${set}/tmp.4
+
+    # and finally, replace the categories with the word-level categories
+    cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories
+  done
+fi
+
+if [ $stage -le 3 ] ; then
+  for set in ${kwsets[@]} ; do
+    fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \
+      ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz"
+  done
+fi
+
+
+echo "Directories are set up -- running run-4-phn-anydecode.sh will take care of the rest"
+exit 0
+
+if [ $stage -le 4 ] ; then
+  for set in $kwsets ; do
+    for it in $(seq 1 4); do
+      system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it
+      local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \
+        --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system
+    done
+  done
+fi
+
+if [ $stage -le 5 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 6 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem_17_8.5
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
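+# The remaining stages follow the same pattern; a sketch for scoring any other
+# decode directory (the path below is hypothetical):
+#   system=exp/chain/tdnn_sp/decode_dev10h.phn.pem
+#   local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+#     --extraid ${set} --indices-dir $system/kws_indices $lang $data $system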
+if [ $stage -le 7 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.bg
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 8 ] ; then
+  for set in $kwsets ; do
+    system=exp/tri6_nnet/decode_dev10h.phn.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh
new file mode 100755
index 00000000000..1fbdb071123
--- /dev/null
+++ b/egs/babel/s5d/local/search/run_search.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+stage=2
+dir=dev10h.pem
+# End configuration section
+. ./utils/parse_options.sh
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+. ./conf/common_vars.sh
+. ./lang.conf
+
+#Example script showing how to run keyword search using the Kaldi-native pipeline
+
+
+if [ $stage -le 0 ]; then
+  local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \
+    exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix
+fi
+
+if [ $stage -le 1 ] ; then
+  local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p
+fi
+
+dataset=${dir%%.*}
+
+set +o nounset
+eval kwsets=${!dataset_kwlists[@]}
+eval my_ecf_file=\$${dataset}_ecf_file
+eval my_rttm_file=\$${dataset}_rttm_file
+set -o nounset
+
+my_array_name=${dataset}_kwlists
+
+eval kwsets=\( \${!$my_array_name[@]} \)
+declare -p kwsets
+for set in ${kwsets[@]} ; do
+  eval my_kwlist=\${$my_array_name[$set]}
+  declare -p my_kwlist
+done
+declare -p my_ecf_file
+declare -p my_rttm_file
+
+if [ $stage -le 2 ] ; then
+
+  for set in ${kwsets[@]} ; do
+
+    eval my_kwlist=\${$my_array_name[$set]}
+
+    #This sets up the basic files and converts the F4DE files into Kaldi-native format
+    local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \
+      data/$dir/ data/lang/ data/$dir/kwset_${set}
+
+    # we will search for the IV words normally (i.e. we will look for the
+    # specific sequence of the words)
+    local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\
+      data/$dir/kwset_${set} data/lang data/$dir/kwset_${set}/tmp.2
+
+    # in addition to the direct search of the IV words, we will set up the proxy
+    # search as well -- we will use a lower nbest compared to OOV=1
+    #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \
+    #--   --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+    #--   data/dev10h.pem/kwset_${set} data/lang data/dev10h.pem/${set}_oov_kws/tmp/L1.lex \
+    #--   data/dev10h.pem/${set}_oov_kws/tmp/L1.lex data/dev10h.pem/kwset_${set}/tmp.3
+    if [ -d data/local/extend ]; then
+      echo "Detected extended lexicon system..."
+      local/search/compile_proxy_keywords.sh --filter "OOV=1&&Characters>2"\
+        --cmd "$decode_cmd --mem 24G --max-jobs-run 64" --nj 128 \
+        --beam $extlex_proxy_beam --nbest $extlex_proxy_nbest \
+        --phone-beam $extlex_proxy_phone_beam --phone-nbest $extlex_proxy_phone_nbest\
+        --confusion-matrix exp/conf_matrix/confusions.txt \
+        data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \
+        data/$dir/kwset_${set}/tmp.4
+    else
+      local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>2"\
+        --beam 5 --nbest 50 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+        data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \
+        data/$dir/kwset_${set}/tmp.4
+    fi
+
+    cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \
+      nl | awk '{print $2, $1;}' > data/$dir/kwset_${set}/base_words.txt
+    paste <(cut -f 1 data/$dir/kwset_${set}/keywords.txt ) \
+          <(cut -f 2 data/$dir/kwset_${set}/keywords.txt | \
+            uconv -f utf8 -t utf8 -x Any-Lower ) | \
+      local/kwords2indices.pl --map-oov 0 data/$dir/kwset_${set}/base_words.txt |\
+      perl -ane '
+        if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";}
+        else { print "$F[0] BaseOOV=0\n";}' |\
+      cat - data/$dir/kwset_${set}/categories | sort -u |\
+      local/search/normalize_categories.pl > data/$dir/kwset_${set}/categories.2
+    mv data/$dir/kwset_${set}/categories data/$dir/kwset_${set}/categories.bak
+    mv data/$dir/kwset_${set}/categories.2 data/$dir/kwset_${set}/categories
+
+    echo >&2 "Kwset $set processed successfully..."
+  done
+fi
+
+if [ $stage -le 3 ] ; then
+  for set in ${kwsets[@]} ; do
+    fsts-union scp:<(sort data/$dir/kwset_${set}/tmp*/keywords.scp) \
+      ark,t:"|gzip -c >data/$dir/kwset_${set}/keywords.fsts.gz"
+  done
+fi
+
+
+exit
+
+if [ $stage -le 4 ] ; then
+  for set in $kwsets ; do
+    for it in $(seq 1 4); do
+      system=exp/sgmm5_mmi_b0.1/decode_fmllr_${dir}_it${it}
+      local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \
+        --extraid ${set} --indices-dir $system/kws_indices \
+        data/lang data/$dir $system
+    done
+  done
+fi
+
+if [ $stage -le 5 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_$dir
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices \
+      data/lang data/$dir $system
+  done
+fi
+
+if [ $stage -le 6 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_sp/decode_$dir
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices \
+      data/lang data/$dir $system
+  done
+fi
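+# Example invocation (a sketch; dev10h.pem is the default --dir):
+#   local/search/run_search.sh --stage 2 --dir dev10h.pem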
diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh
new file mode 100755
index 00000000000..41a925ce13a
--- /dev/null
+++ b/egs/babel/s5d/local/search/run_syll_search.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+stage=2
+dir=dev10h.pem
+# End configuration section
+. ./conf/common_vars.sh
+. ./utils/parse_options.sh
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+. ./lang.conf
+
+#Example script showing how to run keyword search using the Kaldi-native pipeline
+
+
+if [ $stage -le 0 ]; then
+  local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \
+    exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix
+fi
+
+if [ $stage -le 1 ] ; then
+  local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p
+fi
+
+dataset=${dir%%.*}
+datatype=${dir#*.}
+
+lang=data/lang.syll
+if [ "$dir" == "$dataset" ]; then
+  data=data/${dataset}.syll
+else
+  data=data/${dataset}.syll.${datatype}
+fi
+
+set +o nounset
+eval kwsets=${!dataset_kwlists[@]}
+eval my_ecf_file=\$${dataset}_ecf_file
+eval my_rttm_file=\$${dataset}_rttm_file
+set -o nounset
+
+my_array_name=${dataset}_kwlists
+
+eval kwsets=\( \${!$my_array_name[@]} \)
+declare -p kwsets
+for set in ${kwsets[@]} ; do
+  eval my_kwlist=\${$my_array_name[$set]}
+  declare -p my_kwlist
+done
+declare -p my_ecf_file
+declare -p my_rttm_file
+
+if [ $stage -le 2 ] ; then
+
+  for set in ${kwsets[@]} ; do
+
+    eval my_kwlist=\${$my_array_name[$set]}
+
+    #This sets up the basic files and converts the F4DE files into Kaldi-native format
+    local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \
+      $data $lang $data/kwset_${set}
+
+    # we will search for the IV words normally (i.e. we will look for the
+    # specific sequence of the words)
+    local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\
+      $data/kwset_${set} $lang $data/kwset_${set}/tmp.2
+
+    # in addition to the direct search of the IV words, we will set up the proxy
+    # search as well -- we will use a lower nbest compared to OOV=1
+    #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \
+    #--   --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+    #--   ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \
+    #--   ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3
+
+    local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\
+      --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+      ${data}/kwset_${set} ${lang} data/local/dict.syll/lexiconp.txt exp/g2p \
+      ${data}/kwset_${set}/tmp.4
+
+    # and finally, replace the categories with the word-level categories
+    cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories
+  done
+fi
+
+if [ $stage -le 3 ] ; then
+  for set in ${kwsets[@]} ; do
+    fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \
+      ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz"
+  done
+fi
+
+
+echo "Directories are set up -- running run-4-syll-anydecode.sh will take care of the rest"
+exit 0
+
+if [ $stage -le 4 ] ; then
+  for set in $kwsets ; do
+    for it in $(seq 1 4); do
+      system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it
+      local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \
+        --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system
+    done
+  done
+fi
+
+if [ $stage -le 5 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 6 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem_17_8.5
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 7 ] ; then
+  for set in $kwsets ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.bg
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 8 ] ; then
+  for set in $kwsets ; do
+    system=exp/tri6_nnet/decode_dev10h.syll.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
diff --git a/egs/babel/s5d/local/search/score.sh b/egs/babel/s5d/local/search/score.sh
new file mode 100755
index 00000000000..e429b1da030
--- /dev/null
+++ b/egs/babel/s5d/local/search/score.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Begin configuration section.
+# case_insensitive=true
+extraid=
+min_lmwt=8
+max_lmwt=12
+cmd=run.pl
+stage=0
+ntrue_from=
+# End configuration section.
+
+help_message="$0: score the kwslist using the F4DE scorer from NIST
   Example:
       $0 [additional-parameters] <lang-dir> <data-dir> <kws-output-dir>
   where the most important additional parameters can be:
      --extraid <extraid>  # use this when a non-default kws task was set up
                           # (using the kws_setup.sh --extraid) for a single kaldi data-dir"
+
+echo $0 $@
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 3 ]; then
+  printf "FATAL: incorrect number of variables given to the script\n\n"
+  printf "$help_message\n"
+  exit 1;
+fi
+
+set -e -o pipefail
+
+langdir=$1
+if [ -z $extraid ] ; then
+  kwsdatadir=$2/kws
+else
+  kwsdatadir=$2/kwset_${extraid}
+fi
+kwsoutputdir="$3"
+
+trials=$(cat $kwsdatadir/trials)
+mkdir -p $kwsoutputdir/log/
+
+if [ $stage -le 0 ] ; then
+  if [ -z "$ntrue_from" ]; then
+    for LMWT in $(seq $min_lmwt $max_lmwt) ; do
+      mkdir -p ${kwsoutputdir}_$LMWT/details/
+      mkdir -p ${kwsoutputdir}_$LMWT/scoring/
+
+      # As we need to sweep through different ntrue-scales, we do it in one
+      # parallel command -- it will be more effective than sweeping in a loop
+      # for all lmwts in parallel (usually there will be just a couple of
+      # different lmwts, but the ntrue-scale has a larger dynamic range)
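+      # For instance (a sketch of the sweep): NTRUE=1 gives ntrue = 1+(1-1)/5.0 = 1.0,
+      # NTRUE=11 gives ntrue = 3.0 and NTRUE=21 gives ntrue = 5.0, i.e. the sweep
+      # covers ntrue-scale values from 1.0 to 5.0 in steps of 0.2.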
+      $cmd NTRUE=1:21 $kwsoutputdir/log/score.${LMWT}.NTRUE.log \
+        ntrue=\$\(perl -e 'print 1+(NTRUE-1)/5.0' \) '&&' \
+        cat ${kwsoutputdir}_$LMWT/results \|\
+          local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$ntrue \|\
+          local/search/filter_kws_results.pl --probs --nbest 200 \|\
+          compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
+          \> ${kwsoutputdir}_$LMWT/scoring/score.NTRUE.txt
+
+      ntrue=$(grep ATWV ${kwsoutputdir}_$LMWT/scoring/score.*.txt | \
+              sort -k2,2nr -t '=' | head -n 1 | \
+              sed 's/.*score\.\([0-9][0-9]*\)\.txt.*/\1/g')
+      #The calculation of ntrue must be the same as in the command above
+      echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue_raw
+      ntrue=$(perl -e "print 1+($ntrue-1)/5.0")
+      echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue
+    done
+  else
+    for LMWT in $(seq $min_lmwt $max_lmwt) ; do
+      mkdir -p ${kwsoutputdir}_$LMWT/details/
+      mkdir -p ${kwsoutputdir}_$LMWT/scoring/
+
+      cp ${ntrue_from}_${LMWT}/details/ntrue ${kwsoutputdir}_${LMWT}/details/ntrue
+      [ -f ${ntrue_from}_${LMWT}/details/ntrue_raw ] && \
+        cp ${ntrue_from}_${LMWT}/details/ntrue_raw ${kwsoutputdir}_${LMWT}/details/ntrue_raw
+      echo "$ntrue_from" > ${kwsoutputdir}_${LMWT}/details/ntrue_from
+    done
+  fi
+fi
+
+if [ $stage -le 1 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/results \|\
+      local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\
+      \> ${kwsoutputdir}_LMWT/details/results
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/score.final.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/details/results \|\
+      compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
+        ${kwsoutputdir}_LMWT/details/alignment.csv \> ${kwsoutputdir}_LMWT/details/score.txt '&&' \
+    cp ${kwsoutputdir}_LMWT/details/score.txt ${kwsoutputdir}_LMWT/score.txt
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/per-category-stats.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/details/alignment.csv \|\
+      perl local/search/per_category_stats.pl --sweep-step 0.005 $trials \
+        $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/details/per-category-score.txt
+fi
+
+if [ $stage -le 2 ]; then
+if [ -f $kwsdatadir/f4de_attribs ] ; then
+  language=""
+  flen=0.01
+  kwlist_name=""
+  . $kwsdatadir/f4de_attribs #override the previous variables
+
+  ecf=$kwsdatadir/ecf.xml
+  rttm=$kwsdatadir/rttm
+  kwlist=$kwsdatadir/kwlist.xml
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_prepare.LMWT.log \
+    mkdir -p ${kwsoutputdir}_LMWT/f4de/ '&&' cat $kwlist \| \
+      local/search/annotate_kwlist.pl $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/f4de/kwlist.xml
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/details/results \| \
+      utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \
+      local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\
+      local/search/write_kwslist.pl --flen $flen --language $language \
+        --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_score.LMWT.log \
+    KWSEval -e $ecf -r $rttm -t ${kwsoutputdir}_LMWT/f4de/kwlist.xml -a \
+      --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+      -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+      -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+      -s ${kwsoutputdir}_LMWT/f4de/kwslist.xml -c -o -b -d -f ${kwsoutputdir}_LMWT/f4de/
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_report.LMWT.log \
+    local/kws_oracle_threshold.pl --duration $trials \
+      ${kwsoutputdir}_LMWT/f4de/alignment.csv \> ${kwsoutputdir}_LMWT/f4de/metrics.txt
+fi
+fi
+
+echo "$0: Done"
+exit 0;
+
+
diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh
new file mode 100755
index 00000000000..6a5b2d35a97
--- /dev/null
+++ b/egs/babel/s5d/local/search/search.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# License: Apache 2.0
+
+
+help_message="$(basename $0): do keyword indexing and search.  data-dir is assumed to have
+  kws/ subdirectory that specifies the terms to search for.  Output is in
+  decode-dir/kws/
+  Usage:
+    $(basename $0) <lang-dir> <data-dir> <decode-dir>"
+
+# Begin configuration section.
+min_lmwt=8
+max_lmwt=12
+cmd=run.pl
+model=
+skip_scoring=false
+skip_optimization=false # true can speed it up if #keywords is small.
+max_states=350000
+indices_dir=
+kwsout_dir=
+stage=0
+word_ins_penalty=0
+extraid=
+silence_word=  # specify this if you did so in kws_setup.sh; it's more accurate.
+strict=false
+duptime=0.6
+ntrue_scale=1.0
+frame_subsampling_factor=1
+nbest=-1
+max_silence_frames=50
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
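+# Example invocation (a sketch; the decode directory is hypothetical):
+#   local/search/search.sh --cmd "$decode_cmd" --extraid kwlist \
+#     data/lang data/dev10h.pem exp/nnet3/lstm_sp/decode_dev10h.pem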
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+set -o pipefail
+
+
+if [[ "$#" -ne "3" ]] ; then
+  echo -e "$0: FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+silence_opt=
+
+langdir=$1
+datadir=$2
+decodedir=$3
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$datadir/kws
+else
+  kwsdatadir=$datadir/kwset_${extraid}
+fi
+
+if [ -z $extraid ] ; then
+  kwsoutdir=$decodedir/kws
+else
+  kwsoutdir=$decodedir/kwset_${extraid}
+fi
+
+
+if [ -z $indices_dir ]; then
+  indices_dir=$kwsoutdir
+fi
+
+if [ ! -z "$model" ]; then
+  model_flags="--model $model"
+else
+  model_flags=
+fi
+
+mkdir -p $kwsoutdir
+for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do
+  if [ ! -d "$d" ]; then
+    echo "$0: FATAL: expected directory $d to exist"
+    exit 1;
+  fi
+done
+
+echo "$0: Searching: $kwsdatadir"
+duration=$(cat $kwsdatadir/trials)
+echo "$0: Duration: $duration"
+
+
+frame_subsampling_factor=1
+if [ -f $decodedir/../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor)
+  echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor"
+elif [ -f $decodedir/../../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../../frame_subsampling_factor)
+  echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor"
+fi
+
+if [ $stage -le 0 ] ; then
+  if [ ! -f $indices_dir/.done.index ] ; then
+    [ ! -d $indices_dir ] && mkdir $indices_dir
+    for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+      indices=${indices_dir}_$lmwt
+      mkdir -p $indices
+
+      acwt=$(perl -e "print 1.0/$lmwt")
+      [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word"
+      steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\
+        --skip-optimization $skip_optimization --max-states $max_states \
+        --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames\
+        --frame-subsampling-factor ${frame_subsampling_factor} \
+        $kwsdatadir $langdir $decodedir $indices || exit 1
+    done
+    touch $indices_dir/.done.index
+  else
+    echo "$0: Assuming indexing has already been done. If you really need to re-run "
+    echo "$0: the indexing again, delete the file $indices_dir/.done.index"
+  fi
+fi
+
+keywords=$kwsdatadir/keywords.fsts
+if [ -f $keywords ] ; then
+  echo "$0: Using ${keywords} for search"
+  keywords="ark:$keywords"
+elif [ -f ${keywords}.gz ] ; then
+  echo "$0: Using ${keywords}.gz for search"
+  keywords="ark:gunzip -c ${keywords}.gz |"
+else
+  echo "$0: The keyword file ${keywords}[.gz] does not exist"
+  exit 1
+fi
+
+
+if [ $stage -le 1 ]; then
+  for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    nj=$(cat $indices/num_jobs)
+
+
+    for f in $indices/index.1.gz ; do
+      [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+    done
+
+    mkdir -p $kwsoutput/log
+    $cmd JOB=1:$nj $kwsoutput/log/search.JOB.log \
+      set -e -o pipefail '&&' \
+      kws-search --strict=$strict --negative-tolerance=-1 \
+      --frame-subsampling-factor=${frame_subsampling_factor} \
+      "ark:gzip -cdf $indices/index.JOB.gz|" "$keywords" \
+      "ark,t:| sort -u | gzip -c > $kwsoutput/result.JOB.gz" \
+      "ark,t:| sort -u | gzip -c > $kwsoutput/stats.JOB.gz" || exit 1;
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    nj=$(cat $indices/num_jobs)
+
+    # This is a memory-efficient way to do the filtering; we do it this way
+    # because the result.* files can be fairly big and we do not want to
+    # run into memory trouble
+    files=""
+    for job in $(seq 1 $nj); do
+      if [ -f $kwsoutput/result.${job}.gz ] ; then
+        files="$files <(gunzip -c $kwsoutput/result.${job}.gz)"
+      elif [ -f $kwsoutput/result.${job} ] ; then
+        files="$files $kwsoutput/result.${job}"
+      else
+        echo >&2 "The file $kwsoutput/result.${job}[.gz] does not exist"
+        exit 1
+      fi
+    done
+    # we have to call it using eval, as we need bash to interpret the
+    # (possible) process substitution in the case of gz files;
+    # bash -c would probably work as well, but would spawn another
+    # shell instance
+    eval "sort -m -u $files" |\
+      local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1
+  done
+fi
+
+if [ -z $extraid ] ; then
+  extraid_flags=
+else
+  extraid_flags="  --extraid ""$extraid"" "
+fi
+
+if [ $stage -le 4 ]; then
+  if $skip_scoring ; then
+    echo "$0: Not scoring, because --skip-scoring true was issued"
+  elif [ ! -x local/kws_score.sh ] ; then
+    echo "$0: Not scoring, because the file local/kws_score.sh is not present"
+  else
+    echo "$0: Scoring KWS results"
+    local/search/score.sh --cmd "$decode_cmd" \
+      --min-lmwt $min_lmwt --max-lmwt $max_lmwt $extraid_flags \
+      $langdir $datadir ${kwsoutdir} || exit 1;
+  fi
+fi
+
+echo "$0: Done"
+exit 0
+
diff --git a/egs/babel/s5d/local/search/setup.sh b/egs/babel/s5d/local/search/setup.sh
new file mode 100755
index 00000000000..d4e2013a443
--- /dev/null
+++ b/egs/babel/s5d/local/search/setup.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+flen=0.01
+icu_transform="Any-Lower"
+# End configuration section
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+
+if [ $# -eq 6 ]; then
+  ecf=$1
+  rttm=$2
+  kwlist=$3
+  data=$4
+  lang=$5
+  output=$6
+elif [ $# -eq 5 ]; then
+  ecf=$1
+  rttm=""
+  kwlist=$2
+  data=$3
+  lang=$4
+  output=$5
+else
+  echo >&2 "Incorrect number of script parameters!"
+  exit 1
+fi
+
+mkdir -p $output
+for f in $ecf $kwlist; do
+  [ ! -f $f ] && echo "Mandatory file \"$f\" does not exist." && exit 1
+done
+
+
+# The first way of computing the duration produced numbers significantly
+# different from the numbers reported by F4DE. I'm leaving it here to document
+# the fact that the signal_duration field is not the same number as the sum
+# of the individual durations (the dur field in each <excerpt> element)
+#duration=`head -n 1 $ecf | sed 's/.*signal_duration=\"\([0-9.][0-9.]*\)\".*/\1/g'`
+#duration=`echo print $duration/2.0 | perl`
+
+duration=$(cat $ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}')
+
+echo $duration > $output/trials
+echo $flen > $output/frame_length
+
+echo "Number of trials: `cat $output/trials`"
+echo "Frame length: `cat $output/frame_length`"
+
+echo "Generating map files"
+cat $data/segments | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/utt.map
+cat $data/wav.scp | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/wav.map
+
+#This does not work: cp --no-preserve=all $ecf $output/ecf.xml
+cat $ecf > $output/ecf.xml
+cat $kwlist > $output/kwlist.xml
+[ ! -z "$rttm" ] && cat $rttm > $output/rttm
+
+{
+  echo "kwlist_name=`basename $kwlist`"
+  language=$(grep kwlist $kwlist | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g')
+  echo "language=$language"
+  echo "flen=$flen"
+} > $output/f4de_attribs
+
+cat ${kwlist} | \
+  perl -ne '{
+    chomp;
+    next unless (m/<kwtext>/ || m/kwid/);
+    if ($_ =~ m/<kwtext>/) {
+      s/.*<kwtext>(.*)<\/kwtext>.*/$1/g;
+      die "Undefined format of the kwlist file!" unless defined $kwid;
+      print $kwid . "\t" . $_ . "\n"; }
+    else {
+      s/.*kwid="(.*)".*/$1/g; $kwid=$_;};
+  }' > $output/keywords.txt
+
+
+command -v uconv >/dev/null 2>&1 || {
+  echo >&2 "I require uconv but it's not installed. Use $KALDI_ROOT/tools/extras/install_icu.sh to install it (or use the system packager)";
+  exit 1;
+}
+
+if [ -z "$icu_transform" ]; then
+  cp $lang/words.txt $output/words.txt
+else
+  uconv -f utf8 -t utf8 -x "${icu_transform}" -o $output/words.txt $lang/words.txt
+fi
+
+if [ -z "$icu_transform" ]; then
+  cat $output/keywords.txt
+else
+  paste <(cut -f 1 $output/keywords.txt ) \
+        <(cut -f 2 $output/keywords.txt | \
+          uconv -f utf8 -t utf8 -x "${icu_transform}" )
+fi | local/kwords2indices.pl --map-oov 0 $output/words.txt |\
+  sort -u > $output/keywords.int
+
+
+echo "Generating categories"
+{
+  local/search/create_categories.pl $output/keywords.txt
+  cat $output/keywords.int | perl -ane '
+    if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";}
+    else { print "$F[0] OOV=0\n";}'
+} | local/search/normalize_categories.pl > $output/categories
+
+if [ ! -z "$rttm" ] && [ -f $rttm ] ; then
+  local/search/rttm_to_hitlists.sh --segments $data/segments --utt-table $output/utt.map\
+    $rttm $kwlist $ecf $output/tmp $output/hitlist
+else
+  echo "Not generating hitlist, scoring won't be possible"
+fi
+echo "Done"
+
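+# The resulting files look roughly like this (a sketch; the keyword id and
+# the integer word ids are hypothetical):
+#   keywords.txt:  KW304-0001<TAB>example keyword phrase
+#   keywords.int:  KW304-0001 4211 9042 17
+#   categories:    KW304-0001 OOV=0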
diff --git a/egs/babel/s5d/local/search/utt_to_files.pl b/egs/babel/s5d/local/search/utt_to_files.pl
new file mode 100755
index 00000000000..ad5da8a50bf
--- /dev/null
+++ b/egs/babel/s5d/local/search/utt_to_files.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Converts the kws result with utterances into whole file offsets
+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+
+GetOptions ("flen=f" => \$flen) or die "$0: Cannot parse command-line options\n";
+
+my $segments=$ARGV[0];
+my %SEGMENTS;
+
+open(SEG, $segments) or die "Cannot open segment file $segments";
+while(my $line = <SEG>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of line \"$line\" does not conform to the segments file format" if @entries != 4;
+
+  $SEGMENTS{$entries[0]} = \@entries;
+}
+
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of line \"$line\" does not conform to the result.* file format" if @entries != 5;
+
+  my $kw = $entries[0];
+  my $utt = $entries[1];
+  my $start = $entries[2];
+  my $end = $entries[3];
+  my $score = $entries[4];
+
+  die "The utterance $utt is not in the segments file" unless exists $SEGMENTS{$utt};
+  my $file = $SEGMENTS{$utt}->[1];
+  my $utt_start = int( 0.5 + $SEGMENTS{$utt}->[2] / $flen);
+  my $utt_end = int(0.5 + $SEGMENTS{$utt}->[3] / $flen);
+
+  $start += $utt_start;
+  $end += $utt_start;
+  print "$kw $file $start $end $score\n";
+}
diff --git a/egs/babel/s5d/local/search/write_kwslist.pl b/egs/babel/s5d/local/search/write_kwslist.pl
new file mode 100755
index 00000000000..ade87212829
--- /dev/null
+++ b/egs/babel/s5d/local/search/write_kwslist.pl
@@ -0,0 +1,134 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat results | $0 [options] > kwslist.xml
+
+Allowed options:
+  --flen <float>       : duration (in seconds) of audio/feature frame
+  --language <string>  : language (string, default "")
+  --kwlist-id <string> : kwlist.xml name (string, default "")
+  --system-id <string> : name of the system (string, default "")
+  --digits <int>       : how many digits should the scores be rounded to?
+                         (int, default 2). Sometimes F4DE gets extremely slow
+                         when the scores have too many digits (perhaps some
+                         sweeping issue). This switch can be used to prevent it.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+my $language="";
+my $kwlist_filename="";
+my $system_id="";
+my $digits = 2;
+
+GetOptions("flen=f" => \$flen,
+           "language=s" => \$language,
+           "kwlist-id=s" => \$kwlist_filename,
+           "system-id=s" => \$system_id,
+           "digits=i" => \$digits) or do {
+  print STDERR "Cannot parse the command-line options.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line arguments\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+}
+
+sub KwsOutputSort {
+  my $a = shift @_;
+  my $b = shift @_;
+
+  if ($a->[4] != $b->[4]) {
+    #score
+    return $b->[4] <=> $a->[4];
+  } elsif ($a->[1] ne $b->[1]) {
+    return $a->[1] cmp $b->[1];
+  } else {
+    return $a->[2] <=> $b->[2];
+  }
+}
+
+sub PrettyPrint {
+  my @instances = sort {KwsOutputSort($a, $b)} @{shift @_};
+
+  return if @instances <= 0;
+  my $kwid=$instances[0]->[0];
+
+  print "  <detected_kwlist kwid=\"$kwid\" search_time=\"1\" oov_count=\"0\">\n";
+  foreach my $elem(@instances) {
+    (my $kwidx, my $file, my $start, my $end, my $score) = @{$elem};
+    my $filename="file=\"$file\"";
+
+    # The decision has to be made on the already rounded number, otherwise it
+    # can confuse F4DE: if we made the decision based on the non-rounded score,
+    # F4DE would see only the rounded score, so the decision would not be
+    # correctly aligned with the score (e.g. some entries with a rounded score
+    # of 0.5 would get the decision "YES" and others with the same rounded
+    # score the decision "NO")
+    $score = sprintf "%.${digits}f", $score;
+    my $decision=$score >= 0.5 ? "decision=\"YES\"" : "decision=\"NO\"";
+    my $tbeg = $start * $flen;
+    my $dur = $end * $flen - $tbeg;
+
+    $tbeg=sprintf "tbeg=\"%.${digits}f\"", $tbeg;
+    $dur=sprintf "dur=\"%.${digits}f\"", $dur;
+    $score=sprintf "score=\"%.${digits}f\"", $score;
+    my $channel="channel=\"1\"";
+
+    print "    <kw $filename $channel $tbeg $dur $score $decision/>\n";
+  }
+  print "  </detected_kwlist>\n";
+}
+
+my $KWID="";
+my @putative_hits;
+
+print "<kwslist kwlist_filename=\"$kwlist_filename\" language=\"$language\" system_id=\"$system_id\">\n";
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line;
+
+  if ($kwid ne $KWID) {
+    PrettyPrint(\@putative_hits) if $KWID;
+    $KWID=$kwid;
+    @putative_hits = ();
+  }
+
+  push @putative_hits, [$kwid, $file, $start, $end, $score];
+
+}
+PrettyPrint(\@putative_hits) if $KWID;
+
+print "</kwslist>\n";
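+# The printed kwslist then looks roughly like this (a sketch with
+# hypothetical values):
+#   <kwslist kwlist_filename="kwlist.xml" language="" system_id="">
+#     <detected_kwlist kwid="KW304-0001" search_time="1" oov_count="0">
+#       <kw file="FILE_001" channel="1" tbeg="12.34" dur="0.56" score="0.87" decision="YES"/>
+#     </detected_kwlist>
+#   </kwslist>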
(-1 means all)" + echo " --indices-dir # where the indices should be stored, by default it will be in " + exit 1; +fi + + +kwsdatadir=$1; +kwsdir=$2; + +if [ -z $indices_dir ] ; then + indices_dir=$kwsdir +fi + +mkdir -p $kwsdir/log; +nj=`cat $indices_dir/num_jobs` || exit 1; +keywords=$kwsdatadir/keywords.fsts; + +for f in $indices_dir/index.1.gz $keywords; do + [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; +done + +$cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ + kws-search --strict=$strict --negative-tolerance=-1 \ + "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ + "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/result.JOB.gz" \ + "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/stats.JOB.gz" || exit 1; + +exit 0; diff --git a/egs/babel/s5d/local/setup_categories.sh b/egs/babel/s5d/local/setup_categories.sh new file mode 100644 index 00000000000..ffc65173786 --- /dev/null +++ b/egs/babel/s5d/local/setup_categories.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +set=kwlist +output=data/dev10h.pem/kwset_${set}/ + +{ + local/search/create_categories.pl $output/keywords.txt + cat $output/keywords.int | perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";} + else { print "$F[0] OOV=0\n";}' +} | local/search/normalize_categories.pl > $output/categories +cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \ + nl | awk '{print $2, $1;}' > data/dev10h.pem/kwset_${set}/base_words.txt + paste <(cut -f 1 data/dev10h.pem/kwset_${set}/keywords.txt ) \ + <(cut -f 2 data/dev10h.pem/kwset_${set}/keywords.txt | \ + uconv -f utf8 -t utf8 -x Any-Lower ) | \ + local/kwords2indices.pl --map-oov 0 data/dev10h.pem/kwset_${set}/base_words.txt |\ + perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";} + else { print "$F[0] BaseOOV=0\n";}' |\ + cat - data/dev10h.pem/kwset_${set}/categories | sort -u |\ + local/search/normalize_categories.pl > data/dev10h.pem/kwset_${set}/categories.2 + mv data/dev10h.pem/kwset_${set}/categories data/dev10h.pem/kwset_${set}/categories.bak + mv data/dev10h.pem/kwset_${set}/categories.2 data/dev10h.pem/kwset_${set}/categories + +cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.phn.pem/kwset_kwlist/categories +cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.syll.pem/kwset_kwlist/categories +find exp/ -name ".done.kwset.kwlist" | xargs rm + diff --git a/egs/babel/s5d/local/shadow_set_kws_search.sh b/egs/babel/s5d/local/shadow_set_kws_search.sh new file mode 100755 index 00000000000..a67a3a57f6a --- /dev/null +++ b/egs/babel/s5d/local/shadow_set_kws_search.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +#Fail at any unhandled non-zero error code +set -e +set -o pipefail + +help_message="$0: create subset of the input directory (specified as the first directory). + The subset is specified by the second parameter. + The directory in which the subset should be created is the third parameter + Example: + $0 [data-dir2 [data-dir3 [ ...] ]" + +# Begin configuration section. 
+#acwt=0.0909091 +min_lmwt=7 +max_lmwt=17 +duptime=0.6 +cmd=run.pl +model= +skip_scoring=false +stage=0 +strict=true +skip_optimization=false +max_states=150000 +word_ins_penalty=0 +index_only=false +ntrue_scale=0.1 +# End configuration section. + +echo "$0 $@" # Print the command line for logging +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [[ "$#" -le "2" ]] ; then + echo -e "FATAL: wrong number of script parameters!\n\n" + printf "$help_message\n\n" + exit 1; +fi + + +datadir=$1 +langdir=$2 +decodedir=$3 +shift; shift; shift; +datasetA=$1 +datasetB=$2 + + +if [[ ! -d "$langdir" ]] ; then + echo "FATAL: the lang directory does not exist" + exit 1; +fi +if [[ ! -d "$decodedir" ]] ; then + echo "FATAL: the directory with decoded files does not exist" + exit 1; +fi + +for splitdatadir in $@ ; do + kwsdatadir=$splitdatadir/kws + if [ ! -d "$splitdatadir" ] ; then + echo "FATAL: the data directory $splitdatadir does not exist" + exit 1; + fi + if [ ! -d "$kwsdatadir" ] ; then + echo "FATAL: the data directory $kwsdatadir does not exist" + exit 1; + fi + if [ ! -f "$kwsdatadir/ecf.xml" ] ; then + echo "FATAL: the $kwsdatadir does not contain the ecf.xml file" + exit 1; + fi +done + +kwsdatadir=$datadir/kws + +! durationA=`head -1 $datasetA/kws/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` && + echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1; + + +! durationB=`head -1 $datasetB/kws/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` && + echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1; + +[ -z $durationA ] && echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1; +[ -z $durationB ] && echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1; + +if [ ! -z "$model" ]; then + model_flags="--model $model" +fi + +mkdir -p $decodedir/kws/ +if [ $stage -le 0 ] ; then + echo "Making KWS indices..." + if [ ! -f $decodedir/kws/.done.index ] ; then + for lmwt in `seq $min_lmwt $max_lmwt` ; do + kwsoutdir=$decodedir/kws_$lmwt + mkdir -p $kwsoutdir + + acwt=`perl -e "print (1.0/$lmwt);"` + steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\ + --acwt $acwt $model_flags --skip-optimization $skip_optimization \ + --word_ins_penalty $word_ins_penalty \ + $kwsdatadir $langdir $decodedir $kwsoutdir || exit 1 + done + touch $decodedir/kws/.done.index + else + echo "Assuming indexing has been aready done. If you really need to re-run " + echo "the indexing again, delete the file $decodedir/kws/.done.index" + fi +fi + +if $index_only ; then + echo "Indexing only was requested, existing now..." + exit 0 +fi + +if [ $stage -le 1 ] ; then + echo "Searching KWS indices..." + for lmwt in `seq $min_lmwt $max_lmwt` ; do + kwsoutdir=$decodedir/kws_$lmwt + dirA=$decodedir/`basename $datasetA`/kws_$lmwt + dirB=$decodedir/`basename $datasetB`/kws_$lmwt + mkdir -p $dirA + mkdir -p $dirB + + steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1 + + [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-A " | \ + sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results + + [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" 
&& exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-B " | \ + sed 's/^\(KW.*\)-B /\1 /g' > $dirB/results + + + dirA=$decodedir/`basename $datasetA`_`basename $datasetB`/kws_$lmwt + dirB=$decodedir/`basename $datasetB`_`basename $datasetA`/kws_$lmwt + mkdir -p $dirA + mkdir -p $dirB + [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-B " | \ + sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results + + [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-A " | \ + sed 's/^\(KW.*\)-A /\1 /g' > $dirB/results + done +fi + +rootdirA=$decodedir/`basename $datasetA` +rootdirB=$decodedir/`basename $datasetB` +rootdirAB=$decodedir/`basename $datasetA`_`basename $datasetB` +rootdirBA=$decodedir/`basename $datasetB`_`basename $datasetA` + + +echo "Processing $datasetA" +if [ $stage -le 2 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirA/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=true --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirA/kws_LMWT/kwslist.xml || exit 1 + + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirAB/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=true --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirAB/kws_LMWT/kwslist.xml || exit 1 +fi + +if [ $stage -le 3 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirA/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1 + + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirAB/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirAB/kws_LMWT/kwslist.unnormalized.xml || exit 1 +fi + +echo "Scoring $datasetA" +if [ $stage -le 4 ] ; then + if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then + echo "Not scoring, because the file local/kws_score.sh is not present" + exit 1 + elif [ ! 
-f $datasetA/kws/rttm ] ; then
+    echo "Not scoring, because the file $datasetA/kws/rttm is not present"
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh $datasetA $rootdirA/kws_LMWT
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT
+  fi
+fi
+
+echo "Processing $datasetB"
+if [ $stage -le 5 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirB/kws_LMWT/results \| \
+      utils/write_kwslist.pl --flen=0.01 --duration=$durationB \
+        --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.xml || exit 1
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirBA/kws_LMWT/results \| \
+      utils/write_kwslist.pl --flen=0.01 --duration=$durationB \
+        --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.xml || exit 1
+fi
+
+if [ $stage -le 6 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirB/kws_LMWT/results \| \
+      utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \
+        --segments=$datadir/segments --normalize=false --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.unnormalized.xml || exit 1
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirBA/kws_LMWT/results \| \
+      utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \
+        --segments=$datadir/segments --normalize=false --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.unnormalized.xml || exit 1
+fi
+
+echo "Scoring $datasetB"
+if [ $stage -le 7 ] ; then
+  if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then
+    echo "Not scoring, because the file local/kws_score.sh is not present"
+  elif [ ! -f $datasetB/kws/rttm ] ; then
+    echo "Not scoring, because the file $datasetB/kws/rttm is not present"
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh $datasetB $rootdirB/kws_LMWT || exit 1
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh --kwlist $datasetA/kws/kwlist.xml $datasetB $rootdirBA/kws_LMWT || exit 1
+  fi
+fi
+
+echo "Done, everything seems fine"
+exit 0
diff --git a/egs/babel/s5d/local/show_lattice.sh b/egs/babel/s5d/local/show_lattice.sh
new file mode 100755
index 00000000000..f18132234ee
--- /dev/null
+++ b/egs/babel/s5d/local/show_lattice.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. path.sh
+
+format=pdf # pdf svg
+output=
+
+. utils/parse_options.sh
+
+if [ $# != 3 ]; then
+  echo "usage: $0 [--format pdf|svg] [--output <output-file>] <utt-id> <lattice-ark> <words.txt>"
+  echo "e.g.:  $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
+  exit 1;
+fi
+
+uttid=$1
+lat=$2
+words=$3
+
+tmpdir=$(mktemp -d /tmp/kaldi.XXXX); trap "rm -r $tmpdir" EXIT # cleanup
+
+gunzip -c $lat | lattice-to-fst ark:- ark,scp:$tmpdir/fst.ark,$tmpdir/fst.scp || exit 1
+! grep "^$uttid " $tmpdir/fst.scp && echo "ERROR : Missing utterance '$uttid' from gzipped lattice ark '$lat'" && exit 1
+fstcopy "scp:grep '^$uttid ' $tmpdir/fst.scp |" "scp:echo $uttid $tmpdir/$uttid.fst |" || exit 1
+fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format}
+
+if [ ! -z $output ]; then
+  cp $tmpdir/$uttid.${format} $output
+fi
+
+[ $format == "pdf" ] && evince $tmpdir/$uttid.pdf
+[ $format == "svg" ] && eog $tmpdir/$uttid.svg
+
+exit 0
diff --git a/egs/babel/s5d/local/split_ctms.sh b/egs/babel/s5d/local/split_ctms.sh
new file mode 100755
index 00000000000..b24a1380111
--- /dev/null
+++ b/egs/babel/s5d/local/split_ctms.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# begin configuration section.
+min_lmwt=7
+max_lmwt=17
+stage=0
+cer=0
+ctm_name=
+cmd=run.pl
+#end configuration section.
+
+echo "$0 $@"
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+set -e
+set -o pipefail
+
+data=$1;
+q=$2;
+shift; shift;
+
+if [ -z $ctm_name ] ; then
+  ctm_name=`basename $data`;
+fi
+
+name=$ctm_name
+
+for i in $@ ; do
+  p=$q/`basename $i`
+  [ ! -f $i/reco2file_and_channel ] && echo "The file reco2file_and_channel is not present in the $i directory!" && exit 1
+  for lmw in $q/score_* ; do
+    test -d $lmw || exit 1;  #this protects us from creating a directory "score_*" in case no real score_[something] directory exists
+    d=$p/`basename $lmw`
+    mkdir -p $d
+
+    [ ! -f $lmw/$name.ctm ] && echo "File $lmw/$name.ctm does not exist!" && exit 1
+    utils/filter_scp.pl <(cut -f 1 -d ' ' $i/reco2file_and_channel) $lmw/$name.ctm > $d/`basename $i`.ctm
+  done
+
+  if [ -f $i/stm ] ; then
+    local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $i data/lang $p
+  else
+    echo "Not running scoring, file $i/stm does not exist"
+  fi
+
+done
+exit 0
+
+
+#This script takes the source STM file and generates the *.txt files which
+#are usually part of the BABEL delivery
+#The *.txt files are not part of the delivery for the evalpart1 subset
+#The program works as a filter and the only parameter it expects is
+#the path to the output directory
+#The filenames are figured out from the STM file
+#example of usage:
+# cat data/evalpart1/stm | local/stm2text.pl data/raw_evalpart1_data/transcriptions
+
+use strict;
+use warnings;
+
+use utf8;
+use Data::Dumper;
+
+binmode(STDIN, ":encoding(utf8)");
+binmode(STDOUT, ":encoding(utf8)");
+
+my $output_dir = $ARGV[0];
+my $prev_filename = "";
+my $OUTPUT;
+while ( <STDIN> ) {
+  chop;
+  my ($filename, $channel, $speaker, $start, $end, $text) = split(" ", $_, 6);
+  next if ( $filename =~ /;;.*/ );
+  #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ );
+  $text = "" if not $text;
+
+  if ( $prev_filename ne $filename ) {
+    #close($OUTPUT) if ( tell(FH) != -1 );
+    print "$output_dir/$filename.txt\n";
+    open($OUTPUT, ">:encoding(UTF-8)", "$output_dir/$filename.txt") or die $!;
+    $prev_filename = $filename;
+  }
+
+  print $OUTPUT "[$start]\n";
+  print $OUTPUT "$text\n";
+}
diff --git a/egs/babel/s5d/local/subset_atwv.pl b/egs/babel/s5d/local/subset_atwv.pl
new file mode 100755
index 00000000000..ce6b7043116
--- /dev/null
+++ b/egs/babel/s5d/local/subset_atwv.pl
@@ -0,0 +1,120 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: subset_atwv.pl [options] <keywords-list|-> <bsum.txt>
+ e.g.: subset_atwv.pl keywords.list bsum.txt
+
+This script will compute the ATWV for a subset of the original keywords in bsum.txt.
+Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list
+is a list of the keywords that you want to compute the ATWV for. For example:
+KW101-0001
+KW101-0002
+...
+
+Allowed options:
+  --subset-name : Name of the subset (string, default = "")
+  --width       : Width of the printed numbers (int, default = 5 )
+EOU
+
+my $subset_name = "";
+my $width = 5;
+GetOptions('subset-name=s' => \$subset_name,
+  'width=i' => \$width);
+
+@ARGV == 2 || die $Usage;
+
+# Work out the input/output source
+my $kws_filename = shift @ARGV;
+my $bsum_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kws_filename ne "-") {
+  open(KWS, "<$kws_filename") || die "Fail to open keywords file: $kws_filename\n";
+  $source = "KWS";
+}
+open(BSUM, "<$bsum_filename") || die "Fail to open bsum file: $bsum_filename\n";
+
+# Read in the keywords and join them into one regex alternation (KW1|KW2|...),
+# which is later matched against each row of bsum.txt.
+
+my $kws = "";
+while (<$source>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "Bad line $_\n";
+  if ($kws eq "") {
+    $kws = $col[0];
+  } else {
+    $kws .= "|$col[0]";
+  }
+}
+
+# Process bsum.txt
+my $targ_sum = 0;
+my $corr_sum = 0;
+my $fa_sum = 0;
+my $miss_sum = 0;
+my $twv_sum = 0;
+my $count = 0;
+my $subset_count = 0;
+my $flag = 0;
+if ($kws ne "") {
+  while (<BSUM>) {
+    chomp;
+    # Work out the total number of keywords that have occurrences in the search collection
+    if (/^Summary Totals/) {$flag = 0;}
+    if (/^Keyword/) {$flag = 1;}
+    my @col;
+    if ($flag == 1) {
+      # Figure out keywords that don't have occurrences in the search collection
+      @col = split(/\|/, $_);
+      $col[2] =~ s/^\s+//;
+      $col[2] =~ s/\s+$//;
+      $col[2] ne "" || next;
+      $count ++;
+    } else {
+      next;
+    }
+
+    # Only collect statistics for the given subset
+    m/$kws/ || next;
+
+    # Keywords that are in the given subset, and have occurrences
+    $targ_sum += $col[2];
+    $corr_sum += $col[3];
+    $fa_sum += $col[4];
+    $miss_sum += $col[5];
+    $twv_sum += $col[6];
+    $subset_count ++;
+  }
+}
+
+# Compute ATWV: the subset ATWV averages TWV over the subset keywords; the
+# "contributed" ATWV divides the same sum by the total keyword count.
+my $subset_atwv = ($subset_count == 0) ? 0 : $twv_sum/$subset_count;
+my $atwv = ($count == 0) ? 0 : $twv_sum/$count;
+my $bp_atwv = ($count == 0) ? 0 : $subset_count/$count;
+
+# Format the numbers
+my $format = "%-${width}d";
+$subset_count = sprintf($format, $subset_count);
+$targ_sum = sprintf($format, $targ_sum);
+$corr_sum = sprintf($format, $corr_sum);
+$fa_sum = sprintf($format, $fa_sum);
+$miss_sum = sprintf($format, $miss_sum);
+$subset_atwv = sprintf("% .4f", $subset_atwv);
+$atwv = sprintf("% .4f", $atwv);
+$bp_atwv = sprintf("% .4f", $bp_atwv);
+
+# Print
+if ($subset_name ne "") {print "$subset_name: ";}
+print "#Keywords=$subset_count, #Targ=$targ_sum, #Corr=$corr_sum, #FA=$fa_sum, #Miss=$miss_sum, ";
+print "Contributed ATWV=$atwv, Best Possible Contributed ATWV=$bp_atwv, ATWV=$subset_atwv\n";
+
+if ($kws_filename ne "-") {close(KWS);}
+close(BSUM);
diff --git a/egs/babel/s5d/local/subset_kwslist.pl b/egs/babel/s5d/local/subset_kwslist.pl
new file mode 100755
index 00000000000..361291179ef
--- /dev/null
+++ b/egs/babel/s5d/local/subset_kwslist.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University
+# Apache 2.0.
+#
+use strict;
+use warnings;
+use XML::Simple;
+use Data::Dumper;
+
+binmode STDOUT, ":utf8";
+
+my %seen;
+while (my $keyword = <STDIN>) {
+  chomp $keyword;
+  $seen{$keyword} = 1;
+}
+
+
+my $data = XMLin($ARGV[0], ForceArray => 1);
+
+#print Dumper($data->{kw});
+my @filtered_kws = ();
+
+foreach my $kwentry (@{$data->{kw}}) {
+  if (defined $seen{$kwentry->{kwid}}) {
+    push @filtered_kws, $kwentry;
+  }
+}
+$data->{kw} = \@filtered_kws;
+my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>'');
+print $xml;
+exit 0
diff --git a/egs/babel/s5d/local/summarize_logs.pl b/egs/babel/s5d/local/summarize_logs.pl
new file mode 100755
index 00000000000..e816d57d68f
--- /dev/null
+++ b/egs/babel/s5d/local/summarize_logs.pl
@@ -0,0 +1,121 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
+
+#scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir1> [<log-dir2> ...]\n" && exit 1;
+
+sub split_hundreds { # split list of filenames into groups of 100.
+  my $names = shift @_;
+  my @A = split(" ", $names);
+  my @ans = ();
+  while (@A > 0) {
+    my $group = "";
+    for ($x = 0; $x < 100 && @A>0; $x++) {
+      $fname = pop @A;
+      $group .= "$fname ";
+    }
+    push @ans, $group;
+  }
+  return @ans;
+}
+
+sub parse_accounting_entry {
+  $entry= shift @_;
+
+  @elems = split " ", $entry;
+
+  $time=undef;
+  $threads=undef;
+  foreach $elem (@elems) {
+    if ( $elem=~ m/time=(\d+)/ ) {
+      $elem =~ s/time=(\d+)/$1/;
+      $time = $elem;
+    } elsif ( $elem=~ m/threads=(\d+)/ ) {
+      $elem =~ s/threads=(\d+)/$1/g;
+      $threads = $elem;
+    } else {
+      die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
+    }
+  }
+
+  if (defined($time) and defined($threads) ) {
+    return ($time, $threads);
+  } else {
+    die "The accounting entry \"$entry\" did not contain all necessary attributes";
+  }
+}
+
+foreach $dir (@ARGV) {
+
+  #$dir = $ARGV[0];
+  print $dir;
+
+  ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
+
+  $dir =~ s:/$::; # Remove trailing slash.
+
+
+  # Group the files into categories where all have the same base-name.
+  foreach $f (glob ("$dir/*.log")) {
+    $f_category = $f;
+    # do next expression twice; s///g doesn't work as they overlap.
+    $f_category =~ s:\.\d+\.(?!\d+):.*.:;
+    #$f_category =~ s:\.\d+\.:.*.:;
+    $fmap{$f_category} .= " $f";
+  }
+}
+
+foreach $c (sort (keys %fmap) ) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w WARNING $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n warnings in $c\n"
+  }
+}
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w ERROR $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n errors in $c\n"
+  }
+}
+
+$supertotal_cpu_time=0.0;
+$supertotal_clock_time=0.0;
+$supertotal_threads=0.0;
+
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+
+  $total_cpu_time=0.0;
+  $total_clock_time=0.0;
+  $total_threads=0.0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`;
+
+    #print $lines ."\n";
+
+    @entries = split "\n", $lines;
+
+    foreach $line (@entries) {
+      ($time, $threads) = parse_accounting_entry($line);
+
+      $total_cpu_time += $time * $threads;
+      $total_threads += $threads;
+      if ( $time > $total_clock_time ) {
+        $total_clock_time = $time;
+      }
+    }
+  }
+  print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
+
+  $supertotal_cpu_time += $total_cpu_time;
+  $supertotal_clock_time += $total_clock_time;
+  $supertotal_threads += $total_threads;
+}
+print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";
+
diff --git a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
new file mode 100755
index 00000000000..8f0cb88771a
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+
+if [ -f ./path.sh ]; then . ./path.sh; fi
+
+if [ $# != 4 ]; then
+  echo "This script takes an ali directory and a syllable lang dir and generates"
+  echo "a syllabic transcription of the alignment"
+  echo ""
+  echo "Usage: $0 <data-dir> <syll-lang-dir> <ali-dir> <output-dir>"
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs"
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given alignment directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '<unk>' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5d/local/syllab/create_syll_datadir.sh b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
new file mode 100755
index 00000000000..4c014285619
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+help_message="Converts a data dir with word-level transcriptions into one with syllable-level transcriptions.\nExpects 4 parameters: <input-data-dir> <word-lang-dir> <syll-lang-dir> <output-data-dir>\n"
+# Begin configuration section.
+boost_sil=1.0
+cmd=run.pl
+nj=4
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+if [ $# -ne 4 ] ; then
+  echo "$#"
+  echo -e "$help_message"
+  exit 1;
+fi
+
+input=$1
+word_lang=$2
+syll_lang=$3
+output=$4
+
+[ ! -f exp/tri5/final.mdl ] && \
+  echo "File exp/tri5/final.mdl must exist" && exit 1;
+
+[ ! -d $input/split$nj ] && utils/split_data.sh $input $nj
+
+utils/copy_data_dir.sh $input $output
+touch $output/.plp.done
+touch $output/.done
+
+if [ -f $input/text ] ; then
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $nj --cmd "$cmd" \
+    $input $word_lang exp/tri5 exp/tri5_ali/align_$(basename $input)
+
+  local/syllab/ali_to_syllabs.sh \
+    --cmd "$cmd" \
+    $input $syll_lang exp/tri5_ali/align_$(basename $input) \
+    exp/tri5_ali_syll/align_$(basename $output)
+
+  cp exp/tri5_ali_syll/align_$(basename $output)/text $output/text
+fi
+
+exit 0
+
+
+
diff --git a/egs/babel/s5d/local/syllab/create_syllables.pl b/egs/babel/s5d/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+
+my $with_probs;
+my $position_independent_phones;
+
+GetOptions("with-probs" => \$with_probs,
+  "position-independent-phones" => \$position_independent_phones
+);
+
+my %SYLLS;
+my %LEXICON;
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my $word; my $prob; my $pron;
+  if ($with_probs) {
+    ($word, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($word, $pron) = split(" ", $line, 2);
+  }
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my $pronlen= scalar @syllabs;
+  my @extended_syllabs;
+  if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) {
+    $SYLLS{$pron} +=1;
+    push @extended_syllabs, $pron;
+  } elsif ($pronlen == 1) {
+    my $syl;
+    my @phones=split " ", $syllabs[0];
+
+    if ($position_independent_phones) {
+      $syl = join(" ", @phones);
+    } else {
+      my @phones2 = map { $_ . "_I" } @phones;
+
+      if (scalar(@phones) == 1 ) {
+        $syl = "$phones[0]_S";
+      } else {
+        $phones2[0] = $phones[0] . "_B" unless $position_independent_phones;
+        $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones;
+        $syl = join(" ", @phones2);
+      }
+    }
+    $SYLLS{$syl} += 1;
+    push @extended_syllabs, $syl;
+  } else {
+    for (my $i = 0; $i < $pronlen; $i+=1) {
+      my $syl;
+      my @phones=split " ", $syllabs[$i];
+      my $first_index = 0;
+      my $last_index = scalar(@phones)-1;
+
+      if ($position_independent_phones) {
+        $syl = join(" ", @phones);
+      } else {
+        my @phones2 = map { $_ . "_I" } @phones;
+
+        if ($i == 0) {
+          $phones2[$first_index] = $phones[$first_index] . "_B";
+        } elsif ( $i == ($pronlen - 1)) {
+          $phones2[$last_index] = $phones[$last_index] .
"_E"; + } + $syl = join(" ", @phones2); + } + + push @extended_syllabs, $syl; + $SYLLS{$syl} += 1; + } + } + push @{$LEXICON{$word}}, \@extended_syllabs; +} + + +my %VOCAB; +my %COUNTS; +my %REV_VOCAB; +foreach my $syl (keys %SYLLS) { + my $seq=1; + my $word=$syl; + $word =~ s/_[^\s]*//g; + $word =~ s/ //g; + $word =~ s/[^a-zA-Z0-9<>-|\/]//g; + + my $wordx=$word; + $wordx .= "#$seq"; + while (exists $COUNTS{$wordx}) { + $seq += 1; + $wordx = "$word#$seq"; + } + + $COUNTS{$wordx} += $SYLLS{$syl}; + push @{$VOCAB{$wordx}}, $syl; + $REV_VOCAB{$syl} = $wordx; +} + +open(my $lex_f, "|sort -u > $ARGV[0]") or +die "Cannot open the file\"$ARGV[0]\" for writing"; + +foreach my $word (keys %VOCAB) { + print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n"; +} + +close($lex_f); + +open(my $word2syll_f, "|sort -u > $ARGV[1]") or +die "Cannot open the file\"$ARGV[1]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + my @pron_in_syllabs; + foreach my $syl (@{$pron}) { + die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl}; + push @pron_in_syllabs, $REV_VOCAB{$syl}; + } + print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n"; + } +} + +close($word2syll_f); + +open(my $word2ali_f, "|sort -u > $ARGV[2]") or +die "Cannot open the file\"$ARGV[2]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . "\n"; + } +} + +close($word2ali_f); + diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh new file mode 100755 index 00000000000..fc21a23231b --- /dev/null +++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh +. ./path.sh + + + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +llang=$2 +lang=$3 +out=$4 +lout=$5 + +test -d $lout && rm -rf $lout +mkdir -p $lout +test -d $out && rm -rf $out +cp -R $lang $out +rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt +rm -rf $out/phones/word_boundary.{int,txt} + +echo "Generating lexicons.." +if [ -f $lang/phones/word_boundary.int ] ; then + echo "Position dependent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl --with-probs\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl \ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +else + echo "Position independent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." 
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+  awk '{if (NF == 2) { print $2;}
+        else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
new file mode 100755
index 00000000000..db7b0902425
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+. ./path.sh
+
+
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+data=$1
+llang=$2
+lang=$3
+out=$4
+lout=$5
+
+test -d $lout && rm -rf $lout
+mkdir -p $lout
+test -d $out && rm -rf $out
+cp -R $lang $out
+rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt
+rm -rf $out/phones/word_boundary.{int,txt}
+
+echo "Generating lexicons.."
+if [ -f $lang/phones/word_boundary.int ] ; then
+  echo "Position dependent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+else
+  echo "Position independent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+  awk '{if (NF == 2) { print $2;}
+        else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
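+# In the two make_lexicon_fst.pl calls below, 0.5 is the probability assigned
+# to emitting the optional-silence phone ($sil) after each pronunciation.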
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
new file mode 100755
index 00000000000..63e9114875d
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+acwt=0.1
+beam=8
+# End configuration section
+echo $0 "$@"
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+data=$1; shift;
+ilang=$1; shift;
+olang=$1; shift;
+input=$1; shift
+output=$1; shift
+
+nj=$(cat $input/num_jobs)
+
+mkdir -p $output/log
+
+
+if [ -f $olang/lex.words2syllabs.fst ] ; then
+  fstinvert $olang/lex.words2syllabs.fst | fstreverse | \
+    fstminimize --allow_nondet | fstreverse > $output/L.fst
+
+  $cmd JOB=1:$nj $output/log/convert.JOB.log \
+    lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \
+    lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \
+    lattice-compose ark:- $output/L.fst ark:- \| \
+    lattice-determinize-pruned --beam=$beam --acoustic-scale=$acwt ark:- ark:- \| \
+    lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz"
+    #lattice-minimize ark:- ark:- \| \
+    #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" "ark:|gzip -c > $output/lat.JOB.gz"
+else
+  #for phonemes.... (IIRC)
+  fstreverse $olang/L.fst | fstminimize | fstreverse > $output/L.fst
+  $cmd JOB=1:$nj $output/log/convert.JOB.log \
+    lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \
+    lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \
+    lattice-align-words $ilang/phones/word_boundary.int $input/../final.mdl ark:- ark:- \| \
+    lattice-to-phone-lattice --replace-words $input/../final.mdl ark:- ark:- \| \
+    lattice-align-phones $input/../final.mdl ark:- ark:- \| \
+    lattice-compose ark:- $output/L.fst ark:- \|\
+    lattice-determinize-pruned --beam=$beam --acoustic-scale=$acwt ark:- ark:-\| \
+    lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz"
+    #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" ark:"|gzip -c > $output/lat.JOB.gz"
+fi
+
+  #lattice-1best ark:- ark:-| nbest-to-linear ark:- ark:/dev/null ark,t:- \
+  #utils/int2sym.pl -f 2- $olang/words.txt | head
+cp $input/num_jobs $output/num_jobs
+
diff --git a/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions("with-probs" => \$probs);
+
+my $syllab_lexicon=$ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split " ", $line, 3;
+  } else {
+    ($syll, $pron) = split " ", $line, 2;
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh
new file mode 100755
index 00000000000..7c4a13c61f9
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/run_phones.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+stage=0
+# End configuration section
+. ./utils/parse_options.sh
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+. ./conf/common_vars.sh
+.
./lang.conf + +if [ $# -ne 1 ] ; then + echo "Invalid number of parameters" + exit 1 +fi + +idir=$1 + +if [ ! -d "$idir" ] ; then + echo "The directory $idir does not exist" + exit 1 +fi + +idata=${idir##*/} + + +if [ "$idata" == ${idata%%.*} ]; then + odata=${idata%%.*}.phn +else + odata=${idata%%.*}.phn.${idata#*.} +fi + +if [ $stage -le -1 ] ; then + local/syllab/generate_phone_lang.sh \ + data/train data/local/ data/lang data/lang.phn data/local/dict.phn + + local/syllab/ali_to_syllabs.sh \ + data/train data/lang.phn exp/tri5_ali exp/tri5_ali_phn + + + utils/copy_data_dir.sh data/train data/train.phn + cp exp/tri5_ali_phn/text data/train.phn/text + + #Create syllab LM + local/train_lms_srilm.sh \ + --words-file data/lang.phn/words.txt --train-text data/train.phn/text \ + --oov-symbol "`cat data/lang.phn/oov.txt`" data data/srilm.phn + + local/arpa2G.sh data/srilm.phn/lm.gz data/lang.phn/ data/lang.phn/ +fi + +if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then + #Create dev10h.phn.pem dir + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + $idir data/lang exp/tri5 exp/tri5_ali/align_$idata + + local/syllab/ali_to_syllabs.sh \ + --cmd "$decode_cmd" \ + $idir data/lang.phn exp/tri5_ali/align_$idata exp/tri5_ali_phn/align_$idata +fi + +if [ $stage -le 1 ] ; then + utils/copy_data_dir.sh data/$idata data/$odata + [ -f exp/tri5_ali_phn/align_$idata/text ] && \ + cp exp/tri5_ali_phn/align_$idata/text data/$odata/text + touch data/$odata/.plp.done + touch data/$odata/.done +fi + + diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh new file mode 100755 index 00000000000..7366ac9ad35 --- /dev/null +++ b/egs/babel/s5d/local/syllab/run_syllabs.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=0 +# End configuration section +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./cmd.sh +. ./path.sh + +. ./conf/common_vars.sh +. ./lang.conf + +if [ $# -ne 1 ] ; then + echo "Invalid number of parameters" + exit 1 +fi + +idir=$1 + +if [ ! 
-d "$idir" ] ; then + echo "The directory $idir does not exist" + exit 1 +fi + +idata=${idir##*/} + +if [ "$idata" == ${idata%%.*} ]; then + odata=${idata%%.*}.syll +else + odata=${idata%%.*}.syll.${idata#*.} +fi + +if [ $stage -le -1 ] ; then + local/syllab/generate_syllable_lang.sh \ + data/train data/local/ data/lang data/lang.syll data/local/dict.syll + + local/syllab/ali_to_syllabs.sh \ + data/train data/lang.syll exp/tri5_ali exp/tri5_ali_syll + + + utils/copy_data_dir.sh data/train data/train.syll + cp exp/tri5_ali_syll/text data/train.syll/text + + #Create syllab LM + local/train_lms_srilm.sh \ + --words-file data/lang.syll/words.txt --train-text data/train.syll/text \ + --oov-symbol "`cat data/lang.syll/oov.txt`" data data/srilm.syll + + local/arpa2G.sh data/srilm.syll/lm.gz data/lang.syll/ data/lang.syll/ +fi + +if [ $stage -le 0 ] && [ -f "$idir/text" ]; then + #Create dev10h.syll.pem dir + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + $idir data/lang exp/tri5 exp/tri5_ali/align_$idata + + local/syllab/ali_to_syllabs.sh \ + --cmd "$decode_cmd" \ + $idir data/lang.syll exp/tri5_ali/align_$idata exp/tri5_ali_syll/align_$idata +fi + +if [ $stage -le 1 ] ; then + utils/copy_data_dir.sh data/$idata data/$odata + [ -f exp/tri5_ali_syll/align_$idata/text ] && \ + cp exp/tri5_ali_syll/align_$idata/text data/$odata/text + touch data/$odata/.plp.done + touch data/$odata/.done +fi + + diff --git a/egs/babel/s5d/local/train_g2p.sh b/egs/babel/s5d/local/train_g2p.sh new file mode 100755 index 00000000000..08be0014656 --- /dev/null +++ b/egs/babel/s5d/local/train_g2p.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 + +# Begin configuration section. +iters=5 +stage=0 +encoding='utf-8' +remove_tags=true +only_words=true +icu_transform="Any-Lower" +cmd=run.pl +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +set -u +set -e + +if [ $# != 2 ]; then + echo "Usage: $0 [options] " + echo " where is the training lexicon (one pronunciation per " + echo " word per line) and is directory where the models will " + echo " be stored" + echo "e.g.: train_g2p.sh data/local/lexicon.txt exp/g2p/" + echo "" + echo "main options (for others, see top of script file)" + echo " --iters # How many iterations. Relates to N-ngram order" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +lexicon=$1 +wdir=$2 + + +mkdir -p $wdir/log + +[ ! -f $lexicon ] && echo "$0: Training lexicon does not exist." && exit 1 + +if $only_words ; then + cat $lexicon | sed 's/^<.*>.*$//g' | sed 's/^#.*//g' > $wdir/lexicon_onlywords.txt + lexicon=$wdir/lexicon_onlywords.txt +fi + +if $remove_tags ; then + cat $lexicon |\ + sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/lexicon_notags.txt + lexicon=$wdir/lexicon_notags.txt +fi + +if [ ! -z $icu_transform ] ; then + paste \ + <(cat $lexicon | awk '{print $1}' | uconv -f $encoding -t $encoding -x "$icu_transform") \ + <(cat $lexicon | sed 's/^[^ \t][^ \t]*[ \t]//g') \ + > $wdir/lexicon_transformed.txt + lexicon=$wdir/lexicon_transformed.txt +fi + +if ! g2p=`which g2p.py` ; then + echo "Sequitur was not found !" 
+ echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh" + exit 1 +fi + +echo "Training the G2P model (iter 0)" + +if [ $stage -le 0 ]; then + $cmd $wdir/log/g2p.0.log \ + g2p.py -S --encoding $encoding --train $lexicon --devel 5% --write-model $wdir/g2p.model.0 +fi + +for i in `seq 0 $(($iters-2))`; do + + echo "Training the G2P model (iter $[$i + 1] )" + + if [ $stage -le $i ]; then + $cmd $wdir/log/g2p.$(($i + 1)).log \ + g2p.py -S --encoding $encoding --model $wdir/g2p.model.$i --ramp-up --train $lexicon --devel 5% --write-model $wdir/g2p.model.$(($i+1)) + fi + +done + +! (set -e; cd $wdir; ln -sf g2p.model.$[$iters-1] g2p.model.final ) && echo "Problem finalizing training... " && exit 1 + +if [ $stage -le $(($i + 2)) ]; then + echo "Running test..." + $cmd $wdir/log/test.log \ + g2p.py --encoding $encoding --model $wdir/g2p.model.final --test $lexicon +fi + diff --git a/egs/babel/s5d/local/train_lms_srilm.sh b/egs/babel/s5d/local/train_lms_srilm.sh new file mode 100755 index 00000000000..cf357260d8c --- /dev/null +++ b/egs/babel/s5d/local/train_lms_srilm.sh @@ -0,0 +1,229 @@ +#!/bin/bash +export LC_ALL=C + +words_file= +train_text= +dev_text= +oov_symbol="" + +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. " + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 +outlm=lm.gz + + +##End of configuration +loc=`which ngram-count`; +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + else + sdir=`pwd`/../../../tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 + fi +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $words_file $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! 
-z "$dev_text" ] ; then + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$train_text + dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" +fi + + + +# Extract the word list from the training dictionary; exclude special symbols +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab +if (($?)); then + echo "Failed to create vocab from $words_file" + exit 1 +else + # wc vocab # doesn't work due to some encoding issues + echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +cat $train_text | cut -f2- -d' ' > $tgtdir/train.txt +if (($?)); then + echo "Failed to create $tgtdir/train.txt from $train_text" + exit 1 +else + echo "Removed first word (uid) from every line of $train_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +cat $dev_text | cut -f2- -d' ' > $tgtdir/dev.txt +if (($?)); then + echo "Failed to create $tgtdir/dev.txt from $dev_text" + exit 1 +else + echo "Removed first word (uid) from every line of $dev_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +echo "-------------------" +echo "Good-Turing 2grams" +echo "-------------------" +ngram-count -lm $tgtdir/2gram.gt01.gz -gt1min 0 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/2gram.gt02.gz -gt1min 0 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 2grams" +echo "-------------------" +ngram-count -lm $tgtdir/2gram.kn01.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/2gram.kn02.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Good-Turing 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text 
$tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text 
$tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+if [ ! -z ${LIBLBFGS} ]; then
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using "ngram"
+  echo "-------------------"
+  echo "Maxent 2grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 2 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/2gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
+
+echo "--------------------"
+echo "Computing perplexity"
+echo "--------------------"
+(
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
+
+echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
+
+#This will link the lowest perplexity LM as the output LM.
+#ln -sf $tgtdir/`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '` $outlm
+
+#A slight modification of the previous approach:
+#We look at the two lowest-perplexity LMs and prefer the 3gram LM if it is one
+#of the two, even if the 4gram has a lower perplexity.
+nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
+if [[ $nof_trigram_lm -eq 0 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+elif [[ $nof_trigram_lm -eq 2 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+else  #exactly one 3gram LM
+  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
+fi
+(cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
+
diff --git a/egs/babel/s5d/local/txt_to_rttm.pl b/egs/babel/s5d/local/txt_to_rttm.pl
new file mode 100755
index 00000000000..0e128520880
--- /dev/null
+++ b/egs/babel/s5d/local/txt_to_rttm.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: txt_to_rttm.pl [options] <txt-in|-> <rttm-out|->
+
+Allowed options:
+  --flen    : Frame length (float, default = 0.01)
+  --symtab  : Symbol table (string, default = "")
+  --segment : Segment file from Kaldi (string, default = "")
+EOU
+
+my $symtab = "";
+my $segment = "";
+my $flen = 0.01;
+GetOptions('symtab=s' => \$symtab,
+  'segment=s' => \$segment,
+  'flen=f' => \$flen);
+
+if ($symtab) {
+  if (!open(S, "<$symtab")) {print "Fail to open symbol table: $symtab\n"; exit 1;}
+}
+
+if ($segment) {
+  if (!open(SEG, "<$segment")) {print "Fail to open segment file: $segment\n"; exit 1;}
+}
+
+if(@ARGV != 2) {
+  die $Usage;
+}
+
+# Get parameters
+my $filein = shift @ARGV;
+my $fileout = shift @ARGV;
+
+# Get input source
+my $source = "";
+if ($filein eq "-") {
+  $source = "STDIN";
+} else {
+  if (!open(I, "<$filein")) {print "Fail to open input file: $filein\n"; exit 1;}
+  $source = "I";
+}
+
+# Open output fst list
+my $sourceout = "";
+if ($fileout ne "-") {
+  if (!open(O, ">$fileout")) {print "Fail to open output file: $fileout\n"; exit 1;}
+  $sourceout = "O";
+}
+
+# Get symbol table and start time
+my %sym = ();
+my %tbeg = ();
+my %uid2utt = ();
+if ($symtab) {
+  while(<S>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 2 || die "Bad number of columns in $symtab\n";
+    $sym{$col[1]} = $col[0];
+  }
+}
+
+if ($segment) {
+  while(<SEG>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 4 || die "Bad number of columns in $segment\n";
+    $tbeg{$col[0]} = $col[2];
+    $uid2utt{$col[0]} = $col[1];
+  }
+}
+
+# Processing
+while (<$source>) {
+  chomp;
+  my @col = split(" ", $_);
+  my $uid = shift @col;
+  my $words = join(" ", @col);
+  @col = split(/;/, $words);
+
+  my $utt = $uid;
+  my $sta = 0;
+  if ($segment) {
+    $utt = $uid2utt{$uid};
+    $sta = $tbeg{$uid};
+  }
+  foreach (@col) {
+    my @subcol = split(" ", $_);
+    @subcol == 2 || die "Bad number of columns in word-frame pair\n";
+    my $word = $subcol[0];
+    my $dur = $subcol[1]*$flen;
+    my $lex = "LEXEME";
+    if ($symtab) {$word = $sym{$word};}
+    if ($word =~ m/^<.*>$/) {$lex = "NON-LEX";}
+    eval "print $sourceout \"$lex $utt 1 $sta $dur $word \n\"";
+    $sta += $dur;
+  }
+}
+
+if ($symtab) {close(S);}
+if ($segment) {close(SEG);}
+if ($filein ne "-") {close(I);}
+if ($fileout ne "-") {close(O);}
diff --git a/egs/babel/s5d/local/uem_ctm2segments.pl b/egs/babel/s5d/local/uem_ctm2segments.pl
new file mode 100755
index 00000000000..658690172c8
--- /dev/null
+++ b/egs/babel/s5d/local/uem_ctm2segments.pl
@@ -0,0 +1,232 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+
+################################################################################
+# Convert a CTM file produced by decoding a long segment, typically several min
+# long, into a sequence of shorter segments of duration 10-15 seconds. Produce
+# a segments file of the form used for Kaldi training/decoding
+#
+#   utteranceID recordingID startTime endTime
+#
+# The desired outcome is that the long (input) segment will be recursively cut
+# into shorter segments at the location of long silences, leaving (say) 0.5 sec
+# of silence at each end of the two resulting shorter segments, until all the
+# segments are of the desired duration.
+#
+# NOTE: It is assumed that the CTM file provides time information at 0.01 sec
+# resolution, and that any missing segments in the CTM correspond to the
+# optional silence model, whose output token was removed by the sequence
+#
+#   lattice-align-words --> lattice-to-ctm-conf --> raw CTM file
+#
+  $ctmTimeStep = 0.01; # Could be changed if needed by --ctmTimeStep
+#
+# It is further assumed that the explicit silence token (word) is
+#
+  $silence = "<silence>";
+#
+# This could be changed using the --silence option if needed.
+#
+# Another option is the minimum silence duration to permit segmentation
+#
+  $minSilence = 1.02; # seconds
+#
+# Maximum allowed segment length, could be changed through --maxSegLen
+#
+  $maxSegLen = 30; #seconds
+#
+# Default segment length, used when the ctm segment is too long
+#
+  $defaultSegLen = 10; # seconds
+################################################################################
+
+GetOptions("ctmTimeStep=f" => \$ctmTimeStep,
+  "minSilence=f" => \$minSilence,
+  "silence=s" => \$silence,
+  "maxSegLen=f" => \$maxSegLen,
+  "defaultSegLen=f" => \$defaultSegLen);
+
+if ($#ARGV == 1) {
+  $ctmFile = $ARGV[0];
+  $segmentsFile = $ARGV[1];
+  print STDERR ("$0: $ctmFile $segmentsFile\n");
+  print STDERR ("\t--ctmTimeStep = $ctmTimeStep\n") unless ($ctmTimeStep == 0.01);
+  print STDERR ("\t--silence = $silence\n") unless ($silence eq "<silence>");
+  print STDERR ("\t--maxSegLen = $maxSegLen\n") unless ($maxSegLen == 30);
+  print STDERR ("\t--defaultSegLen = $defaultSegLen\n") unless ($defaultSegLen == 10);
+
+} else {
+  print STDERR ("Usage: $0 [--options] inputCTM outputSegments\n");
+  print STDERR ("\t--ctmTimeStep %f   Time resolution of CTM file (default 0.01 sec)\n");
+  print STDERR ("\t--silence %s       Word token for silence (default <silence>)\n");
+  print STDERR ("\t--maxSegLen %f     Max allowed segment length (default 30 sec)\n");
+  print STDERR ("\t--defaultSegLen %f Default segment length (default 10 sec)\n");
+  exit(1);
+}
+
+open (CTM, $ctmFile)
+  || die "Unable to open input CTM file $ctmFile for reading";
+$numRecordings = $numWords = $n = 0;
+$prevFileName = "";
+$prevChannel = "";
+$prevEndTime = 0.00;
+$prevConfidence = 0.00;
+while ($line=<CTM>) {
+  @token = split(/\s+/, $line);
+  unless (($#token==4)||($#token==5)) {
+    # CTM should have 5 or 6 tokens per line
+    #   audioFile channel startTime duration word [confidence]
+    print STDERR ("$0 WARNING: unparsable line $. in ctm file: $line");
in ctm file: $line"); + next; + } + if ( ( ($token[0] ne $prevFileName) || ($token[1] ne $prevChannel) ) && ($prevFileName ne "") ) { + break if ($n==0); + ######################################################################## + # This is the next audio file; create segments for the previous file + ######################################################################## + print STDERR ("Audio file $prevFileName contains $n word tokens\n"); + printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file); + ######################################################################## + # Done writing out the segments for the previous audio recording + ######################################################################## + $numRecordings++; + # Reset to process the next file + $prevFileName = ""; + $prevChannel = ""; + $prevEndTime = 0.00; + $prevConfidence = 0.00; + $n=0; + } + # Otherwise, this is the next word in the same (i.e. previous) audio file + if ( ($token[2]-$prevEndTime) > $ctmTimeStep ) { + # There is a missing segment in the CTM, presumably silence + $fileName[$n] = $token[0]; + $channel[$n] = $token[1]; + $startTime[$n] = $prevEndTime; + $endTime[$n] = $token[2]; + $wordToken[$n] = $silence; + $confidence[$n]= $prevConfidence; + $n++; + } + # Record this token for processing later + $prevFileName = $fileName[$n] = $token[0]; + $prevChannel = $channel[$n] = $token[1]; + $startTime[$n] = $token[2]; + $prevEndTime = $endTime[$n] = ($token[2]+$token[3]); + $wordToken[$n] = $token[4]; + $prevConfidence = $confidence[$n] = $token[5] if ($#token==5); + $n++; + $numWords++; +} +close(CTM); +if ($n>0) { + # This is the last audio file; create segments for the file + print STDERR ("Audio file $prevFileName contains $n word tokens\n"); + printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file); + # Done writing out the segments for the last audio recording + $numRecordings++; +} +print STDERR ("Read $numRecordings filenames containing $numWords words from $ctmFile\n"); + + +sub process_this_audio_file { + # Merge consecutive speech/silence tokens to create candidate "segments" + $s=0; + $segmentStart[$s] = 0.00; + $segmentType[$s] = $silence; + $segmentEnd[$s] = -1.0; + for ($i=0; $i<$n; $i++) { + $sTime = $startTime[$i]; + $word = $wordToken[$i]; + $eTime = $endTime[$i]; + if ( ($word eq $silence) && ($segmentType[$s] ne $silence) + || ($word ne $silence) && ($segmentType[$s] eq $silence) ) { + $segmentEnd[$s] = $sTime; + $s++; + $segmentStart[$s] = $sTime; + $segmentType[$s] = ($word eq $silence) ? $silence : "" ; + } + $segmentEnd[$s] = $eTime; + } + # Merge speech segments separated by silence of less than some minimum duration + # Note: there must be at least two segments for mergers to be an option, i.e. $s>0. 
+  if ($s>0) {
+    if ( ($segmentType[0] eq $silence)
+      && ( ($segmentEnd[0]-$segmentStart[0]) < $minSilence) ) {
+      die "Something wrong: initial silence segment must have a speech segment following it"
+        unless ($segmentType[1] eq "<speech>");
+      $segmentType[0] = $segmentType[1];
+      $segmentEnd[0] = $segmentEnd[1];
+      for ($j=2; $j<=$s; $j++) {
+        $segmentStart[$j-1] = $segmentStart[$j];
+        $segmentType[$j-1] = $segmentType[$j];
+        $segmentEnd[$j-1] = $segmentEnd[$j];
+      }
+      $s--; # one silence segment removed
+    }
+    for ($i=1; $i<$s; $i++) {
+      if ( ($segmentType[$i] eq $silence)
+        && ( ($segmentEnd[$i]-$segmentStart[$i]) < $minSilence) ) {
+        die "Something wrong: internal silence segment must have speech segments on either side"
+          unless ( ($segmentType[$i-1] eq "<speech>") && ($segmentType[$i+1] eq "<speech>") );
+        $segmentEnd[$i-1] = $segmentEnd[$i+1];
+        for ($j=$i+2; $j<=$s; $j++) {
+          $segmentStart[$j-2] = $segmentStart[$j];
+          $segmentType[$j-2] = $segmentType[$j];
+          $segmentEnd[$j-2] = $segmentEnd[$j];
+        }
+        $s -= 2; # one silence removed, two speech segments merged
+        $i--; # backtrack, to process the segment that just moved into position $i
+      }
+    }
+    if ( ($segmentType[$s] eq $silence)
+      && ( ($segmentEnd[$s]-$segmentStart[$s]) < $minSilence) ) {
+      die "Something wrong: final silence segment must have a speech segment preceding it"
+        unless ($segmentType[$s-1] eq "<speech>");
+      $segmentEnd[$s-1] = $segmentEnd[$s];
+      $s--; # one silence segment removed
+    }
+  }
+  # Print segment markers for debugging
+  $num = $s + 1;
+  for ($i=0; $i<=$s; $i++) {
+#    printf STDOUT ("%s %s %.2f %.2f %s\n",
+#    printf STDOUT ("%s %s %.2f %.2f\n",
+#      sprintf ("%s_%06i",$prevFileName,(100*$segmentStart[$i])),
+#      $prevFileName,
+#      $segmentStart[$i],
+#      $segmentEnd[$i], $segmentType[$i]);
+#      ($segmentStart[$i] - (($i==0) ? 0.0 : 0.5)),
+#      ($segmentEnd[$i] + (($i==$s) ? 0.0 : 0.5))) unless ($segmentType[$i] eq $silence);
+    if ($segmentType[$i] ne $silence) {
+      if (($segmentEnd[$i] - $segmentStart[$i]) > $maxSegLen) {
+        $fakeStart = $segmentStart[$i] - (($i==0) ? 0.0 : 0.5);
+        while (($segmentEnd[$i] - $fakeStart) > $defaultSegLen) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+            sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+            $prevFileName,
+            $fakeStart,
+            $fakeStart + $defaultSegLen);
+          $fakeStart += $defaultSegLen;
+          $num += 2;
+        }
+        if (($segmentEnd[$i] - $fakeStart) > 0) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+            sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+            $prevFileName,
+            $fakeStart,
+            ($segmentEnd[$i] + (($i==$s) ?
0.0 : 0.5))); + } + } + } + $num; +} diff --git a/egs/babel/s5d/nnet3_examples.sh b/egs/babel/s5d/nnet3_examples.sh new file mode 100644 index 00000000000..82661140d3c --- /dev/null +++ b/egs/babel/s5d/nnet3_examples.sh @@ -0,0 +1,32 @@ +# The results shown below are for Telugu fullLP condition +#TDNN + local/nnet3/run_tdnn.sh \ + --affix "6layer_r512" \ + --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -7,2 0 0 " \ + --relu-dim 512 || exit 1; + + # I modified the TDNN scripts to run for 5 epochs, however these results are with 3 epoch training + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --nnet3-model nnet3/tdnn_6layer_r512_sp + #%WER 68.4 | 22131 40145 | 36.3 45.9 17.9 4.7 68.4 31.9 | -1.082 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --nnet3-model nnet3/tdnn_6layer_r512_sp + #%WER 67.1 | 22131 40145 | 36.4 45.9 17.8 3.5 67.1 29.6 | -0.902 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys + + + + +#LSTM + local/nnet3/run_lstm.sh + + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20 + #%WER 68.0 | 22131 40145 | 38.2 44.8 17.0 6.2 68.0 33.5 | -1.491 | exp/nnet3/lstm_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20 + #%WER 65.1 | 22131 40145 | 39.2 45.9 14.9 4.3 65.1 28.8 | -1.299 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys + + +#BLSTM + local/nnet3/run_blstm.sh + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 + #%WER 67.1 | 22131 40145 | 38.8 44.9 16.3 5.9 67.1 33.6 | -1.737 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 + #%WER 64.2 | 22131 40145 | 39.8 46.0 14.2 4.0 64.2 29.0 | -1.548 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys + diff --git a/egs/babel/s5d/path.sh b/egs/babel/s5d/path.sh new file mode 100755 index 00000000000..212c5e15d55 --- /dev/null +++ b/egs/babel/s5d/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=/export/a09/jtrmal/kaldi/ +. $KALDI_ROOT/tools/env.sh +. /export/a09/jtrmal/kaldi-current/tools/env.sh +. 
/export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/online2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export LC_ALL=C + diff --git a/egs/babel/s5d/results/RESULTS.105-turkish.flp b/egs/babel/s5d/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 
0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5d/results/RESULTS.106-tagalog.flp b/egs/babel/s5d/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 
0.5077, THRESHOLD = 0.648 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5d/results/RESULTS.107-vietnamese.flp b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys + +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 
+exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105 + +############################################################################################################################ + diff --git a/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 new file mode 100644 index 00000000000..1fdad0615e1 --- /dev/null +++ b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 @@ -0,0 +1,211 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:00:20-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , 
#Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), for the set [kwlist2] evaluated on 2016-03-31T12:00:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924 +ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427 +ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873 +ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325 +ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 
exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821 +# +# KWS Task performance (TWV), for the set [kwlist3] evaluated on 2016-03-31T12:00:40-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605 +ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939 +ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822 +ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839 +ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858 +ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:00:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , 
#FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:01:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 
exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:23-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed 
ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:01:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105 +ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873 +ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208 +ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:01:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787 +ATWV=0.2681 
OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306 +ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367 +ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751 +ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1504 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:55-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379 +ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 
0.2128 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448 +ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:02:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055 +ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152 +ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487 +ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977 +ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911 +ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 
exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:02:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759 +ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738 +ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , #Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606 +ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544 +ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023 +ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699 diff --git a/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 new file mode 100644 index 00000000000..1bbdc7dc33a --- /dev/null +++ 
b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 @@ -0,0 +1,100 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:04:48-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4230 OTWV=0.5203 STWV=0.6189 MTWV=0.4235 THRESHOLD=0.473 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6311 , #FA=6437 , #Miss=5805 , Contributed ATWV= 0.4023, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4691 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=402 , #Miss=391 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1450 +ATWV=0.4491 OTWV=0.5597 STWV=0.7023 MTWV=0.4494 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6205 , #FA=5950 , #Miss=5911 , Contributed ATWV= 0.4196, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4893 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=429 , #Miss=373 , Contributed ATWV= 0.0295, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2070 +ATWV=0.4529 OTWV=0.5702 STWV=0.7084 MTWV=0.4529 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6656 , #FA=7401 , #Miss=5460 , Contributed ATWV= 0.4228, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4929 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=120 , #FA=600 , #Miss=360 , Contributed ATWV= 0.0301, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2113 +ATWV=0.4606 OTWV=0.5758 STWV=0.7195 MTWV=0.4606 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6612 , #FA=6706 , #Miss=5504 , Contributed ATWV= 0.4292, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5004 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=118 , #FA=517 , #Miss=362 , Contributed ATWV= 0.0314, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2204 +ATWV=0.4728 OTWV=0.5842 STWV=0.7081 MTWV=0.4728 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6938 , #FA=6197 , #Miss=5178 , Contributed ATWV= 0.4482, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5226 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=96 , #FA=415 , #Miss=384 , Contributed ATWV= 0.0246, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1729 +ATWV=0.4845 OTWV=0.5929 STWV=0.7193 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=7106 , #FA=6592 , #Miss=5010 , Contributed ATWV= 0.4522, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5273 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=464 , #Miss=364 , Contributed ATWV= 0.0322, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2264 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:05:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3898 OTWV=0.4933 STWV=0.6145 MTWV=0.3899 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4970 , #FA=6619 , #Miss=7146 , Contributed ATWV= 0.3630, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4232 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=99 , #FA=372 , #Miss=381 , Contributed ATWV= 0.0268, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1882 +ATWV=0.4031 
OTWV=0.5200 STWV=0.6682 MTWV=0.4031 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5121 , #FA=7137 , #Miss=6995 , Contributed ATWV= 0.3719, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4336 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=114 , #FA=463 , #Miss=366 , Contributed ATWV= 0.0312, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2194 +ATWV=0.4084 OTWV=0.5225 STWV=0.6694 MTWV=0.4094 THRESHOLD=0.465 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5236 , #FA=6998 , #Miss=6880 , Contributed ATWV= 0.3785, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4413 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=459 , #Miss=372 , Contributed ATWV= 0.0299, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2103 +ATWV=0.4168 OTWV=0.5258 STWV=0.6705 MTWV=0.4171 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5251 , #FA=6798 , #Miss=6865 , Contributed ATWV= 0.3850, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4489 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=437 , #Miss=364 , Contributed ATWV= 0.0318, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2231 +ATWV=0.4202 OTWV=0.5321 STWV=0.6687 MTWV=0.4209 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5526 , #FA=7152 , #Miss=6590 , Contributed ATWV= 0.3947, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4602 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=92 , #FA=418 , #Miss=388 , Contributed ATWV= 0.0254, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1788 +ATWV=0.4298 OTWV=0.5434 STWV=0.6798 MTWV=0.4315 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5351 , #FA=6564 , #Miss=6765 , Contributed ATWV= 0.3971, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4630 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=433 , #Miss=364 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2296 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:05:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3717 OTWV=0.4826 STWV=0.6206 MTWV=0.3717 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4873 , #FA=7400 , #Miss=7243 , Contributed ATWV= 0.3453, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4026 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=102 , #FA=464 , #Miss=378 , Contributed ATWV= 0.0264, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1855 +ATWV=0.3794 OTWV=0.4921 STWV=0.6366 MTWV=0.3794 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4976 , #FA=7283 , #Miss=7140 , Contributed ATWV= 0.3504, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4086 + OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=475 , #Miss=373 , Contributed ATWV= 0.0290, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2039 +ATWV=0.3803 OTWV=0.4989 STWV=0.6527 MTWV=0.3811 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4802 , #FA=7174 , #Miss=7314 , Contributed ATWV= 
0.3507, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4089
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=485 , #Miss=372 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2079
+ATWV=0.3865 OTWV=0.5032 STWV=0.6597 MTWV=0.3865 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5057 , #FA=7617 , #Miss=7059 , Contributed ATWV= 0.3569, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4161
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=111 , #FA=541 , #Miss=369 , Contributed ATWV= 0.0297, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2085
+ATWV=0.3987 OTWV=0.5141 STWV=0.6609 MTWV=0.4000 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5421 , #FA=7991 , #Miss=6695 , Contributed ATWV= 0.3758, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4382
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=545 , #Miss=391 , Contributed ATWV= 0.0229, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1606
+ATWV=0.4089 OTWV=0.5226 STWV=0.6702 MTWV=0.4089 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5226 , #FA=7295 , #Miss=6890 , Contributed ATWV= 0.3793, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4423
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=109 , #FA=490 , #Miss=371 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2077
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3846 OTWV=0.4898 STWV=0.6140 MTWV=0.3849 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4698 , #FA=6363 , #Miss=7418 , Contributed ATWV= 0.3348, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3904
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=376 , #Miss=311 , Contributed ATWV= 0.0498, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3497
+ATWV=0.4084 OTWV=0.5296 STWV=0.6808 MTWV=0.4084 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5112 , #FA=7507 , #Miss=7004 , Contributed ATWV= 0.3551, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4140
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=188 , #FA=531 , #Miss=292 , Contributed ATWV= 0.0533, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3747
+ATWV=0.4147 OTWV=0.5426 STWV=0.6942 MTWV=0.4164 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4903 , #FA=6600 , #Miss=7213 , Contributed ATWV= 0.3565, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4157
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=197 , #FA=506 , #Miss=283 , Contributed ATWV= 0.0582, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4086
+ATWV=0.4205 OTWV=0.5421 STWV=0.6920 MTWV=0.4207 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5179 , #FA=7269 , #Miss=6937 , Contributed ATWV= 0.3621, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4222
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=199 , #FA=547 , #Miss=281 , Contributed ATWV= 0.0584, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4099
+ATWV=0.4386 OTWV=0.5595 STWV=0.7003 MTWV=0.4400 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5245 , #FA=6308 , #Miss=6871 , Contributed ATWV= 0.3822, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4456
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=187 , #FA=418 , #Miss=293 , Contributed ATWV= 0.0564, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3962
+ATWV=0.4394 OTWV=0.5585 STWV=0.6927 MTWV=0.4397 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5554 , #FA=7486 , #Miss=6562 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4418
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=207 , #FA=548 , #Miss=273 , Contributed ATWV= 0.0604, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4246
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1595 OTWV=0.2619 STWV=0.3850 MTWV=0.1602 THRESHOLD=0.503 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=1949 , #FA=3706 , #Miss=10167, Contributed ATWV= 0.1404, Best Possible Contributed ATWV= 0.8576, ATWV= 0.1637
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=74 , #FA=313 , #Miss=406 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1338
+ATWV=0.3032 OTWV=0.4062 STWV=0.5289 MTWV=0.3032 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3866 , #FA=6132 , #Miss=8250 , Contributed ATWV= 0.2606, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3039
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=149 , #FA=450 , #Miss=331 , Contributed ATWV= 0.0425, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2988
+ATWV=0.3355 OTWV=0.4619 STWV=0.6238 MTWV=0.3355 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4070 , #FA=6524 , #Miss=8046 , Contributed ATWV= 0.2849, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3322
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=176 , #FA=559 , #Miss=304 , Contributed ATWV= 0.0506, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3553
+ATWV=0.3368 OTWV=0.4568 STWV=0.6010 MTWV=0.3403 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3997 , #FA=5729 , #Miss=8119 , Contributed ATWV= 0.2888, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3367
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=513 , #Miss=311 , Contributed ATWV= 0.0480, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3371
+ATWV=0.3690 OTWV=0.4945 STWV=0.6419 MTWV=0.3701 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4757 , #FA=7077 , #Miss=7359 , Contributed ATWV= 0.3202, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3734
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=521 , #Miss=311 , Contributed ATWV= 0.0488, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3428
+ATWV=0.3782 OTWV=0.4916 STWV=0.6313 MTWV=0.3786 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4496 , #FA=6091 , #Miss=7620 , Contributed ATWV= 0.3256, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3797
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=183 , #FA=480 , #Miss=297 , Contributed ATWV= 0.0525, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3691
diff --git a/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
new file mode 100644
index 00000000000..f218056412a
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:13:21-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2072 OTWV=0.3242 STWV=0.4752 MTWV=0.2072 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4225 , #FA=6109 , #Miss=10665, Contributed ATWV= 0.2011, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2290
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=38 , #FA=404 , #Miss=388 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0500
+ATWV=0.2234 OTWV=0.3660 STWV=0.5806 MTWV=0.2244 THRESHOLD=0.473 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4305 , #FA=6650 , #Miss=10585, Contributed ATWV= 0.2101, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2393
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=515 , #Miss=364 , Contributed ATWV= 0.0133, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1094
+ATWV=0.2386 OTWV=0.3711 STWV=0.5954 MTWV=0.2386 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4267 , #FA=5993 , #Miss=10623, Contributed ATWV= 0.2234, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2544
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=66 , #FA=446 , #Miss=360 , Contributed ATWV= 0.0152, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1243
+ATWV=0.2461 OTWV=0.3869 STWV=0.6098 MTWV=0.2469 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4423 , #FA=5883 , #Miss=10467, Contributed ATWV= 0.2327, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2651
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=453 , #Miss=363 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1098
+ATWV=0.2654 OTWV=0.4100 STWV=0.6005 MTWV=0.2672 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5273 , #FA=6428 , #Miss=9617 , Contributed ATWV= 0.2495, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2842
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=450 , #Miss=358 , Contributed ATWV= 0.0159, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1302
+ATWV=0.2681 OTWV=0.4076 STWV=0.6090 MTWV=0.2697 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5143 , #FA=6378 , #Miss=9747 , Contributed ATWV= 0.2519, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2868
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=462 , #Miss=357 , Contributed ATWV= 0.0163, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1335
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:13:34-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1796 OTWV=0.2970 STWV=0.5017 MTWV=0.1796 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2117 , #FA=5124 , #Miss=12773, Contributed ATWV= 0.1716, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1954
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=45 , #FA=519 , #Miss=381 , Contributed ATWV= 0.0080, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0658
+ATWV=0.1946 OTWV=0.3201 STWV=0.5540 MTWV=0.1946 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2218 , #FA=5557 , #Miss=12672, Contributed ATWV= 0.1817, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2069
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=67 , #FA=599 , #Miss=359 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1059
+ATWV=0.2035 OTWV=0.3267 STWV=0.5663 MTWV=0.2035 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2384 , #FA=6085 , #Miss=12506, Contributed ATWV= 0.1902, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2167
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=70 , #FA=669 , #Miss=356 , Contributed ATWV= 0.0132, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1084
+ATWV=0.2125 OTWV=0.3434 STWV=0.5569 MTWV=0.2147 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2802 , #FA=6994 , #Miss=12088, Contributed ATWV= 0.2032, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2314
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=692 , #Miss=363 , Contributed ATWV= 0.0092, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0758
+ATWV=0.2146 OTWV=0.3363 STWV=0.5757 MTWV=0.2146 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2243 , #FA=4805 , #Miss=12647, Contributed ATWV= 0.2025, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2306
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=550 , #Miss=366 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0991
+ATWV=0.2233 OTWV=0.3537 STWV=0.5753 MTWV=0.2233 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2472 , #FA=5516 , #Miss=12418, Contributed ATWV= 0.2070, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2357
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=548 , #Miss=354 , Contributed ATWV= 0.0164, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1343
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:13:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1757 OTWV=0.2941 STWV=0.5188 MTWV=0.1757 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2109 , #FA=5112 , #Miss=12781, Contributed ATWV= 0.1661, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1892
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=51 , #FA=566 , #Miss=375 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0787
+ATWV=0.1885 OTWV=0.3156 STWV=0.5586 MTWV=0.1885 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2171 , #FA=5603 , #Miss=12719, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=672 , #Miss=354 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1103
+ATWV=0.1935 OTWV=0.3237 STWV=0.5717 MTWV=0.1935 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2122 , #FA=4885 , #Miss=12768, Contributed ATWV= 0.1811, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2062
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=595 , #Miss=364 , Contributed ATWV= 0.0124, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1019
+ATWV=0.2013 OTWV=0.3267 STWV=0.5641 MTWV=0.2014 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2195 , #FA=4829 , #Miss=12695, Contributed ATWV= 0.1891, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2153
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=592 , #Miss=366 , Contributed ATWV= 0.0123, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1005
+ATWV=0.2087 OTWV=0.3368 STWV=0.5610 MTWV=0.2087 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2348 , #FA=5077 , #Miss=12542, Contributed ATWV= 0.1967, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2240
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=579 , #Miss=363 , Contributed ATWV= 0.0120, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0984
+ATWV=0.2116 OTWV=0.3465 STWV=0.5804 MTWV=0.2116 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2398 , #FA=5507 , #Miss=12492, Contributed ATWV= 0.1960, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2232
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=609 , #Miss=357 , Contributed ATWV= 0.0156, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1279
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1708 OTWV=0.2816 STWV=0.4795 MTWV=0.1711 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1913 , #FA=4978 , #Miss=12977, Contributed ATWV= 0.1615, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1839
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=507 , #Miss=376 , Contributed ATWV= 0.0093, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0764
+ATWV=0.1926 OTWV=0.3156 STWV=0.5617 MTWV=0.1926 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1964 , #FA=4856 , #Miss=12926, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=566 , #Miss=347 , Contributed ATWV= 0.0176, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1442
+ATWV=0.1985 OTWV=0.3240 STWV=0.5820 MTWV=0.1985 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1945 , #FA=4804 , #Miss=12945, Contributed ATWV= 0.1794, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2044
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=82 , #FA=557 , #Miss=344 , Contributed ATWV= 0.0191, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1563
+ATWV=0.2054 OTWV=0.3342 STWV=0.5882 MTWV=0.2054 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2125 , #FA=5218 , #Miss=12765, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2135
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=80 , #FA=626 , #Miss=346 , Contributed ATWV= 0.0179, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1466
+ATWV=0.2126 OTWV=0.3434 STWV=0.5827 MTWV=0.2126 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2207 , #FA=4958 , #Miss=12683, Contributed ATWV= 0.1920, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2186
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=81 , #FA=521 , #Miss=345 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1692
+ATWV=0.2148 OTWV=0.3452 STWV=0.5808 MTWV=0.2148 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2497 , #FA=6035 , #Miss=12393, Contributed ATWV= 0.1978, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2252
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=661 , #Miss=347 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1397
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:14-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1379 OTWV=0.2528 STWV=0.4632 MTWV=0.1385 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1422 , #FA=3603 , #Miss=13468, Contributed ATWV= 0.1268, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1444
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=431 , #Miss=376 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0907
+ATWV=0.1718 OTWV=0.2920 STWV=0.5386 MTWV=0.1718 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1789 , #FA=4843 , #Miss=13101, Contributed ATWV= 0.1564, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1781
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=71 , #FA=619 , #Miss=355 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1258
+ATWV=0.1754 OTWV=0.3002 STWV=0.5589 MTWV=0.1754 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1764 , #FA=4651 , #Miss=13126, Contributed ATWV= 0.1573, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1791
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=78 , #FA=592 , #Miss=348 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1484
+ATWV=0.1768 OTWV=0.3015 STWV=0.5378 MTWV=0.1768 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1823 , #FA=4605 , #Miss=13067, Contributed ATWV= 0.1624, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1849
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=613 , #Miss=358 , Contributed ATWV= 0.0145, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1188
+ATWV=0.1851 OTWV=0.3170 STWV=0.5671 MTWV=0.1853 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1990 , #FA=4815 , #Miss=12900, Contributed ATWV= 0.1680, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1913
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=74 , #FA=576 , #Miss=352 , Contributed ATWV= 0.0171, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1406
+ATWV=0.1973 OTWV=0.3276 STWV=0.5722 MTWV=0.1981 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2073 , #FA=4741 , #Miss=12817, Contributed ATWV= 0.1803, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2053
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=77 , #FA=601 , #Miss=349 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1395
diff --git a/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
new file mode 100644
index 00000000000..4e20fac4f56
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:22:17-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4218 OTWV=0.5044 STWV=0.5838 MTWV=0.4218 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5437 , #FA=4638 , #Miss=5683 , Contributed ATWV= 0.4123, Best Possible Contributed ATWV= 0.8474, ATWV= 0.4865
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=29 , #FA=122 , #Miss=445 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0627
+ATWV=0.4619 OTWV=0.5643 STWV=0.6680 MTWV=0.4626 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6171 , #FA=5956 , #Miss=4949 , Contributed ATWV= 0.4498, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5308
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=41 , #FA=214 , #Miss=433 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0791
+ATWV=0.4641 OTWV=0.5581 STWV=0.6612 MTWV=0.4641 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5856 , #FA=4921 , #Miss=5264 , Contributed ATWV= 0.4543, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5361
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=33 , #FA=191 , #Miss=441 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4733 OTWV=0.5691 STWV=0.6747 MTWV=0.4733 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5952 , #FA=4840 , #Miss=5168 , Contributed ATWV= 0.4608, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5438
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=188 , #Miss=436 , Contributed ATWV= 0.0125, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0818
+ATWV=0.4843 OTWV=0.5738 STWV=0.6585 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6469 , #FA=5159 , #Miss=4651 , Contributed ATWV= 0.4745, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5599
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=34 , #FA=141 , #Miss=440 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4867 OTWV=0.5849 STWV=0.6767 MTWV=0.4879 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6428 , #FA=4944 , #Miss=4692 , Contributed ATWV= 0.4746, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5601
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=37 , #FA=170 , #Miss=437 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0792
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:22:31-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2659 OTWV=0.3048 STWV=0.3360 MTWV=0.2665 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=2225 , #Miss=8134 , Contributed ATWV= 0.2583, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3048
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=25 , #FA=85 , #Miss=449 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0502
+ATWV=0.2858 OTWV=0.3350 STWV=0.3711 MTWV=0.2885 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3140 , #FA=2218 , #Miss=7980 , Contributed ATWV= 0.2774, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3274
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=27 , #FA=87 , #Miss=447 , Contributed ATWV= 0.0084, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0548
+ATWV=0.2876 OTWV=0.3334 STWV=0.3663 MTWV=0.2912 THRESHOLD=0.424 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3114 , #FA=2121 , #Miss=8006 , Contributed ATWV= 0.2819, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3327
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=20 , #FA=103 , #Miss=454 , Contributed ATWV= 0.0057, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0371
+ATWV=0.2912 OTWV=0.3367 STWV=0.3742 MTWV=0.2921 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3132 , #FA=2210 , #Miss=7988 , Contributed ATWV= 0.2844, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3357
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=22 , #FA=105 , #Miss=452 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0441
+ATWV=0.2984 OTWV=0.3436 STWV=0.3773 MTWV=0.2984 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3360 , #FA=2233 , #Miss=7760 , Contributed ATWV= 0.2906, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3429
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=26 , #FA=76 , #Miss=448 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+ATWV=0.3002 OTWV=0.3415 STWV=0.3713 MTWV=0.3010 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3335 , #FA=2039 , #Miss=7785 , Contributed ATWV= 0.2924, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3451
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=28 , #FA=67 , #Miss=446 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:22:44-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2620 OTWV=0.3049 STWV=0.3381 MTWV=0.2624 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2896 , #FA=2268 , #Miss=8224 , Contributed ATWV= 0.2505, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2956
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=36 , #FA=98 , #Miss=438 , Contributed ATWV= 0.0115, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0756
+ATWV=0.2714 OTWV=0.3185 STWV=0.3513 MTWV=0.2753 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2951 , #FA=2075 , #Miss=8169 , Contributed ATWV= 0.2618, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3090
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=101 , #Miss=443 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0632
+ATWV=0.2714 OTWV=0.3245 STWV=0.3588 MTWV=0.2765 THRESHOLD=0.365 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2956 , #FA=2157 , #Miss=8164 , Contributed ATWV= 0.2619, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3091
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=32 , #FA=106 , #Miss=442 , Contributed ATWV= 0.0095, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0626
+ATWV=0.2755 OTWV=0.3278 STWV=0.3617 MTWV=0.2787 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3044 , #FA=2236 , #Miss=8076 , Contributed ATWV= 0.2639, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3115
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0116, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0761
+ATWV=0.2876 OTWV=0.3347 STWV=0.3658 MTWV=0.2941 THRESHOLD=0.41 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3271 , #FA=2252 , #Miss=7849 , Contributed ATWV= 0.2787, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3289
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=80 , #Miss=443 , Contributed ATWV= 0.0089, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0582
+ATWV=0.2935 OTWV=0.3400 STWV=0.3723 MTWV=0.2953 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3273 , #FA=2245 , #Miss=7847 , Contributed ATWV= 0.2818, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3325
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0768
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:03-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2646 OTWV=0.3049 STWV=0.3331 MTWV=0.2666 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it3/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2763 , #FA=2036 , #Miss=8357 , Contributed ATWV= 0.2363, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2789
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=86 , #FA=77 , #Miss=388 , Contributed ATWV= 0.0283, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1853
+ATWV=0.3019 OTWV=0.3543 STWV=0.3889 MTWV=0.3067 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2933 , #FA=2017 , #Miss=8187 , Contributed ATWV= 0.2623, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3096
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=120 , #FA=120 , #Miss=354 , Contributed ATWV= 0.0395, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2589
+ATWV=0.3102 OTWV=0.3595 STWV=0.3949 MTWV=0.3138 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=1904 , #Miss=8134 , Contributed ATWV= 0.2695, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3180
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=123 , #FA=124 , #Miss=351 , Contributed ATWV= 0.0407, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2666
+ATWV=0.3108 OTWV=0.3586 STWV=0.3933 MTWV=0.3121 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3021 , #FA=2041 , #Miss=8099 , Contributed ATWV= 0.2674, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3156
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=136 , #FA=103 , #Miss=338 , Contributed ATWV= 0.0434, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2841
+ATWV=0.3149 OTWV=0.3630 STWV=0.3931 MTWV=0.3198 THRESHOLD=0.399 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3228 , #FA=1870 , #Miss=7892 , Contributed ATWV= 0.2780, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3280
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=118 , #FA=93 , #Miss=356 , Contributed ATWV= 0.0369, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2420
+ATWV=0.3200 OTWV=0.3670 STWV=0.3985 MTWV=0.3222 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3204 , #FA=2050 , #Miss=7916 , Contributed ATWV= 0.2783, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3285
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=132 , #FA=113 , #Miss=342 , Contributed ATWV= 0.0416, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2729
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:18-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1461 OTWV=0.1765 STWV=0.1935 MTWV=0.1477 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1504 , #FA=1378 , #Miss=9616 , Contributed ATWV= 0.1281, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1512
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=58 , #FA=79 , #Miss=416 , Contributed ATWV= 0.0180, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1178
+ATWV=0.1866 OTWV=0.2378 STWV=0.2636 MTWV=0.1962 THRESHOLD=0.386 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1677 , #FA=1381 , #Miss=9443 , Contributed ATWV= 0.1586, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1872
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=89 , #FA=93 , #Miss=385 , Contributed ATWV= 0.0281, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1838
+ATWV=0.1946 OTWV=0.2484 STWV=0.2754 MTWV=0.2051 THRESHOLD=0.399 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1704 , #FA=1422 , #Miss=9416 , Contributed ATWV= 0.1643, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1939
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=96 , #FA=110 , #Miss=378 , Contributed ATWV= 0.0303, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1986
+ATWV=0.2026 OTWV=0.2545 STWV=0.2817 MTWV=0.2089 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1821 , #FA=1441 , #Miss=9299 , Contributed ATWV= 0.1707, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2014
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=105 , #FA=121 , #Miss=369 , Contributed ATWV= 0.0319, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2088
+ATWV=0.2288 OTWV=0.2860 STWV=0.3121 MTWV=0.2409 THRESHOLD=0.328 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2169 , #FA=1525 , #Miss=8951 , Contributed ATWV= 0.1938, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2287
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=107 , #FA=86 , #Miss=367 , Contributed ATWV= 0.0350, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2292
+ATWV=0.2408 OTWV=0.2959 STWV=0.3216 MTWV=0.2512 THRESHOLD=0.345 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2322 , #FA=1553 , #Miss=8798 , Contributed ATWV= 0.2054, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2423
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=109 , #FA=92 , #Miss=365 , Contributed ATWV= 0.0355, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2323
diff --git a/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00
new file mode 100644
index 00000000000..792b9ca097d
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:25:49-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3254 OTWV=0.4191 STWV=0.5168 MTWV=0.3254 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5022 , #FA=5294 , #Miss=6940 , Contributed ATWV= 0.3223, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3690
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=26 , #FA=235 , #Miss=491 , Contributed ATWV= 0.0031, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0245
+ATWV=0.3668 OTWV=0.4878 STWV=0.6467 MTWV=0.3672 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5496 , #FA=6611 , #Miss=6466 , Contributed ATWV= 0.3598, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4119
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=403 , #Miss=469 , Contributed ATWV= 0.0071, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0558
+ATWV=0.3767 OTWV=0.4957 STWV=0.6459 MTWV=0.3767 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5698 , #FA=6556 , #Miss=6264 , Contributed ATWV= 0.3715, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4253
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=378 , #Miss=477 , Contributed ATWV= 0.0051, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0406
+ATWV=0.3866 OTWV=0.5082 STWV=0.6665 MTWV=0.3866 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5644 , #FA=6329 , #Miss=6318 , Contributed ATWV= 0.3801, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4352
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=401 , #Miss=469 , Contributed ATWV= 0.0065, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0511
+ATWV=0.4033 OTWV=0.5188 STWV=0.6543 MTWV=0.4034 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6255 , #FA=6413 , #Miss=5707 , Contributed ATWV= 0.3950, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4522
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=52 , #FA=364 , #Miss=465 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0655
+ATWV=0.4131 OTWV=0.5198 STWV=0.6353 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6396 , #FA=6133 , #Miss=5566 , Contributed ATWV= 0.4068, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4657
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=41 , #FA=330 , #Miss=476 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0496
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:26:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3102 OTWV=0.4044 STWV=0.5008 MTWV=0.3102 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3950 , #FA=5521 , #Miss=8012 , Contributed ATWV= 0.3063, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3506
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=29 , #FA=261 , #Miss=488 , Contributed ATWV= 0.0039, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0311
+ATWV=0.3475 OTWV=0.4589 STWV=0.6057 MTWV=0.3482 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4245 , #FA=6029 , #Miss=7717 , Contributed ATWV= 0.3417, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3912
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=39 , #FA=325 , #Miss=478 , Contributed ATWV= 0.0058, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0461
+ATWV=0.3567 OTWV=0.4704 STWV=0.6093 MTWV=0.3575 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4372 , #FA=6005 , #Miss=7590 , Contributed ATWV= 0.3513, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4022
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=38 , #FA=329 , #Miss=479 , Contributed ATWV= 0.0054, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0429
+ATWV=0.3667 OTWV=0.4738 STWV=0.6193 MTWV=0.3674 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4281 , #FA=5292 , #Miss=7681 , Contributed ATWV= 0.3606, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4128
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=311 , #Miss=477 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0481
+ATWV=0.3798 OTWV=0.4888 STWV=0.6133 MTWV=0.3799 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4820 , #FA=5800 , #Miss=7142 , Contributed ATWV= 0.3729, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4269
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=47 , #FA=314 , #Miss=470 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0551
+ATWV=0.3907 OTWV=0.4929 STWV=0.6005 MTWV=0.3907 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5147 , #FA=6646 , #Miss=6815 , Contributed ATWV= 0.3840, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4397
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=45 , #FA=314 , #Miss=472 , Contributed ATWV= 0.0066, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0523
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:26:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2937 OTWV=0.3986 STWV=0.5124 MTWV=0.2937 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4370 , #FA=6963 , #Miss=7592 , Contributed ATWV= 0.2878, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3294
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=371 , #Miss=477 , Contributed ATWV= 0.0059, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0467
+ATWV=0.3182 OTWV=0.4371 STWV=0.5936 MTWV=0.3182 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4479 , #FA=7039 , #Miss=7483 , Contributed ATWV= 0.3118, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3570
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=44 , #FA=444 , #Miss=473 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0500
+ATWV=0.3274 OTWV=0.4496 STWV=0.6019 MTWV=0.3282 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4562 , #FA=6505 , #Miss=7400 , Contributed ATWV= 0.3206, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3671
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=42 , #FA=370 , #Miss=475 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0538
+ATWV=0.3444 OTWV=0.4580 STWV=0.6076 MTWV=0.3446 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4491 , #FA=6127 , #Miss=7471 , Contributed ATWV= 0.3361, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3848
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=49 , #FA=396 , #Miss=468 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0657
+ATWV=0.3515 OTWV=0.4684 STWV=0.6055 MTWV=0.3520 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4966 , #FA=6072 , #Miss=6996 , Contributed ATWV= 0.3427, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3923
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=321 , #Miss=466 , Contributed ATWV= 0.0088, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0694
+ATWV=0.3624 OTWV=0.4732 STWV=0.5981 MTWV=0.3624 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5315 , #FA=6787 , #Miss=6647 , Contributed ATWV= 0.3542, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4055
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=356 , #Miss=466 , Contributed ATWV= 0.0082, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0645
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2989 OTWV=0.3850 STWV=0.4696 MTWV=0.2989 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3952 , #FA=4049 , #Miss=8010 , Contributed ATWV= 0.2858, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3272
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=62 , #FA=188 , #Miss=455 , Contributed ATWV= 0.0132, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1041
+ATWV=0.3556 OTWV=0.4644 STWV=0.6019 MTWV=0.3556 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4743 , #FA=6603 , #Miss=7219 , Contributed ATWV= 0.3340, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3823
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=101 , #FA=434 , #Miss=416 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1708
+ATWV=0.3697 OTWV=0.4791 STWV=0.6110 MTWV=0.3711 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4891 , #FA=6482 , #Miss=7071 , Contributed ATWV= 0.3430, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3927
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=117 , #FA=391 , #Miss=400 , Contributed ATWV= 0.0267, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2109
+ATWV=0.3746 OTWV=0.4805 STWV=0.6185 MTWV=0.3746 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4733 , #FA=5416 , #Miss=7229 , Contributed ATWV= 0.3540, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4053
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=97 , #FA=371 , #Miss=420 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1629
+ATWV=0.3906 OTWV=0.4954 STWV=0.6114 MTWV=0.3933 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5223 , #FA=5433 , #Miss=6739 , Contributed ATWV= 0.3644, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4172
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=113 , #FA=300 , #Miss=404 , Contributed ATWV= 0.0262, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2072
+ATWV=0.4026 OTWV=0.5039 STWV=0.6123 MTWV=0.4026 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5625 , #FA=6694 , #Miss=6337 , Contributed ATWV= 0.3753, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4296
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=134 , #FA=378 , #Miss=383 , Contributed ATWV= 0.0273, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2160
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2248 OTWV=0.3051 STWV=0.3995 MTWV=0.2248 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3143 , #FA=4419 , #Miss=8819 , Contributed ATWV= 0.2067, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2366
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=87 , #FA=253 , #Miss=430 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1433
+ATWV=0.2716 OTWV=0.3791 STWV=0.5246 MTWV=0.2724 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3613 , #FA=5664 , #Miss=8349 , Contributed ATWV= 0.2479, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2838
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=110 , #FA=432 , #Miss=407 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1873
+ATWV=0.2862 OTWV=0.3915 STWV=0.5291 MTWV=0.2874 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3593 , #FA=5029 , #Miss=8369 , Contributed ATWV= 0.2615, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2993
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=111 , #FA=379 , #Miss=406 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1959
+ATWV=0.2923 OTWV=0.4062 STWV=0.5508 MTWV=0.2923 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4069 , #FA=6808 , #Miss=7893 , Contributed ATWV= 0.2672, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3059
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=122 , #FA=492 , #Miss=395 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1987
+ATWV=0.3254 OTWV=0.4319 STWV=0.5579 MTWV=0.3254 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4524 , #FA=6093 , #Miss=7438 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3411
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=130 , #FA=445 , #Miss=387 , Contributed ATWV= 0.0274, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2169
+ATWV=0.3392 OTWV=0.4519 STWV=0.5786 MTWV=0.3392 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4870 , #FA=6666 , #Miss=7092 , Contributed ATWV= 0.3122, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3574
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=133 , #FA=466 , #Miss=384 , Contributed ATWV= 0.0270, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2133
diff --git a/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00
new file mode 100644
index 00000000000..1997692642e
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:28:25-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2977 OTWV=0.3916 STWV=0.4944 MTWV=0.2977 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4395 , #FA=5271 , #Miss=7275 , Contributed ATWV= 0.2900, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3348
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=47 , #FA=376 , #Miss=465 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0574
+ATWV=0.3094 OTWV=0.4251 STWV=0.5824 MTWV=0.3094 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4362 , #FA=5310 , #Miss=7308 , Contributed ATWV= 0.2966, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3424
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=69 , #FA=394 , #Miss=443 , Contributed ATWV= 0.0128, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0957
+ATWV=0.3215 OTWV=0.4319 STWV=0.5834 MTWV=0.3228 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4438 , #FA=4879 , #Miss=7232 , Contributed ATWV= 0.3089, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3566
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=68 , #FA=381 , #Miss=444 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0942
+ATWV=0.3272 OTWV=0.4381 STWV=0.5897 MTWV=0.3272 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4487 , #FA=4749 , #Miss=7183 , Contributed ATWV= 0.3106, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3585
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=83 , #FA=376 , #Miss=429 , Contributed ATWV= 0.0166, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1242
+ATWV=0.3477 OTWV=0.4611 STWV=0.5871 MTWV=0.3478 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5253 , #FA=5693 , #Miss=6417 , Contributed ATWV= 0.3363, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3883
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=450 , #Miss=445 , Contributed ATWV= 0.0114, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0849
+ATWV=0.3543 OTWV=0.4720 STWV=0.6040 MTWV=0.3543 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5318 , #FA=5634 , #Miss=6352 , Contributed ATWV= 0.3414, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3942
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=72 , #FA=448 , #Miss=440 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0965
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:28:39-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2682 OTWV=0.3601 STWV=0.4696 MTWV=0.2690 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3255 , #FA=5332 , #Miss=8415 , Contributed ATWV= 0.2627, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=40 , #FA=397 , #Miss=472 , Contributed ATWV= 0.0056, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0418
+ATWV=0.2748 OTWV=0.3792 STWV=0.5133 MTWV=0.2748 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3036 , #FA=4436 , #Miss=8634 , Contributed ATWV= 0.2646, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3055
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=54 , #FA=371 , #Miss=458 , Contributed ATWV= 0.0102, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0759
+ATWV=0.2823 OTWV=0.3883 STWV=0.5214 MTWV=0.2823 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3344 , #FA=5218 , #Miss=8326 , Contributed ATWV= 0.2711, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3130
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=428 , #Miss=451 , Contributed ATWV= 0.0112, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0836
+ATWV=0.2874 OTWV=0.3903 STWV=0.5191 MTWV=0.2874 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3274 , #FA=4680 , #Miss=8396 , Contributed ATWV= 0.2740, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3163
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=66 , #FA=364 , #Miss=446 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1001
+ATWV=0.3076 OTWV=0.4101 STWV=0.5223 MTWV=0.3089 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3617 , #FA=4797 , #Miss=8053 , Contributed ATWV= 0.2968, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3427
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=376 , #Miss=451 , Contributed ATWV= 0.0107, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0802
+ATWV=0.3083 OTWV=0.4154 STWV=0.5354 MTWV=0.3085 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3813 , #FA=5545 , #Miss=7857 , Contributed ATWV= 0.2996, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3459
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=64 , #FA=463 , #Miss=448 , Contributed ATWV= 0.0087, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0649
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:28:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2624 OTWV=0.3740 STWV=0.5112 MTWV=0.2624 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3562 , #FA=6509 , #Miss=8108 , Contributed ATWV= 0.2483, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2866
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=531 , #Miss=438 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1056
+ATWV=0.2657 OTWV=0.3687 STWV=0.4899 MTWV=0.2657 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3151 , #FA=4641 , #Miss=8519 , Contributed ATWV= 0.2540, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2932
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=52 , #FA=319 , #Miss=460 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0876
+ATWV=0.2779 OTWV=0.3829 STWV=0.5065 MTWV=0.2779 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3384 , #FA=5003 , #Miss=8286 , Contributed ATWV= 0.2626, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=397 , #Miss=438 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1140
+ATWV=0.2802 OTWV=0.3862 STWV=0.5240 MTWV=0.2802 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3436 , #FA=5211 , #Miss=8234 , Contributed ATWV= 0.2655, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3065
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=73 , #FA=443 , #Miss=439 , Contributed ATWV= 0.0147, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1099
+ATWV=0.2970 OTWV=0.4030 STWV=0.5268 MTWV=0.2974 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3835 , #FA=5650 , #Miss=7835 , Contributed ATWV= 0.2816, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3251
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=75 , #FA=434 , #Miss=437 , Contributed ATWV= 0.0154, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1153
+ATWV=0.2994 OTWV=0.4095 STWV=0.5369 MTWV=0.2994 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3723 , #FA=4888 , #Miss=7947 , Contributed ATWV= 0.2855, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3295
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=390 , #Miss=445 , Contributed ATWV= 0.0139, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1042
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:29:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2666 OTWV=0.3554 STWV=0.4513 MTWV=0.2666 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3223 , #FA=4631 , #Miss=8447 , Contributed ATWV= 0.2475, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2857
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=76 , #FA=317 , #Miss=436 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1423
+ATWV=0.2823 OTWV=0.3864 STWV=0.5230 MTWV=0.2823 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3220 , #FA=4390 , #Miss=8450 , Contributed ATWV= 0.2590, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2990
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=94 , #FA=356 , #Miss=418 , Contributed ATWV= 0.0233, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1743
+ATWV=0.2944 OTWV=0.3988 STWV=0.5324 MTWV=0.2946 THRESHOLD=0.511 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3523 , #FA=5214 , #Miss=8147 , Contributed ATWV= 0.2679, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3092
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=115 , #FA=439 , #Miss=397 , Contributed ATWV= 0.0265, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1981
+ATWV=0.2985 OTWV=0.4008 STWV=0.5278 MTWV=0.2995 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3476 , #FA=4605 , #Miss=8194 , Contributed ATWV= 0.2747, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3171
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=395 , #Miss=407 , Contributed ATWV= 0.0238, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1776
+ATWV=0.3182 OTWV=0.4262 STWV=0.5392 MTWV=0.3205 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3855 , #FA=4523 , #Miss=7815 , Contributed ATWV= 0.2976, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3436
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=91 , #FA=395 , #Miss=421 , Contributed ATWV= 0.0205, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1535
+ATWV=0.3279 OTWV=0.4355 STWV=0.5492 MTWV=0.3295 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4086 , #FA=5301 , #Miss=7584 , Contributed ATWV= 0.3054, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3525
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=98 , #FA=407 , #Miss=414 , Contributed ATWV= 0.0225, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1685
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:29:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2158 OTWV=0.3080 STWV=0.4193 MTWV=0.2158 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2501 , #FA=4203 , #Miss=9169 , Contributed ATWV= 0.1991, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2298
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=80 , #FA=387 , #Miss=432 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1247
+ATWV=0.2176 OTWV=0.3302 STWV=0.4778 MTWV=0.2176 THRESHOLD=0.491 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2243 , #FA=3837 , #Miss=9427 , Contributed ATWV= 0.1924, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2222
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=385 , #Miss=407 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1882
+ATWV=0.2444 OTWV=0.3568 STWV=0.4926 MTWV=0.2444 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2734 , #FA=4943 , #Miss=8936 , Contributed ATWV= 0.2189, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2527
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=110 , #FA=453 , #Miss=402 , Contributed ATWV= 0.0255, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1904
+ATWV=0.2464 OTWV=0.3638 STWV=0.5166 MTWV=0.2471 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2730 , #FA=5051 , #Miss=8940 , Contributed ATWV= 0.2168, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2503
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=120 , #FA=492 , #Miss=392 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1338, ATWV= 0.2216
+ATWV=0.2765 OTWV=0.3905 STWV=0.5268 MTWV=0.2782 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3216 , #FA=4675 , #Miss=8454 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2916
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=109 , #FA=449 , #Miss=403 , Contributed ATWV= 0.0239, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1786
+ATWV=0.2787 OTWV=0.3901 STWV=0.5224 MTWV=0.2799 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3448 , #FA=5571 , #Miss=8222 , Contributed ATWV= 0.2547, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2940
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=106 , #FA=532 , #Miss=406 , Contributed ATWV= 0.0241, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1799
diff --git a/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00
new file mode 100644
index 00000000000..87e1bef6be4
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:30:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4364 OTWV=0.5200 STWV=0.6280 MTWV=0.4364 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6368 , #FA=4863 , #Miss=6347 , Contributed ATWV= 0.4198, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4785
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=77 , #FA=402 , #Miss=401 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1359
+ATWV=0.4773 OTWV=0.5774 STWV=0.7209 MTWV=0.4782 THRESHOLD=0.444 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6550 , #FA=4633 , #Miss=6165 , Contributed ATWV= 0.4556, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5193
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=405 , #Miss=383 , Contributed ATWV= 0.0218, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1775
+ATWV=0.4854 OTWV=0.5811 STWV=0.7340 MTWV=0.4860 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6988 , #FA=5965 , #Miss=5727 , Contributed ATWV= 0.4637, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5287
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=644 , #Miss=371 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1761
+ATWV=0.4866 OTWV=0.5909 STWV=0.7347 MTWV=0.4870 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7147 , #FA=6632 , #Miss=5568 , Contributed ATWV= 0.4645, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5295
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=119 , #FA=751 , #Miss=359 , Contributed ATWV= 0.0221, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1800
+ATWV=0.5068 OTWV=0.6090 STWV=0.7323 MTWV=0.5068 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7333 , #FA=5227 , #Miss=5382 , Contributed ATWV= 0.4791, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5462
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=120 , #FA=509 , #Miss=358 , Contributed ATWV= 0.0277, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2252
+ATWV=0.5099 OTWV=0.6070 STWV=0.7211 MTWV=0.5099 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7727 , #FA=5750 , #Miss=4988 , Contributed ATWV= 0.4855, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5534
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=125 , #FA=706 , #Miss=353 , Contributed ATWV= 0.0244, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1986
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:30:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3801 OTWV=0.4607 STWV=0.5692 MTWV=0.3801 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4182 , #FA=4840 , #Miss=8533 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4113
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=89 , #FA=390 , #Miss=389 , Contributed ATWV= 0.0192, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1567
+ATWV=0.4019 OTWV=0.4952 STWV=0.6210 MTWV=0.4021 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4476 , #FA=5265 , #Miss=8239 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4319
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=104 , #FA=460 , #Miss=374 , Contributed ATWV= 0.0230, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1877
+ATWV=0.4112 OTWV=0.5090 STWV=0.6304 MTWV=0.4127 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4556 , #FA=5228 , #Miss=8159 , Contributed ATWV= 0.3877, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4420
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=111 , #FA=427 , #Miss=367 , Contributed ATWV= 0.0235, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1917
+ATWV=0.4120 OTWV=0.5003 STWV=0.6302 MTWV=0.4120 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4705 , #FA=5876 , #Miss=8010 , Contributed ATWV= 0.3897, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4443
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=541 , #Miss=371 , Contributed ATWV= 0.0223, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1819
+ATWV=0.4233 OTWV=0.5140 STWV=0.6209 MTWV=0.4233 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4715 , #FA=4781 , #Miss=8000 , Contributed ATWV= 0.3982, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4539
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=375 , #Miss=366 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2045
+ATWV=0.4281 OTWV=0.5186 STWV=0.6279 MTWV=0.4284 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4813 , #FA=5241 , #Miss=7902 , Contributed ATWV= 0.4030, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4594
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=452 , #Miss=362 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2042
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:31:07-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3739 OTWV=0.4614 STWV=0.5894 MTWV=0.3739 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4006 , #FA=4874 , #Miss=8709 , Contributed ATWV= 0.3525, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4018
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=399 , #Miss=383 , Contributed ATWV= 0.0214, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1746
+ATWV=0.3912 OTWV=0.4914 STWV=0.6376 MTWV=0.3912 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4234 , #FA=5261 , #Miss=8481 , Contributed ATWV= 0.3664, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4177
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=457 , #Miss=366 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2017
+ATWV=0.3944 OTWV=0.4934 STWV=0.6385 MTWV=0.3952 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4363 , #FA=5715 , #Miss=8352 , Contributed ATWV= 0.3691, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4208
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=533 , #Miss=362 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2054
+ATWV=0.3992 OTWV=0.5032 STWV=0.6463 MTWV=0.3992 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4299 , #FA=5287 , #Miss=8416 , Contributed ATWV= 0.3731, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4254
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=460 , #Miss=362 , Contributed ATWV= 0.0260, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2120
+ATWV=0.4131 OTWV=0.5074 STWV=0.6366 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4722 , #FA=5944 , #Miss=7993 , Contributed ATWV= 0.3883, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4427
+ OOV=1 #Keywords=291 , #Targ=478 , #Corr=118 , #FA=543 , #Miss=360 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2019
+ATWV=0.4192 OTWV=0.5136 STWV=0.6432 MTWV=0.4197
THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4432 , #FA=4967 , #Miss=8283 , Contributed ATWV= 0.3911, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4458 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=117 , #FA=389 , #Miss=361 , Contributed ATWV= 0.0282, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2295 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3641 OTWV=0.4420 STWV=0.5488 MTWV=0.3641 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3840 , #FA=4117 , #Miss=8875 , Contributed ATWV= 0.3404, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3880 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=102 , #FA=267 , #Miss=376 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1931 +ATWV=0.4029 OTWV=0.4987 STWV=0.6333 MTWV=0.4039 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4170 , #FA=4646 , #Miss=8545 , Contributed ATWV= 0.3693, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4210 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=414 , #Miss=334 , Contributed ATWV= 0.0336, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2737 +ATWV=0.4079 OTWV=0.5034 STWV=0.6391 MTWV=0.4079 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4361 , #FA=5099 , #Miss=8354 , Contributed ATWV= 0.3750, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4275 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=464 , #Miss=334 , Contributed ATWV= 0.0329, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2682 +ATWV=0.4153 OTWV=0.5120 STWV=0.6440 MTWV=0.4159 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4550 , #FA=5777 , #Miss=8165 , Contributed ATWV= 0.3786, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4316 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=159 , #FA=500 , #Miss=319 , Contributed ATWV= 0.0367, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2992 +ATWV=0.4222 OTWV=0.5174 STWV=0.6342 MTWV=0.4224 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4814 , #FA=5842 , #Miss=7901 , Contributed ATWV= 0.3888, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4432 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=147 , #FA=483 , #Miss=331 , Contributed ATWV= 0.0334, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2724 +ATWV=0.4288 OTWV=0.5196 STWV=0.6299 MTWV=0.4288 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4781 , #FA=5182 , #Miss=7934 , Contributed ATWV= 0.3956, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4509 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=414 , #Miss=332 , Contributed ATWV= 0.0333, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2709 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3217 OTWV=0.4088 STWV=0.5365 MTWV=0.3225 THRESHOLD=0.484 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3439 , #FA=4619 , #Miss=9276 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3397 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=370 , #Miss=371 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1928 +ATWV=0.3625 OTWV=0.4603 STWV=0.6156 MTWV=0.3625 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3584 , #FA=4514 , #Miss=9131 , Contributed ATWV= 0.3277, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3736 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=145 , #FA=365 , #Miss=333 , Contributed ATWV= 0.0348, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2837 +ATWV=0.3648 OTWV=0.4659 STWV=0.6258 MTWV=0.3650 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3728 , #FA=4997 , #Miss=8987 , Contributed ATWV= 0.3283, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3742 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=153 , #FA=426 , #Miss=325 , Contributed ATWV= 0.0365, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2971 +ATWV=0.3776 OTWV=0.4805 STWV=0.6324 MTWV=0.3779 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4000 , #FA=5730 , #Miss=8715 , Contributed ATWV= 0.3437, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3918 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=150 , #FA=500 , #Miss=328 , Contributed ATWV= 0.0339, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2759 +ATWV=0.3885 OTWV=0.4943 STWV=0.6300 MTWV=0.3904 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4304 , #FA=5863 , #Miss=8411 , Contributed ATWV= 0.3553, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4051 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=149 , #FA=466 , #Miss=329 , Contributed ATWV= 0.0332, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2702 +ATWV=0.3993 OTWV=0.4998 STWV=0.6357 MTWV=0.4003 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4480 , #FA=5920 , #Miss=8235 , Contributed ATWV= 0.3667, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4180 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=498 , #Miss=332 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2662 diff --git a/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 new file mode 100644 index 00000000000..0b03f645904 --- /dev/null +++ b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 @@ -0,0 +1,28 @@ +#Created on 2016-02-18T12:15:22-0500 by local/best_scores.sh +# +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:23-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | 
exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-18T12:20:24-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T22:47:05-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 43.6 | 10001 82932 | 60.4 27.7 11.9 4.0 43.6 73.1 | -0.439 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-19T22:47:09-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 37.0 | 10001 104181 | 65.6 25.3 9.1 2.6 37.0 72.6 | -0.301 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys diff --git a/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 new file mode 100644 index 00000000000..00aa7af8149 --- /dev/null +++ b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 @@ -0,0 +1,27 @@ +#Created on 2015-11-27T17:53:08-0500 +# +# STT Task performance (WER) +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 52.3 37.1 10.7 4.9 52.6 28.2 | -0.763 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.8 35.3 10.8 4.3 50.5 27.2 | -0.860 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:59-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:05-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.9 | 22313 52407 | 53.3 33.1 13.6 3.2 49.9 27.4 | -0.580 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T18:29:11-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.2 | 22313 52407 | 53.9 32.4 13.7 3.2 49.2 27.3 | -0.554 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 new file mode 100644 index 00000000000..64b03ac3178 --- /dev/null +++ b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 @@ -0,0 +1,22 @@ +#Created on 2015-12-01T16:49:23-0500 +# +# STT Task performance (WER) +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.8 39.6 11.6 4.6 55.8 30.1 | -0.794 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.6 37.0 13.5 3.6 54.0 29.7 | -0.713 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:33-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | 
exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:29-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 52.2 | 22224 57152 | 50.7 33.9 15.3 2.9 52.2 29.6 | -0.453 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 new file mode 100644 index 00000000000..a085787d6d3 --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-28T14:48:47-0500 +# +# STT Task performance (WER) +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 55.0 32.3 12.8 4.2 49.3 31.0 | -0.739 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:44-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:04-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 21825 101803 | 56.6 30.1 13.3 3.6 47.0 30.7 | -0.541 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 new file mode 100644 index 00000000000..c1b66fb5daf --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 @@ -0,0 +1,242 @@ +#Created on 2016-02-25T15:45:46-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T15:45:46-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-25T17:03:34-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T19:48:53-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T10:07:57-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 43.7 | 21825 101803 | 60.3 27.8 12.0 4.0 43.7 30.3 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.6 | 21825 101803 | 57.7 29.7 12.6 4.3 46.6 30.8 | -0.740 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 
56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:30:04-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:30:12-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, 
ATWV= 0.0974 +ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924 +ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427 +ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873 +ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325 +ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:30:24-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605 +ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939 +ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382 + OOV=1 #Keywords=143 , 
#Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822 +ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839 +ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858 +ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594 +# +# KWS Task performance (TWV), syllabic search for the set kwlist evaluated on 2016-03-31T11:30:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , #FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , 
#Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), phonetic search for the set kwlist evaluated on 2016-03-31T11:30:51-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +declare -ax kwsets='([0]="kwlist" [1]="kwlist2" 
[2]="kwlist3")' +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:16-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105 +ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873 +ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , 
#Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208 +ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787 +ATWV=0.2681 OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306 +ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367 +ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751 +ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, 
ATWV= 0.1504 +declare -ax kwsets='([0]="kwlist" [1]="kwlist2" [2]="kwlist3")' +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379 +ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2128 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448 +ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055 +ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 
exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152 +ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487 +ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977 +ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911 +ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:32:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759 +ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738 +ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , 
#Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606 +ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544 +ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023 +ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699 diff --git a/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 new file mode 100644 index 00000000000..b76de49ffe3 --- /dev/null +++ b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-28T14:43:17-0500 +# +# STT Task performance (WER) +%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 57.0 34.0 9.0 4.7 47.7 29.1 | -0.571 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.9 33.4 9.6 4.2 47.3 29.1 | -0.489 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:55-0500 +%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:31-0500 +%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys 
+%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 43.8 | 22070 54382 | 60.1 30.1 9.8 3.8 43.8 27.8 | -0.361 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 new file mode 100644 index 00000000000..efa5bc3288c --- /dev/null +++ b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-27T16:50:17-0500 +# +# STT Task performance (WER) +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:22:58-0500 +%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:40-0500 +%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 43.9 | 25332 63009 | 59.5 28.8 11.7 3.4 43.9 29.2 | -0.386 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 new file mode 100644 index 00000000000..7d5da8e0f39 --- /dev/null +++ 
b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 @@ -0,0 +1,21 @@ +#Created on 2015-11-27T16:51:53-0500 +# +# STT Task performance (WER) +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.1 36.2 10.7 2.7 49.6 35.4 | -0.606 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:23:27-0500 +%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:55-0500 +%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 new file mode 100644 index 00000000000..143944daa01 --- /dev/null +++ b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 @@ -0,0 +1,21 @@ +#Created on 2016-02-18T11:46:09-0500 +# +# STT Task performance (WER) +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:42-0500 +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:37:00-0500 +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 53.7 30.2 16.1 2.9 49.2 31.9 | -0.465 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 new file mode 100644 index 00000000000..faa73c05ecb --- /dev/null +++ b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 @@ -0,0 +1,27 @@ +#Created on 2016-02-21T10:25:47-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:47-0500 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T06:37:59-05:00 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:34:46-05:00 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys +%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T22:36:11-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.0 | 23781 62345 | 43.2 
35.3 21.5 3.2 60.0 32.4 | -0.909 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 63.9 | 23781 62345 | 39.6 36.7 23.6 3.5 63.9 33.1 | -1.153 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.7 | 23781 62345 | 47.9 40.2 11.9 6.6 58.7 31.8 | -1.355 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 new file mode 100644 index 00000000000..66d7a71f598 --- /dev/null +++ b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 @@ -0,0 +1,14 @@ +#Created on 2015-12-01T16:50:41-0500 +# +# STT Task performance (WER) +%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:37-0500 +%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 42.9 | 25158 82801 | 60.2 27.5 12.2 3.1 42.9 32.5 | -0.492 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 new file mode 100644 index 00000000000..e4dfcd5a5c2 --- /dev/null +++ b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 @@ -0,0 +1,8 @@ +#Created on 2015-12-01T19:55:42-0500 +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:14-0500 +%WER 74.2 | 22178 60033 | 30.1 51.6 18.3 4.3 74.2 36.3 | -1.744 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 67.8 | 22178 60033 | 36.1 47.5 16.4 3.8 67.8 35.0 | -1.220 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_10/dev10h.pem.ctm.sys +%WER 68.1 | 22178 60033 | 35.2 46.5 18.2 3.3 68.1 35.5 | -0.900 | 
exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 65.1 | 22178 60033 | 38.0 44.1 18.0 3.1 65.1 34.6 | -0.759 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 66.8 | 22178 60033 | 37.2 46.9 16.0 4.0 66.8 34.8 | -1.137 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 new file mode 100644 index 00000000000..3196f08c26a --- /dev/null +++ b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 @@ -0,0 +1,96 @@ +#Created on 2016-02-21T10:24:13-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:24:13-0500 +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:04-0500 +# +# STT Task performance (WER), evaluated on 2016-02-27T09:49:11-05:00 +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-27T17:16:07-05:00 +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:52:09-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 
20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T20:57:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T10:56:23-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T22:38:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | 
exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T09:57:37-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-08T07:34:08-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-10T09:31:52-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 
32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 61.0 | 23078 60240 | 42.5 36.7 20.8 3.5 61.0 31.8 | -1.277 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 new file mode 100644 index 00000000000..1e98cf911ea --- /dev/null +++ b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 @@ -0,0 +1,14 @@ +#Created on 2015-11-27T17:54:01-0500 +# +# STT Task performance (WER) +%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys +%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:04-0500 +%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys +%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 56.4 | 22805 52162 | 46.7 40.0 13.3 3.1 56.4 31.4 | -0.682 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 new file mode 100644 index 00000000000..3d0dc67e8e3 --- /dev/null +++ b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 @@ -0,0 +1,34 @@ +#Created on 2016-02-21T10:25:25-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:25-0500 +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T22:19:14-05:00 +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:10-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T08:07:38-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 37.6 | 24353 74481 | 66.9 22.5 10.5 4.5 37.6 28.5 | -0.642 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 new file mode 100644 index 00000000000..e6af3c9f6f9 --- /dev/null +++ b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 @@ -0,0 +1,43 @@ +#Created on 2016-02-25T10:45:54-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T10:45:54-05:00 +# +# STT Task performance (WER), evaluated on 2016-02-25T22:40:27-05:00 +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:35:22-05:00 +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T08:22:19-05:00 +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T07:26:39-05:00 +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.2 | 21519 61705 | 53.1 31.1 15.8 4.3 51.2 32.4 | -0.826 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 new file mode 100644 index 00000000000..464362cf7e3 --- /dev/null +++ b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 @@ -0,0 +1,52 @@ +#Created on 2016-03-05T10:56:45-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-03-05T10:56:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T13:53:27-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 
2016-03-06T15:21:54-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-07T10:43:21-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-08T07:31:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-10T23:23:15-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 62.2 | 21958 102699 | 43.3 39.2 17.4 5.6 62.2 34.1 | -0.795 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_19/dev10h.pem.ctm.sys +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 
41.0 37.2 21.9 4.1 63.2 34.6 | -0.723 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 new file mode 100644 index 00000000000..9950a8f11a0 --- /dev/null +++ b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 @@ -0,0 +1,48 @@ +#Created on 2016-02-25T09:46:16-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T09:46:16-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-25T15:01:39-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T06:40:02-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | 
exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:16-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys +%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T08:07:40-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys +%WER 43.5 | 21823 59749 | 59.6 31.1 9.3 3.1 43.5 29.3 | -1.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.7 | 21823 59749 | 53.8 33.6 12.6 3.5 49.7 31.0 | -0.709 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 new file mode 100644 index 00000000000..051d40b6f10 --- /dev/null 
+++ b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 @@ -0,0 +1,34 @@ +#Created on 2016-02-26T16:17:55-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T16:17:55-05:00 +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T10:32:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 54.4 | 23997 87709 | 49.7 33.8 16.5 4.2 54.4 35.5 | -0.684 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 new file mode 100644 index 00000000000..9ad464aa2e7 --- /dev/null +++ b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 @@ -0,0 +1,41 @@ +#Created on 2016-02-26T06:40:39-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T06:40:39-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T12:50:32-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T16:56:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:51:39-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 23669 65293 | 42.1 38.8 19.1 3.6 61.5 36.9 | -0.881 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 new file mode 100644 index 00000000000..fc7382101b2 --- /dev/null +++ b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 @@ -0,0 +1,54 @@ +#Created on 2016-02-25T23:27:09-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T23:27:09-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:37:15-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | 
exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T00:59:22-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T01:34:27-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T08:19:30-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 
45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T08:38:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 44.1 | 23451 78254 | 59.9 29.6 10.5 4.0 44.1 29.1 | -0.535 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/run-1-main-extend-lex.sh b/egs/babel/s5d/run-1-main-extend-lex.sh new file mode 100755 index 00000000000..035049dad9c --- /dev/null +++ b/egs/babel/s5d/run-1-main-extend-lex.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# Parameters for extended lexicon. +extend_lexicon=true +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! 
-d data/raw_dev2h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV2H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev2h_data_dir" "$dev2h_data_list" ./data/raw_dev2h_data || exit 1
+fi
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+nj_max=`cat $dev2h_data_list | wc -l`
+if [[ "$nj_max" -lt "$decode_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max -- you have $decode_nj! (The training and decoding process has file-granularity)"
+  exit 1
+  decode_nj=$nj_max
+fi
+
+# Move data/dev2h preparation forward so we can get data/dev2h/text for
+# diagnostic purposes when extending the lexicon.
+if [[ ! -f data/dev2h/wav.scp || data/dev2h/wav.scp -ot ./data/raw_dev2h_data/audio ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing dev2h data lists in data/dev2h on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/dev2h
+  local/prepare_acoustic_training_data.pl \
+    --fragmentMarkers \-\*\~ \
+    `pwd`/data/raw_dev2h_data data/dev2h > data/dev2h/skipped_utts.log || exit 1
+fi
+
+if [[ ! -f data/dev2h/glm || data/dev2h/glm -ot "$glmFile" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing dev2h stm files in data/dev2h on" `date`
+  echo ---------------------------------------------------------------------
+  if [ -z $dev2h_stm_file ]; then
+    echo "WARNING: You should define the variable dev2h_stm_file, pointing to the IndusDB stm file."
+    echo "WARNING: Doing so will give you scoring close to the NIST scoring."
+    local/prepare_stm.pl --fragmentMarkers \-\*\~ data/dev2h || exit 1
+  else
+    local/augment_original_stm.pl $dev2h_stm_file data/dev2h || exit 1
+  fi
+  [ ! -z $glmFile ] && cp $glmFile data/dev2h/glm
+
+fi
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+  local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt
+  local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \
+    $lexiconFlags data/local/filtered_lexicon.txt data/local
+  if $extend_lexicon; then
+    # Extend the original lexicon.
+    # Will create the files data/local/extend/{lexiconp.txt,oov2prob}.
+    mv data/local/lexicon.txt data/local/lexicon_orig.txt
+    local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \
+      --num-sent-gen $num_sent_gen --num-prons $num_prons \
+      data/local/lexicon_orig.txt data/local/extend data/dev2h/text
+    cp data/local/extend/lexiconp.txt data/local/
+  fi
+fi
+
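+# For reference, each entry of the extended lexiconp.txt has the form
+# "<word> <pronunciation-probability> <phone sequence>", one pronunciation per
+# line; an illustrative (entirely made-up) pair of entries for one word:
+#   greeting   0.8   g r i t i n g
+#   greeting   0.2   g r i t i n
+# and oov2prob, roughly, maps each generated OOV word to its probability mass.
+
+mkdir -p data/lang
+if [[ !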
-f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + # If extending the lexicon, use "--words-file data/local/lexicon_orig.txt" so + # that the LM is trained just on the vocab that appears in the text. Will add + # in the OOVs later. + words_file_param=() + if $extend_lexicon; then + words_file_param=(--words-file data/local/lexicon_orig.txt) + fi + local/train_lms_srilm.sh "${words_file_param[@]}" \ + --dev-text data/dev2h/text --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\ + ( -f data/local/extend/oov2prob &&\ + data/lang/G.fst -ot data/local/extend/oov2prob ) ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + extend_lexicon_param=() + if $extend_lexicon; then + [ -f data/local/extend/original_oov_rates ] || exit 1; + unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` + extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/extend/oov2prob) + fi + local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang data/lang +fi +decode_nj=$dev2h_nj + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +touch data/.extlex + +echo ------------------------------------------------------------------------- +echo "Extended lexicon finished on" `date`. 
Now running script run-1-main.sh +echo ------------------------------------------------------------------------- +./run-1-main.sh +exit 0 diff --git a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh new file mode 100755 index 00000000000..f9de3e8e947 --- /dev/null +++ b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +# Parameters for extended lexicon. +extend_lexicon=true +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 +morfessor=true +tag_percentage=0.1 +denlats_only=false + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! -d data/raw_dev2h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV2H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev2h_data_dir" "$dev2h_data_list" ./data/raw_dev2h_data || exit 1 +fi + +if [ ! -d data/raw_dev10h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV10H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1 +fi + +# Move data/dev2h preparation forward so we can get data/dev2h/text for +# diagnostic purpose when extending the lexicon. +if [[ ! -f data/dev2h/wav.scp || data/dev2h/wav.scp -ot ./data/raw_dev2h_data/audio ]]; then + echo --------------------------------------------------------------------- + echo "Preparing dev2h data lists in data/dev2h on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/dev2h + local/prepare_acoustic_training_data.pl \ + --fragmentMarkers \-\*\~ \ + `pwd`/data/raw_dev2h_data data/dev2h > data/dev2h/skipped_utts.log || exit 1 +fi + +if [[ ! 
-f data/dev2h/glm || data/dev2h/glm -ot "$glmFile" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing dev2h stm files in data/dev2h on" `date`
+  echo ---------------------------------------------------------------------
+  if [ -z $dev2h_stm_file ]; then
+    echo "WARNING: You should define the variable dev2h_stm_file, pointing to the IndusDB stm file."
+    echo "WARNING: Doing so will give you scoring close to the NIST scoring."
+    local/prepare_stm.pl --fragmentMarkers \-\*\~ data/dev2h || exit 1
+  else
+    local/augment_original_stm.pl $dev2h_stm_file data/dev2h || exit 1
+  fi
+  [ ! -z $glmFile ] && cp $glmFile data/dev2h/glm
+
+fi
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+
+  local/lexicon/make_word_list.py $train_data_dir/filelist.list $train_data_dir/transcription data/local/word_list.txt
+  echo -e "<silence> SIL\n<unk> <oov>\n<noise> <sss>\n<v-noise> <vns>" > data/local/nonspeech.txt
+  echo -e "<hes> <hes>" > data/local/extraspeech.txt
+
+  fmt="word_list"
+  if $morfessor; then
+    fmt="morfessor"
+    morfessor-train --encoding=utf_8 --traindata-list -f"-_" -s data/local/morfessor.bin \
+      data/local/word_list.txt
+    morfessor-segment --encoding=utf_8 --output-format-separator '.' --viterbi-maxlen 3 \
+      -l data/local/morfessor.bin <(cut -d' ' -f2 data/local/word_list.txt) \
+      | sed 's/\.[\_\-]\././g' > data/local/segments
+    cut -d' ' data/local/word_list.txt -f2 | paste -d' ' - data/local/segments > data/local/word_list_tmp.txt
+    mv data/local/word_list_tmp.txt data/local/word_list.txt
+  fi
+
+  local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \
+    --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \
+    --verbose data/local/word_list.txt data/local/lexicon.txt data/local/
+  local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \
+    --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local
+  cp data/local/lexicon.txt data/local/filtered_lexicon.txt
+  if $extend_lexicon; then
+    # Extend the original lexicon.
+    # Will create the files data/local/extend/{lexiconp.txt,oov2prob}.
+    mv data/local/lexicon.txt data/local/lexicon_orig.txt
+    local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \
+      --num-sent-gen $num_sent_gen --num-prons $num_prons \
+      data/local/lexicon_orig.txt data/local/extend data/dev2h/text
+    cp data/local/extend/lexiconp.txt data/local/
+  fi
+fi
+
+mkdir -p data/lang
+if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating L.fst etc in data/lang on" `date`
+  echo ---------------------------------------------------------------------
+  utils/prepare_lang.sh \
+    --share-silence-phones true \
+    data/local $oovSymbol data/local/tmp.lang data/lang
+fi
+
+if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/train
+  local/prepare_acoustic_training_data.pl \
+    --vocab $lexicon --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train > data/train/skipped_utts.log
+fi
+
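+# Note: the "[[ ! -f OUT || OUT -ot IN ]]" guards used throughout these scripts
+# implement make-style freshness checks: a stage is redone only when its output
+# is missing or older than ("-ot") its input. A minimal standalone sketch, with
+# hypothetical file names and command:
+#   if [[ ! -f out.txt || out.txt -ot in.txt ]]; then
+#     generate_out.sh in.txt out.txt
+#   fi
+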
+if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Training SRILM language models on" `date`
+  echo ---------------------------------------------------------------------
+  # If extending the lexicon, use "--words-file data/local/lexicon_orig.txt" so
+  # that the LM is trained just on the vocab that appears in the text. The OOVs
+  # will be added back in later.
+  words_file_param=()
+  if $extend_lexicon; then
+    words_file_param=(--words-file data/local/lexicon_orig.txt)
+  fi
+  local/train_lms_srilm.sh --oov-symbol "$oovSymbol" \
+    "${words_file_param[@]}" \
+    --train-text data/train/text data data/srilm
+fi
+
+if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\
+    ( -f data/local/extend/oov2prob &&\
+    data/lang/G.fst -ot data/local/extend/oov2prob ) ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating G.fst on " `date`
+  echo ---------------------------------------------------------------------
+  extend_lexicon_param=()
+  if $extend_lexicon; then
+    [ -f data/local/extend/original_oov_rates ] || exit 1;
+    unk_fraction=`cat data/local/extend/original_oov_rates |\
+      grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'`
+    extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \
+      --oov-prob-file data/local/extend/oov2prob)
+  fi
+  local/arpa2G.sh ${extend_lexicon_param[@]} \
+    data/srilm/lm.gz data/lang data/lang
+fi
+
+echo ---------------------------------------------------------------------
+echo "Starting plp feature extraction for data/train in plp on" `date`
+echo ---------------------------------------------------------------------
+
+if [ ! -f data/train/.plp.done ]; then
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp
+  else
+    steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp
+  fi
+  utils/fix_data_dir.sh data/train
+  steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp
+  utils/fix_data_dir.sh data/train
+  touch data/train/.plp.done
+fi
+
+touch data/.extlex
+mkdir -p exp
+
+echo -------------------------------------------------------------------------
+echo "Extended lexicon finished on" `date`. Now running script run-1-main-unicode.sh
+echo -------------------------------------------------------------------------
+./run-1-main-unicode.sh --denlats-only "$denlats_only"
+exit 0
diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh
new file mode 100755
index 00000000000..acd2693cbef
--- /dev/null
+++ b/egs/babel/s5d/run-1-main-unicode.sh
@@ -0,0 +1,385 @@
+#!/bin/bash
+
+# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
+tri5_only=false
+sgmm5_only=false
+denlats_only=false
+data_only=false
+morfessor=true
+tag_percentage=0.1
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
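+# The boolean flags defined above (tri5_only, sgmm5_only, denlats_only,
+# data_only, morfessor) can be overridden from the command line through
+# utils/parse_options.sh, e.g. to run only the data preparation stages
+# (illustrative invocation):
+#   ./run-1-main-unicode.sh --data-only true
+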
+. ./utils/parse_options.sh
+
+set -e           #Exit on non-zero return code from any command
+set -o pipefail  #Exit if any of the commands in the pipeline will
+                 #return non-zero return code
+#set -u          #Fail on an undefined variable
+
+lexicon=data/local/lexicon.txt
+if $extend_lexicon; then
+  lexicon=data/local/lexiconp.txt
+fi
+
+./local/check_tools.sh || exit 1
+
+#Preparing dev2h and train directories
+if [ ! -f data/raw_train_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the TRAIN set"
+  echo ---------------------------------------------------------------------
+
+  local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data
+  train_data_dir=`readlink -f ./data/raw_train_data`
+  touch data/raw_train_data/.done
+fi
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+  train_nj=$nj_max
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+
+  local/lexicon/make_word_list.py $train_data_dir/filelist.list $train_data_dir/transcription data/local/word_list.txt
+  echo -e "<silence> SIL\n<unk> <oov>\n<noise> <sss>\n<v-noise> <vns>" > data/local/nonspeech.txt
+  echo -e "<hes> <hes>" > data/local/extraspeech.txt
+
+  fmt="word_list"
+  if $morfessor; then
+    fmt="morfessor"
+    morfessor-train --encoding=utf_8 --traindata-list -f"-_" -s data/local/morfessor.bin \
+      data/local/word_list.txt
+    morfessor-segment --encoding=utf_8 --output-format-separator '.' --viterbi-maxlen 3 \
+      -l data/local/morfessor.bin <(cut -d' ' -f2 data/local/word_list.txt) \
+      | sed 's/\.[\_\-]\././g' > data/local/segments
+    cut -d' ' data/local/word_list.txt -f2 | paste -d' ' - data/local/segments > data/local/word_list_tmp.txt
+    mv data/local/word_list_tmp.txt data/local/word_list.txt
+  fi
+
+  local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \
+    --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \
+    --verbose data/local/word_list.txt data/local/lexicon.txt data/local/
+  local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \
+    --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local
+  cp data/local/lexicon.txt data/local/filtered_lexicon.txt
+fi
+
+mkdir -p data/lang
+if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating L.fst etc in data/lang on" `date`
+  echo ---------------------------------------------------------------------
+  utils/prepare_lang.sh \
+    --share-silence-phones true \
+    data/local $oovSymbol data/local/tmp.lang data/lang
+fi
+
+if [[ !
-f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + local/arpa2G.sh data/srilm/lm.gz data/lang data/lang +fi + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! -f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! 
-f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! 
-f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! -f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! 
-f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-1-main.sh b/egs/babel/s5d/run-1-main.sh new file mode 100755 index 00000000000..d85407f8db4 --- /dev/null +++ b/egs/babel/s5d/run-1-main.sh @@ -0,0 +1,363 @@ +#!/bin/bash + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +tri5_only=false +sgmm5_only=false +denlats_only=false +data_only=false + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! 
-d data/raw_dev10h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV10H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1 +fi + + +mkdir -p data/local +if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing lexicon in data/local on" `date` + echo --------------------------------------------------------------------- + local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt + local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \ + $lexiconFlags data/local/filtered_lexicon.txt data/local +fi + +mkdir -p data/lang +if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + local/arpa2G.sh data/srilm/lm.gz data/lang data/lang +fi + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! 
-f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! -f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! 
-f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! -f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. 
Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! -f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-2-segmentation.sh b/egs/babel/s5d/run-2-segmentation.sh new file mode 100755 index 00000000000..0ced3ffabac --- /dev/null +++ b/egs/babel/s5d/run-2-segmentation.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal) +# Apache 2.0 + +#Begin configuration section + +silence_segment_fraction=1.0 # What fraction of segment we should keep + +#end configuration section + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +set -u #Fail on an undefined variable + +#Later in the script we assume the run-1-main.sh was run (because we are using exp/tri4) +#So let's make it mandatory, instead of doing the work on our own. +[ ! 
-f data/raw_train_data/.done ] && echo "The source training data directory is not ready. Use the run-1-main.sh script to prepare it!" && exit 1
+
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+  train_nj=$nj_max
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -f data/train_seg/.done ]; then
+
+  mkdir -p data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train_seg on" `date`
+  echo ---------------------------------------------------------------------
+  local/prepare_acoustic_training_data.pl --get-whole-transcripts "true" \
+    --vocab data/local/lexicon.txt --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train_seg > data/train_seg/skipped_utts.log
+  mv data/train_seg/text data/train_seg/text_orig
+
+  num_silence_segments=$(cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $0}}' | wc -l)
+  num_keep_silence_segments=`perl -e "printf '%d', ($num_silence_segments * $silence_segment_fraction)"`
+  if [ $num_silence_segments -eq $num_keep_silence_segments ]; then
+    # Keep all segments including silence segments
+    cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $1} else {print $0}}' > data/train_seg/text
+  else
+    # Keep only a fraction of silence segments
+
+    cat data/train_seg/text_orig \
+      | awk 'BEGIN{i=0} \
+      { \
+        if (NF == 2 && $2 == "<silence>") { \
+          if (i<'$num_keep_silence_segments') { \
+            print $1; \
+            i++; \
+          } \
+        } else {print $0}\
+      }' > data/train_seg/text
+  fi
+  #rm data/train_seg/text_orig
+  utils/fix_data_dir.sh data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Starting plp feature extraction for data/train_seg in plp on" `date`
+  echo ---------------------------------------------------------------------
+
+  if [ ! -f data/train_seg/.plp.done ]; then
+    if $use_pitch; then
+      steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+        data/train_seg exp/make_plp_pitch/train_seg plp
+    else
+      steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj \
+        data/train_seg exp/make_plp/train_seg plp
+    fi
+
+    utils/fix_data_dir.sh data/train_seg
+    steps/compute_cmvn_stats.sh data/train_seg exp/make_plp/train_seg plp
+    utils/fix_data_dir.sh data/train_seg
+    touch data/train_seg/.plp.done
+  fi
+  touch data/train_seg/.done
+fi
+
+echo ---------------------------------------------------------------------
+echo "Training segmentation model in exp/tri4b_seg"
+echo ---------------------------------------------------------------------
+
+local/resegment/train_segmentation.sh \
+  --boost-sil 1.0 --nj $train_nj --cmd "$decode_cmd" \
+  exp/tri4 data/train_seg data/lang exp/tri4b_seg || exit 1
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-2a-nnet-cpu.sh b/egs/babel/s5d/run-2a-nnet-cpu.sh
new file mode 100755
index 00000000000..35e7d3ceab3
--- /dev/null
+++ b/egs/babel/s5d/run-2a-nnet-cpu.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. conf/common_vars.sh
+. ./lang.conf
+
+# This parameter will be used when the training dies at a certain point.
+train_stage=-100
+dir=exp/tri6_nnet
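+# Both settings above can be overridden on the command line via
+# utils/parse_options.sh; e.g., to resume a run that died part-way through
+# (the stage number below is only illustrative):
+#   ./run-2a-nnet-cpu.sh --train-stage 12 --dir exp/tri6_nnet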
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+echo "Waiting till exp/tri5_ali/.done exists...."
+while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
+echo "...done waiting for exp/tri5_ali/.done"
+
+if [ ! -f $dir/.done ]; then
+  steps/nnet2/train_pnorm.sh \
+    --stage $train_stage --mix-up $dnn_mixup \
+    --initial-learning-rate $dnn_init_learning_rate \
+    --final-learning-rate $dnn_final_learning_rate \
+    --num-hidden-layers $dnn_num_hidden_layers \
+    --pnorm-input-dim $dnn_input_dim \
+    --pnorm-output-dim $dnn_output_dim \
+    --cmd "$train_cmd" \
+    "${dnn_cpu_parallel_opts[@]}" \
+    data/train data/lang exp/tri5_ali $dir || exit 1
+
+  touch $dir/.done
+fi
diff --git a/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh
new file mode 100755
index 00000000000..06c9a330295
--- /dev/null
+++ b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. conf/common_vars.sh
+. ./lang.conf
+
+train_stage=-10
+dir=exp/tri6b_nnet
+
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+dnn_num_hidden_layers=4
+dnn_pnorm_input_dim=3000
+dnn_pnorm_output_dim=300
+dnn_init_learning_rate=0.004
+dnn_final_learning_rate=0.001
+temp_dir=`pwd`/nnet_gpu_egs
+ensemble_size=4
+initial_beta=0.1
+final_beta=5
+egs_dir=
+
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+echo "Waiting till exp/tri5_ali/.done exists...."
+while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
+echo "...done waiting for exp/tri5_ali/.done"
+
+if [ ! -f $dir/.done ]; then
+  steps/nnet2/train_pnorm_ensemble.sh \
+    --stage $train_stage --mix-up $dnn_mixup --egs-dir "$egs_dir" \
+    --initial-learning-rate $dnn_init_learning_rate \
+    --final-learning-rate $dnn_final_learning_rate \
+    --num-hidden-layers $dnn_num_hidden_layers \
+    --pnorm-input-dim $dnn_pnorm_input_dim \
+    --pnorm-output-dim $dnn_pnorm_output_dim \
+    --cmd "$train_cmd" \
+    "${dnn_gpu_parallel_opts[@]}" \
+    --ensemble-size $ensemble_size --initial-beta $initial_beta --final-beta $final_beta \
+    data/train data/lang exp/tri5_ali $dir || exit 1
+  touch $dir/.done
+fi
+
diff --git a/egs/babel/s5d/run-2a-nnet-gpu.sh b/egs/babel/s5d/run-2a-nnet-gpu.sh
new file mode 100755
index 00000000000..55733006d75
--- /dev/null
+++ b/egs/babel/s5d/run-2a-nnet-gpu.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+dir=exp/tri6_nnet
+train_stage=-10
+
+. conf/common_vars.sh
+. ./lang.conf
+
+# This parameter will be used when the training dies at a certain point.
+train_stage=-100
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+echo "Waiting till exp/tri5_ali/.done exists...."
+while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
+echo "...done waiting for exp/tri5_ali/.done"
+
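+# Like the other recipe scripts here, this stage is made idempotent with a
+# ".done" marker file: run the stage only if the marker is missing, and create
+# it on success, so an interrupted run can simply be restarted. The pattern,
+# sketched with a hypothetical stage:
+#   if [ ! -f exp/some_stage/.done ]; then
+#     run_some_stage.sh ... || exit 1
+#     touch exp/some_stage/.done
+#   fi
+
+if [ !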
-f $dir/.done ]; then + steps/nnet2/train_pnorm_fast.sh \ + --stage $train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --cmd "$train_cmd" \ + "${dnn_gpu_parallel_opts[@]}" \ + data/train data/langp/tri5_ali exp/tri5_ali $dir || exit 1 + + touch $dir/.done +fi + diff --git a/egs/babel/s5d/run-2a-nnet-mpe.sh b/egs/babel/s5d/run-2a-nnet-mpe.sh new file mode 100755 index 00000000000..6ddddb4cfda --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-mpe.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +. conf/common_vars.sh +. ./lang.conf + +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till exp/tri6_nnet/.done exists...." +while [ ! -f exp/tri6_nnet/.done ]; do sleep 30; done +echo "...done waiting for exp/tri6_nnet/.done" + +# Generate denominator lattices. +if [ ! -f exp/tri6_nnet_denlats/.done ]; then + steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \ + --nj $train_nj --sub-split $train_nj \ + "${dnn_denlats_extra_opts[@]}" \ + --transform-dir exp/tri5_ali \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_denlats || exit 1 + + touch exp/tri6_nnet_denlats/.done +fi + +# Generate alignment. +if [ ! -f exp/tri6_nnet_ali/.done ]; then + steps/nnet2/align.sh --use-gpu yes \ + --cmd "$decode_cmd $dnn_parallel_opts" \ + --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_ali || exit 1 + + touch exp/tri6_nnet_ali/.done +fi + +train_stage=-100 +if [ ! -f exp/tri6_nnet_mpe/.done ]; then + steps/nnet2/train_discriminative.sh \ + --stage $train_stage --cmd "$decode_cmd" \ + --learning-rate $dnn_mpe_learning_rate \ + --modify-learning-rates true \ + --last-layer-factor $dnn_mpe_last_layer_factor \ + --num-epochs 4 --cleanup true \ + --retroactive $dnn_mpe_retroactive \ + --transform-dir exp/tri5_ali \ + "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/lang \ + exp/tri6_nnet_ali exp/tri6_nnet_denlats exp/tri6_nnet/final.mdl exp/tri6_nnet_mpe || exit 1 + + touch exp/tri6_nnet_mpe/.done +fi diff --git a/egs/babel/s5d/run-2b-bnf.sh b/egs/babel/s5d/run-2b-bnf.sh new file mode 100755 index 00000000000..bdca049d941 --- /dev/null +++ b/egs/babel/s5d/run-2b-bnf.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# Apache 2.0 + +#Run supervised and semisupervised BNF training +#This yields approx 70 hours of data + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +set -u #Fail on an undefined variable +skip_kws=true +skip_stt=false +semisupervised=true +unsup_string="_semisup" +train_stage=-100 +bnf_weight_threshold=0.35 +ali_dir= +ali_model=exp/tri6b_nnet/ +weights_dir=exp/best_path_weights/unsup.seg/decode_unsup.seg/ + +. ./utils/parse_options.sh + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + + +if $semisupervised ; then + egs_string="--egs-dir exp_bnf${unsup_string}/tri6_bnf/egs" + dirid=unsup.seg +else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + egs_string="" + dirid=train +fi + +[ ! -d $ali_model ] && echo "The alignment model $ali_model does not exist! Use --ali-model to specify it." 
&& exit 1
+
+datadir=data/${dirid}
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+param_bnf_dir=param_bnf${unsup_string}
+
+if [ -z $ali_dir ] ; then
+  # If the alignment directory is not specified, use exp/tri6_nnet_ali as
+  # the alignment directory
+  ali_dir=exp/tri6_nnet_ali
+fi
+
+if [ ! -f $ali_dir/.done ]; then
+  echo "$0: Aligning supervised training data in $ali_dir"
+
+  [ ! -f $ali_model/final.mdl ] && echo -e "$ali_model/final.mdl not found!\nRun run-6-nnet.sh first!" && exit 1
+  steps/nnet2/align.sh  --cmd "$train_cmd " \
+    --use-gpu no --transform-dir exp/tri5_ali --nj $train_nj \
+    data/train data/langp/tri5_ali $ali_model $ali_dir || exit 1
+  touch $ali_dir/.done
+fi
+
+###############################################################################
+#
+# Semi-supervised BNF training
+#
+###############################################################################
+mkdir -p $exp_dir/tri6_bnf
+if [ ! -f $exp_dir/tri6_bnf/.done ]; then
+  if $semisupervised ; then
+
+    [ ! -d $datadir ] && echo "Error: $datadir is not available!" && exit 1;
+    echo "$0: Generating examples using unsupervised data in $exp_dir/tri6_bnf"
+    if [ ! -f $exp_dir/tri6_bnf/egs/.done ]; then
+      local/nnet2/get_egs_semi_supervised.sh \
+        --cmd "$train_cmd" \
+        "${dnn_update_egs_opts[@]}" \
+        --transform-dir-sup exp/tri5_ali \
+        --transform-dir-unsup exp/tri5/decode_${dirid} \
+        --weight-threshold $bnf_weight_threshold \
+        data/train $datadir data/langp/tri5_ali/ \
+        $ali_dir $weights_dir $exp_dir/tri6_bnf || exit 1;
+      touch $exp_dir/tri6_bnf/egs/.done
+    fi
+
+  fi
+
+  echo "$0: Training the bottleneck network"
+  steps/nnet2/train_tanh_bottleneck.sh \
+    --stage $train_stage --num-jobs-nnet $bnf_num_jobs \
+    --num-threads $bnf_num_threads --mix-up $bnf_mixup \
+    --minibatch-size $bnf_minibatch_size \
+    --initial-learning-rate $bnf_init_learning_rate \
+    --final-learning-rate $bnf_final_learning_rate \
+    --num-hidden-layers $bnf_num_hidden_layers \
+    --bottleneck-dim $bottleneck_dim --hidden-layer-dim $bnf_hidden_layer_dim \
+    --cmd "$train_cmd --mem 4G" $egs_string \
+    "${dnn_gpu_parallel_opts[@]}" \
+    data/train data/langp/tri5_ali/ $ali_dir $exp_dir/tri6_bnf || exit 1
+
+  touch $exp_dir/tri6_bnf/.done
+fi
+
+[ ! -d $param_bnf_dir ] && mkdir -p $param_bnf_dir
+if [ ! -f $data_bnf_dir/train_bnf/.done ]; then
+  mkdir -p $data_bnf_dir
+  # put the archives in ${param_bnf_dir}/.
+  steps/nnet2/dump_bottleneck_features.sh --nj $train_nj --cmd "$train_cmd" \
+    --transform-dir exp/tri5 data/train $data_bnf_dir/train_bnf \
+    $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf
+  touch $data_bnf_dir/train_bnf/.done
+fi
+
+if [ ! $data_bnf_dir/train/.done -nt $data_bnf_dir/train_bnf/.done ]; then
+  steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \
+    --nj $train_nj --transform-dir exp/tri5_ali $data_bnf_dir/train_sat data/train \
+    exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir
+
+  steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
+    $data_bnf_dir/train_bnf $data_bnf_dir/train_sat $data_bnf_dir/train \
+    $exp_dir/append_feats/log $param_bnf_dir/
+  steps/compute_cmvn_stats.sh --fake $data_bnf_dir/train \
+    $exp_dir/make_fmllr_feats $param_bnf_dir
+  rm -r $data_bnf_dir/train_sat
+
+  touch $data_bnf_dir/train/.done
+fi
+
+if [ ! 
$exp_dir/tri5/.done -nt $data_bnf_dir/train/.done ]; then + steps/train_lda_mllt.sh --splice-opts "--left-context=1 --right-context=1" \ + --dim 60 --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT $data_bnf_dir/train data/langp/tri5_ali/ exp/tri5_ali $exp_dir/tri5 ; + touch $exp_dir/tri5/.done +fi + +if [ ! $exp_dir/tri6/.done -nt $exp_dir/tri5/.done ]; then + steps/train_sat.sh --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT $data_bnf_dir/train data/langp/tri5_ali \ + $exp_dir/tri5 $exp_dir/tri6 + touch $exp_dir/tri6/.done +fi + +echo --------------------------------------------------------------------- +echo "$0: next, run run-6-bnf-sgmm-semisupervised.sh" +echo --------------------------------------------------------------------- + +exit 0; diff --git a/egs/babel/s5d/run-3a-nnet-mpe.sh b/egs/babel/s5d/run-3a-nnet-mpe.sh new file mode 100755 index 00000000000..5271c58d816 --- /dev/null +++ b/egs/babel/s5d/run-3a-nnet-mpe.sh @@ -0,0 +1,54 @@ +#!/bin/bash + + +. conf/common_vars.sh +. ./lang.conf + +modeldir=exp/tri6_nnet + +. ./utils/parse_options.sh +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till ${modeldir}/.done exists...." +while [ ! -f $modeldir/.done ]; do sleep 30; done +echo "...done waiting for ${modeldir}/.done" + +# Generate denominator lattices. +if [ ! -f exp/tri6_nnet_denlats/.done ]; then + steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \ + --nj $train_nj --sub-split $train_nj \ + "${dnn_denlats_extra_opts[@]}" \ + --transform-dir exp/tri5_ali \ + data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_denlats || exit 1 + + touch exp/tri6_nnet_denlats/.done +fi + +# Generate alignment. +if [ ! -f exp/tri6_nnet_ali/.done ]; then + steps/nnet2/align.sh --use-gpu yes \ + --cmd "$decode_cmd $dnn_parallel_opts" \ + --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_ali || exit 1 + + touch exp/tri6_nnet_ali/.done +fi + +train_stage=-100 +if [ ! -f exp/tri6_nnet_mpe/.done ]; then + steps/nnet2/train_discriminative.sh \ + --stage $train_stage --cmd "$decode_cmd" \ + --learning-rate $dnn_mpe_learning_rate \ + --modify-learning-rates true \ + --last-layer-factor $dnn_mpe_last_layer_factor \ + --num-epochs 4 --cleanup true \ + --retroactive $dnn_mpe_retroactive \ + --transform-dir exp/tri5_ali \ + "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/langp/tri5_ali/ \ + exp/tri6_nnet_ali exp/tri6_nnet_denlats ${modeldir}/final.mdl exp/tri6_nnet_mpe || exit 1 + + touch exp/tri6_nnet_mpe/.done +fi diff --git a/egs/babel/s5d/run-3b-bnf-nnet.sh b/egs/babel/s5d/run-3b-bnf-nnet.sh new file mode 100755 index 00000000000..169eec6f62f --- /dev/null +++ b/egs/babel/s5d/run-3b-bnf-nnet.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# 2014 Johns Hopkins (Yenda Trmal) + +# Apache 2.0 + +# This is really an alternative path to the BNF-SGMM, +# where we train a DNN instead of an SGMM. + + +. conf/common_vars.sh +. ./lang.conf +[ -f local.conf ] && . ./local.conf + +set -e +set -o pipefail +set -u + +semisupervised=true +dnn_train_stage=-100 +unsup_string= + +. 
./utils/parse_options.sh
+
+if [ $babel_type == "full" ] && $semisupervised; then
+  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false "
+  exit 1
+fi
+
+if [ -z "$unsup_string" ]; then
+  if $semisupervised ; then
+    unsup_string="_semisup"
+  else
+    unsup_string=""  #" ": supervised training, _semi_supervised: unsupervised BNF training
+  fi
+fi
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+
+if [ ! -e $exp_dir/tri6/.done ]; then
+  echo "$0: $exp_dir/tri6/.done does not exist"
+  echo "$0: this script needs to be run _AFTER_ the script run-2b-bnf.sh"
+  echo "$0: with the appropriate parameters -- mostly the same as the parameters"
+  echo "$0: of this script"
+  exit 1
+fi
+
+# We create an alignment with a lot of jobs, because the LDA accumulation
+# when training the pnorm network will be slow, due to the large dimension.
+if [ ! $exp_dir/tri6_ali_50/.done -nt $exp_dir/tri6/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Aligning fMLLR system with 50 jobs"
+  echo ---------------------------------------------------------------------
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \
+    $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/tri6_ali_50
+  touch $exp_dir/tri6_ali_50/.done
+fi
+
+
+if [ ! $exp_dir/tri7_nnet/.done -nt $exp_dir/tri6_ali_50/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting hybrid system building (over bottleneck features)"
+  echo ---------------------------------------------------------------------
+  steps/nnet2/train_pnorm.sh \
+    --stage $dnn_train_stage --mix-up $dnn_mixup \
+    --initial-learning-rate $dnn_init_learning_rate \
+    --final-learning-rate $dnn_final_learning_rate \
+    --num-hidden-layers $dnn_num_hidden_layers \
+    --pnorm-input-dim $dnn_input_dim \
+    --pnorm-output-dim $dnn_output_dim \
+    --egs-opts "--feat-type raw" --lda-opts "--feat-type raw --lda-dim $dnn_output_dim" --splice-width 5 \
+    "${dnn_gpu_parallel_opts[@]}" --cmd "$train_cmd" \
+    $data_bnf_dir/train data/lang $exp_dir/tri6_ali_50 $exp_dir/tri7_nnet || exit 1
+
+  touch $exp_dir/tri7_nnet/.done
+fi
+
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo "To decode a data-set, use run-4b-anydecode-bnf.sh"
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-3b-bnf-sgmm.sh b/egs/babel/s5d/run-3b-bnf-sgmm.sh
new file mode 100755
index 00000000000..341ea83565f
--- /dev/null
+++ b/egs/babel/s5d/run-3b-bnf-sgmm.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Copyright 2014  Pegah Ghahremani
+#           2014  Johns Hopkins (Yenda Trmal)
+
+# Apache 2.0
+
+# This script builds the SGMM system on top of the kaldi internal bottleneck features.
+# It comes after run-6-bnf-semisupervised.sh.
+
+
+. conf/common_vars.sh
+. ./lang.conf
+[ -f local.conf ] && . ./local.conf
+
+set -e
+set -o pipefail
+set -u
+semisupervised=true
+unsup_string=
+
+. 
./utils/parse_options.sh
+
+if [ $babel_type == "full" ] && $semisupervised; then
+  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false "
+  exit 1
+fi
+
+if [ -z "$unsup_string" ]; then
+  if $semisupervised ; then
+    unsup_string="_semisup"
+  else
+    unsup_string=""  #" ": supervised training, _semi_supervised: unsupervised BNF training
+  fi
+fi
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+param_bnf_dir=param_bnf${unsup_string}
+
+echo ---------------------------------------------------------------------
+echo "Starting $exp_dir/ubm7 on" `date`
+echo ---------------------------------------------------------------------
+if [ ! $exp_dir/ubm7/.done -nt $exp_dir/tri6/.done ]; then
+  steps/train_ubm.sh --cmd "$train_cmd" \
+    $bnf_num_gauss_ubm $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/ubm7
+  touch $exp_dir/ubm7/.done
+fi
+
+if [ ! $exp_dir/sgmm7/.done -nt $exp_dir/ubm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7 on" `date`
+  echo ---------------------------------------------------------------------
+  #steps/train_sgmm2_group.sh \
+  steps/train_sgmm2.sh \
+    --cmd "$train_cmd" "${sgmm_train_extra_opts[@]}"\
+    $numLeavesSGMM $bnf_num_gauss_sgmm $data_bnf_dir/train data/lang \
+    $exp_dir/tri6 $exp_dir/ubm7/final.ubm $exp_dir/sgmm7
+  touch $exp_dir/sgmm7/.done
+fi
+
+if [ ! $exp_dir/sgmm7_ali/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_ali on" `date`
+  echo ---------------------------------------------------------------------
+  steps/align_sgmm2.sh \
+    --nj $train_nj --cmd "$train_cmd" --transform-dir $exp_dir/tri6 --use-graphs true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7 $exp_dir/sgmm7_ali
+  touch $exp_dir/sgmm7_ali/.done
+fi
+
+if [ ! $exp_dir/sgmm7_denlats/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_denlats on" `date`
+  echo ---------------------------------------------------------------------
+  steps/make_denlats_sgmm2.sh --cmd "$train_cmd" \
+    --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \
+    --transform-dir $exp_dir/tri6 --beam 10.0 --acwt 0.06 --lattice-beam 6 \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats
+  touch $exp_dir/sgmm7_denlats/.done
+fi
+
+if [ ! $exp_dir/sgmm7_mmi_b0.1/.done -nt $exp_dir/sgmm7_denlats/.done ]; then
+  steps/train_mmi_sgmm2.sh \
+    --cmd "$train_cmd" --acwt 0.06 \
+    --transform-dir $exp_dir/tri6 --boost 0.1 --drop-frames true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats \
+    $exp_dir/sgmm7_mmi_b0.1
+  touch $exp_dir/sgmm7_mmi_b0.1/.done;
+fi
+
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo "To decode a data-set, use run-4b-anydecode-bnf.sh"
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh
new file mode 100755
index 00000000000..8ac0fde2621
--- /dev/null
+++ b/egs/babel/s5d/run-4-anydecode.sh
@@ -0,0 +1,750 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. 
./lang.conf || exit 1;
+
+
+dir=dev10h.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+chain_model=
+parent_dir_suffix=_cleaned
+is_rnn=false
+extra_left_context=40
+extra_right_context=40
+frames_per_chunk=20
+
+echo "$0 $@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h|dev2h|eval|shadow)"
+  exit 1
+fi
+
+echo "Dir: $dir"
+
+#This seems to be the only functioning way to ensure the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C seems to terminate only the deepest sourced script.
+# Let shell functions inherit ERR trap.  Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. what kind of dataset it is
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || \
+     [ "$dataset_type" == "dev10h" ] || \
+     [ "$dataset_type" == "train" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem   #PEM segmentation"
+  echo -e "\tuem   #UEM segmentation in the CMU database format"
+  echo -e "\tseg   #UEM segmentation (kaldi-native)"
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have an existing config";
+  exit 1
+fi
+
+if [ "$dataset_type" == "train" ] ; then
+  local/ali_to_rttm.sh --cmd "$decode_cmd" data/train data/langp_test exp/tri5_ali
+  bash -x local/qbe/wav_to_ecf.sh data/train/wav.scp > data/train/ecf.train.xml
+  train_rttm_file=./exp/tri5_ali/rttm
+  train_ecf_file=./data/train/ecf.train.xml
+fi
+
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+echo "my_stm_file=$my_stm_file"
+echo "my_ecf_file=$my_ecf_file"
+echo "my_rttm_file=$my_rttm_file"
+echo "my_nj=$my_nj"
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
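+  # The eval-based indirection above resolves per-dataset configuration by
+  # variable name: for dataset_type=dev10h it reads $dev10h_nj, $dev10h_stm_file,
+  # $dev10h_ecf_file etc., all of which are expected to come from lang.conf.
+  # A minimal sketch of such entries (names and paths purely illustrative):
+  #   dev10h_nj=32
+  #   dev10h_data_dir=( /corpora/babel/dev10h )
+  #   dev10h_data_list=( conf/lists/dev10h.list )
+  #   dev10h_ecf_file=db/dev10h.ecf.xml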
+  exit 1
+fi
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  #index=`echo $my_kwlists_val | sed 's/.*\.\([^.][^.]*\)\.xml/\1/g'`
+  index=$key
+
+  my_kwlists["$index"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set! "
+      echo "You should probably set the variable in the config file "
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "train" ] ||\
+         [ "$dataset_segments" == "pem" ]; then
+      . 
./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense,
+      ##because with PEM we are given the segmentation
+      ##and, because of the format of the segment files,
+      ##the transcript as well
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
+
+if [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! -d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+  fi
+
+  mfccdir=mfcc_hires
+  steps/make_mfcc.sh --nj $my_nj --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" ${dataset_dir}_hires exp/make_hires/$dataset $mfccdir;
+  steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+  utils/fix_data_dir.sh ${dataset_dir}_hires;
+  touch ${dataset_dir}_hires/.mfcc.done
+
+  touch ${dataset_dir}_hires/.done
+fi
+
+if [ -f exp/nnet3/extractor/final.ie ] && \
+   [ ! -f exp/nnet3/ivectors_$(basename $dataset_dir)/.done ] ; then
+  dataset=$(basename $dataset_dir)
+
+  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $my_nj \
+    ${dataset_dir}_hires exp/nnet3/extractor exp/nnet3/ivectors_$dataset || exit 1;
+
+  touch exp/nnet3/ivectors_$dataset/.done
+fi
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang
+if [ ! -f data/dev10h.pem/.done.kws.dev ] ; then
+  if ! $skip_kws ; then
+    if $extra_kws ; then
+      L1_lex=data/local/lexiconp.txt
+      . ./local/datasets/extra_kws.sh || exit 1
+    fi
+    if $vocab_kws ; then
+      . ./local/datasets/vocab_kws.sh || exit 1
+    fi
+    if [ ! -f data/lang.phn/G.fst ] ; then
+      ./local/syllab/run_phones.sh --stage -2 ${dataset_dir}
+    else
+      ./local/syllab/run_phones.sh ${dataset_dir}
+    fi
+
+    if [ ! 
-f data/lang.syll/G.fst ] ; then + ./local/syllab/run_syllabs.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_syllabs.sh ${dataset_dir} + fi + + ./local/search/run_search.sh --dir ${dataset_dir##*/} + ./local/search/run_phn_search.sh --dir ${dataset_dir##*/} + ./local/search/run_syll_search.sh --dir ${dataset_dir##*/} + fi +fi + +if $data_only ; then + echo "Exiting, as data-only was requested..." + exit 0; +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if [ ! -L ./data/langp_test.syll ]; then + ln -s lang.syll data/langp_test.syll +fi +if [ ! -L ./data/langp_test.phn ]; then + ln -s lang.phn data/langp_test.phn +fi + + +decode=exp/tri5/decode_${dataset_id} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning decoding with SAT models on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test exp/tri5 exp/tri5/graph |tee exp/tri5/mkgraph.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms + #Therefore, we will reduce the beam sizes, to reduce the decoding times + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + exp/tri5/graph ${dataset_dir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test ${decode} + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test ${decode}.si +fi + +if $tri5_only; then + echo "--tri5-only is true. So exiting." + exit 0 +fi + + +#################################################################### +## +## DNN ("compatibility") decoding -- also, just decode the "default" net +## +#################################################################### +if [ -f exp/tri6_nnet/.done ]; then + decode=exp/tri6_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +#################################################################### +## +## nnet3 model decoding +## +#################################################################### +if [ -f exp/nnet3/lstm_bidirectional_sp/final.mdl ]; then + decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + my_nj_backup=$my_nj + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." + my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode + + my_nj=$my_nj_backup +fi + +if [ -f exp/nnet3/lstm_realigned_bidirectional_sp/final.mdl ]; then + decode=exp/nnet3/lstm_realigned_bidirectional_sp//decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi +if [ -f exp/nnet3/lstm_sp/final.mdl ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +if [ -f exp/$nnet3_model/final.mdl ]; then + decode=exp/$nnet3_model/decode_${dataset_id} + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +#################################################################### +## +## chain model decoding +## +#################################################################### +if [ -f exp/$chain_model/final.mdl ]; then + dir=exp/$chain_model + + decode=$dir/decode_${dataset_id} + decode_script=steps/nnet3/decode.sh + + if [ ! -f exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done ] ; then + steps/online/nnet2/extract_ivectors_online.sh --cmd "$decode_cmd" --nj $my_nj \ + ${dataset_dir}_hires exp/nnet3$parent_dir_suffix/extractor exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/ || exit 1; + touch exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done + fi + + my_nj_backup=$my_nj + rnn_opts= + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." + my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." 
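+      # The job count is capped at the number of speakers because (B)LSTM
+      # decoding is slow enough that we want one job per speaker; spk2utt has
+      # one line per speaker, so e.g. 120 speakers would give 120 jobs (the
+      # figure is illustrative). An equivalent sketch of the same count:
+      #   my_nj=$(wc -l < ${dataset_dir}_hires/spk2utt)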
+ my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3$parent_dir_suffix/ivectors_${dataset_id} \ + $dir/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_chain_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode + my_nj=$my_nj_backup +else + echo "no chain model exp/$chain_model" +fi + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! 
-f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh --minimize $minimize \
+        --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test $decode
+  done
+fi
+
+####################################################################
+##
+## DNN semi-supervised training decoding
+##
+####################################################################
+for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \
+          tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do
+  if [ -f exp/$dnn/.done ]; then
+    decode=exp/$dnn/decode_${dataset_id}
+    if [ ! -f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh \
+        --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test $decode
+  fi
+done
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI decoding inside this block, as we might only have
+## the DNN systems trained and not the PLP system. The DNN systems are built
+## only on top of the tri5 stage.
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test exp/sgmm5 exp/sgmm5/graph |tee exp/sgmm5/mkgraph.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -x exp/sgmm5_mmi_b0.1 ] && [ ! 
-f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We have to
+  #a)Run MBR decoding
+  #b)Run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -f $decode/.done ]; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test $decode
+    fi
+  done
+fi
+
+
+echo "Everything looking good...."
+exit 0
diff --git a/egs/babel/s5d/run-4-phn-anydecode.sh b/egs/babel/s5d/run-4-phn-anydecode.sh
new file mode 100755
index 00000000000..054a4665529
--- /dev/null
+++ b/egs/babel/s5d/run-4-phn-anydecode.sh
@@ -0,0 +1,613 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.phn.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+is_rnn=false
+extra_left_context=0
+extra_right_context=0
+frames_per_chunk=0
+
+echo $0 "$@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h.phn|dev2h.phn|eval.phn|shadow.phn)"
+  exit 1
+fi
+
+#This seems to be the only functioning way to ensure the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C seems to terminate only the deepest sourced script.
+# Let shell functions inherit ERR trap.  Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.phn.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. what kind of dataset it is
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem   #PEM segmentation"
+  echo -e "\tuem   #UEM segmentation in the CMU database format"
+  echo -e "\tseg   #UEM segmentation (kaldi-native)"
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have an existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+my_nj=$(($my_nj * 2))
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  my_kwlists["$key"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set! "
+      echo "You should probably set the variable in the config file "
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
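+    # The *_data_dir and *_data_list arrays are expected to be parallel:
+    # element i of the dir array pairs with element i of the list array.
+    # A hypothetical two-release config (paths illustrative only):
+    #   dev10h_data_dir=( /corpora/releaseA /corpora/releaseB )
+    #   dev10h_data_list=( conf/lists/releaseA.list conf/lists/releaseB.list )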
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "pem" ]; then
+      . ./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense,
+      ##because with PEM we are given the segmentation
+      ##and, because of the format of the segment files,
+      ##the transcript as well
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
+
+if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! 
-d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires
+  fi
+  ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+  touch ${dataset_dir}_hires/.done
+fi
+dataset=$(basename $dataset_dir)
+set -x
+ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+set +x
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang.phn
+if ! $skip_kws ; then
+  if $extra_kws ; then
+    L1_lex=data/local/dict.phn/lexiconp.txt
+    . ./local/datasets/extra_kws.sh || exit 1
+  fi
+  if $vocab_kws ; then
+    . ./local/datasets/vocab_kws.sh || exit 1
+  fi
+fi
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0;
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+if [ ! -f data/langp_test.phn/.done ]; then
+  ln -sf lang.phn data/langp_test.phn || true
+  touch data/langp_test.phn/.done
+fi
+
+decode=exp/tri5/decode_${dataset_id}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning decoding with SAT models on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test.phn exp/tri5 exp/tri5/graph.phn |tee exp/tri5/mkgraph.phn.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    exp/tri5/graph.phn ${dataset_dir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI decoding inside this block, as we might only have
+## the DNN systems trained and not the PLP system. The DNN systems are built
+## only on top of the tri5 stage.
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! 
-f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test.phn exp/sgmm5 exp/sgmm5/graph.phn |tee exp/sgmm5/mkgraph.phn.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph.phn ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test.phn exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test.phn ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We have to
+  #a)Run MBR decoding
+  #b)Run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_plp_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.phn $decode
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn $decode
+fi
+
+####################################################################
+##
+## nnet3 model decoding
+##
+####################################################################
+if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then
+  decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.phn
+  rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 "
+  decode_script=steps/nnet3/lstm/decode.sh
+  if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.phn + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.phn + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode + fi +done +echo "Everything looking good...." +exit 0 diff --git a/egs/babel/s5d/run-4-syll-anydecode.sh b/egs/babel/s5d/run-4-syll-anydecode.sh new file mode 100755 index 00000000000..e69b168cf49 --- /dev/null +++ b/egs/babel/s5d/run-4-syll-anydecode.sh @@ -0,0 +1,613 @@ +#!/bin/bash +set -e +set -o pipefail + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + + +dir=dev10h.syll.pem +kind= +data_only=false +fast_path=true +skip_kws=false +skip_stt=false +skip_scoring= +extra_kws=true +vocab_kws=false +tri5_only=false +wip=0.5 + +nnet3_model=nnet3/tdnn_sp +is_rnn=false +extra_left_context=0 +extra_right_context=0 +frames_per_chunk=0 + +echo $0 "$@" + +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $(basename $0) --type (dev10h.syll|dev2h.syll|eval.syll|shadow.syll)" + exit 1 +fi + +#This seems to be the only functioning way to ensure the complete +#set of scripts will exit when sourcing several of them together. +#Otherwise, CTRL-C just terminates the deepest sourced script. +# Let shell functions inherit ERR trap. Same as `set -E'. +set -o errtrace +trap "echo Exited!; exit;" SIGINT SIGTERM + +./local/check_tools.sh || exit 1 + +# Set proxy search parameters for the extended lexicon case. +if [ -f data/.extlex ]; then + proxy_phone_beam=$extlex_proxy_phone_beam + proxy_phone_nbest=$extlex_proxy_phone_nbest + proxy_beam=$extlex_proxy_beam + proxy_nbest=$extlex_proxy_nbest +fi + +dataset_segments=${dir##*.} +dataset_dir=data/$dir +dataset_id=$dir +dataset_type=${dir%%.syll.*} +#By default, we want the script to accept how the dataset should be handled, +#i.e.
what kind of dataset it is +if [ -z ${kind} ] ; then + if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then + dataset_kind=supervised + else + dataset_kind=unsupervised + fi +else + dataset_kind=$kind +fi + +if [ -z $dataset_segments ]; then + echo "You have to specify the segmentation type as well" + echo "If you are trying to decode the PEM segmentation dir" + echo "such as data/dev10h, specify dev10h.pem" + echo "The valid segmentation types are:" + echo -e "\tpem #PEM segmentation" + echo -e "\tuem #UEM segmentation in the CMU database format" + echo -e "\tseg #UEM segmentation (kaldi-native)" +fi + +if [ -z "${skip_scoring}" ] ; then + if [ "$dataset_kind" == "unsupervised" ]; then + skip_scoring=true + else + skip_scoring=false + fi +fi + +#The $dataset_type value will be the dataset name without any extension +eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" ) +eval my_data_list=( "\${${dataset_type}_data_list[@]}" ) +if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then + echo "Error: The dir you specified ($dataset_id) does not have existing config"; + exit 1 +fi + +eval my_stm_file=\$${dataset_type}_stm_file +eval my_ecf_file=\$${dataset_type}_ecf_file +eval my_rttm_file=\$${dataset_type}_rttm_file +eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate + +if [ -z "$my_nj" ]; then + echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined." + exit 1 +fi +my_nj=$(($my_nj * 2)) + +my_subset_ecf=false +eval ind=\${${dataset_type}_subset_ecf+x} +if [ "$ind" == "x" ] ; then + eval my_subset_ecf=\$${dataset_type}_subset_ecf +fi + +declare -A my_kwlists=() +eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}" +for key in $my_kwlists_keys # make sure you include the quotes there +do + eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}" + my_kwlists["$key"]="${my_kwlists_val}" +done +declare -p my_kwlists +export my_kwlists + +#Just a minor safety precaution to prevent using incorrect settings +#The dataset_* variables should be used. +set -e +set -o pipefail +set -u +unset dir +unset kind + +function make_plp { + target=$1 + logdir=$2 + output=$3 + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output + else + steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output + fi + utils/fix_data_dir.sh $target + steps/compute_cmvn_stats.sh $target $logdir $output + utils/fix_data_dir.sh $target +} + +function check_variables_are_set { + for variable in $mandatory_variables ; do + if ! declare -p $variable ; then + echo "Mandatory variable ${variable/my/$dataset_type} is not set! " + echo "You should probably set the variable in the config file " + exit 1 + else + declare -p $variable + fi + done + + if [ ! -z ${optional_variables+x} ] ; then + for variable in $optional_variables ; do + eval my_variable=\$${variable} + echo "$variable=$my_variable" + done + fi +} + +if [ ! -f data/raw_${dataset_type}_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the ${dataset_type} set" + echo --------------------------------------------------------------------- + + l1=${#my_data_dir[*]} + l2=${#my_data_list[*]} + if [ "$l1" -ne "$l2" ]; then + echo "Error, the number of source file lists is not the same as the number of source dirs!"
+ exit 1 + fi + + resource_string="" + if [ "$dataset_kind" == "unsupervised" ]; then + resource_string+=" --ignore-missing-txt true" + fi + + for i in `seq 0 $(($l1 - 1))`; do + resource_string+=" ${my_data_dir[$i]} " + resource_string+=" ${my_data_list[$i]} " + done + local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data + touch data/raw_${dataset_type}_data/.done +fi +my_data_dir=`readlink -f ./data/raw_${dataset_type}_data` +[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list +nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l` + +if [ "$nj_max" -lt "$my_nj" ] ; then + echo "Number of jobs ($my_nj) is too big!" + echo "The maximum reasonable number of jobs is $nj_max" + my_nj=$nj_max +fi + +##################################################################### +# +# Audio data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +if [ ! -f $dataset_dir/.done ] ; then + if [ "$dataset_kind" == "supervised" ]; then + if [ "$dataset_segments" == "seg" ]; then + . ./local/datasets/supervised_seg.sh || exit 1 + elif [ "$dataset_segments" == "uem" ]; then + . ./local/datasets/supervised_uem.sh || exit 1 + elif [ "$dataset_segments" == "pem" ]; then + . ./local/datasets/supervised_pem.sh || exit 1 + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + elif [ "$dataset_kind" == "unsupervised" ] ; then + if [ "$dataset_segments" == "seg" ] ; then + . ./local/datasets/unsupervised_seg.sh + elif [ "$dataset_segments" == "uem" ] ; then + . ./local/datasets/unsupervised_uem.sh + elif [ "$dataset_segments" == "pem" ] ; then + ##This combination does not really make sense, + ##because with PEM we get the segmentation + ##and, because of the format of the segment files, + ##the transcript as well. + echo "ERROR: $dataset_segments combined with $dataset_type" + echo "does not really make any sense!" + exit 1 + #. ./local/datasets/unsupervised_pem.sh + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + else + echo "Unknown kind of the dataset: \"$dataset_kind\"!"; + echo "Valid dataset kinds are: supervised, unsupervised, shadow"; + exit 1 + fi + + if [ ! -f ${dataset_dir}/.plp.done ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp + touch ${dataset_dir}/.plp.done + fi + touch $dataset_dir/.done +fi + +if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then + dataset=$(basename $dataset_dir) + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date` + echo --------------------------------------------------------------------- + if [ !
-d ${dataset_dir}_hires ]; then + utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires + fi + ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true + touch ${dataset_dir}_hires/.done +fi +set -x +ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true +set +x + +##################################################################### +# +# KWS data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing kws data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +lang=data/lang.syll +if ! $skip_kws ; then + if $extra_kws ; then + L1_lex=data/local/dict.syll/lexiconp.txt + . ./local/datasets/extra_kws.sh || exit 1 + fi + if $vocab_kws ; then + . ./local/datasets/vocab_kws.sh || exit 1 + fi +fi + +if $data_only ; then + echo "Exiting, as data-only was requested..." + exit 0; +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +if [ ! -f data/langp_test.syll/.done ]; then + ln -sf lang.syll data/langp_test.syll || true + touch data/langp_test.syll/.done +fi + +decode=exp/tri5/decode_${dataset_id} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning decoding with SAT models on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test.syll exp/tri5 exp/tri5/graph.syll |tee exp/tri5/mkgraph.syll.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms + #Therefore, we use smaller beam sizes to reduce the decoding times + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + exp/tri5/graph.syll ${dataset_dir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll ${decode} + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll ${decode}.si +fi + +if $tri5_only; then + echo "--tri5-only is true. So exiting." + exit 0 +fi + +#################################################################### +## SGMM2 decoding +## We include the SGMM_MMI decoding inside this block, as we might have only the +## DNN systems trained and not the PLP system. The DNN systems are built only on +## top of the tri5 stage. +#################################################################### +if [ -f exp/sgmm5/.done ]; then + decode=exp/sgmm5/decode_fmllr_${dataset_id} + if [ !
-f $decode/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning $decode on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test.syll exp/sgmm5 exp/sgmm5/graph.syll |tee exp/sgmm5/mkgraph.syll.log + + mkdir -p $decode + steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \ + --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\ + exp/sgmm5/graph.syll ${dataset_dir} $decode |tee $decode/decode.log + touch $decode/.done + + if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll exp/sgmm5/decode_fmllr_${dataset_id} + fi + fi + + #################################################################### + ## + ## SGMM_MMI rescoring + ## + #################################################################### + + for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter + if [ ! -f $decode/.done ]; then + + mkdir -p $decode + steps/decode_sgmm2_rescore.sh --skip-scoring true \ + --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \ + data/langp_test.syll ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log + + touch $decode/.done + fi + done + + #We are done -- all lattices have been generated. We still have to + #a) run MBR decoding + #b) run KW search + for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + done +fi + + + +#################################################################### +## +## DNN ("compatibility") decoding -- also, just decode the "default" net +## +#################################################################### +if [ -f exp/tri6_nnet/.done ]; then + decode=exp/tri6_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +#################################################################### +## +## nnet3 model decoding +## +#################################################################### +if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then + decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.syll + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ !
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.syll + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.syll + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + fi +done +echo "Everything looking good...." +exit 0 diff --git a/egs/babel/s5d/run-4b-anydecode-bnf.sh b/egs/babel/s5d/run-4b-anydecode-bnf.sh new file mode 100755 index 00000000000..8298021feb3 --- /dev/null +++ b/egs/babel/s5d/run-4b-anydecode-bnf.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# Copyright 2014 Pegah Ghahremani +# Apache 2.0 + +# decode BNF + sgmm_mmi system +set -e +set -o pipefail + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + + +dir=dev10h.pem +kind= +data_only=false +fast_path=true +skip_kws=false +extra_kws=false +skip_stt=false +skip_scoring=false +tmpdir=`pwd` +semisupervised=true +unsup_string= + +. utils/parse_options.sh + +type=$dir + +if [ $# -ne 0 ]; then + echo "Usage: $(basename $0) --type (dev10h|dev2h|eval|shadow)" + echo "--semisupervised #set to false to skip unsupervised training." + exit 1 +fi + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + +if [ -z "$unsup_string" ] ; then + if $semisupervised ; then + unsup_string="_semisup" + else + unsup_string="" #"": supervised training, "_semisup": semi-supervised BNF training + fi +fi + +if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then + # note: echo dev10h.uem | grep -w dev10h will produce a match, but this + # doesn't matter because dev10h is also a valid value. + echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} + exit 1; +fi + +dataset_segments=${dir##*.} +dataset_dir=data/$dir +dataset_id=$dir +dataset_type=${dir%%.*} +#By default, we want the script to accept how the dataset should be handled, +#i.e. what kind of dataset it is +if [ -z ${kind} ] ; then + if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ] ; then + dataset_kind=supervised + else + dataset_kind=unsupervised + fi +else + dataset_kind=$kind +fi + +if [ -z $dataset_segments ]; then + echo "You have to specify the segmentation type as well" + echo "If you are trying to decode the PEM segmentation dir" + echo "such as data/dev10h, specify dev10h.pem" + echo "The valid segmentation types are:" + echo -e "\tpem #PEM segmentation" + echo -e "\tuem #UEM segmentation in the CMU database format" + echo -e "\tseg #UEM segmentation (kaldi-native)" +fi + +if [ "$dataset_kind" == "unsupervised" ]; then + skip_scoring=true +fi + +dirid=${type} +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} +param_bnf_dir=param_bnf${unsup_string} +datadir=$data_bnf_dir/${dirid} + +[ ! -d data/${dirid} ] && echo "No such directory data/${dirid}" && exit 1; +[ ! -d exp/tri5/decode_${dirid} ] && echo "No such directory exp/tri5/decode_${dirid}" && exit 1; + +# Set my_nj; typically 64. +my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1; + + +if [ !
$data_bnf_dir/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \ + [ ! $data_bnf_dir/${dirid}_bnf/.done -nt $exp_dir/tri6_bnf/.done ]; then + # put the archives in $param_bnf_dir/. + steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \ + --transform-dir exp/tri5/decode_${dirid} data/${dirid} $data_bnf_dir/${dirid}_bnf $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf + touch $data_bnf_dir/${dirid}_bnf/.done +fi + +if [ ! $data_bnf_dir/${dirid}/.done -nt $data_bnf_dir/${dirid}_bnf/.done ]; then + steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \ + --nj $train_nj --transform-dir exp/tri5/decode_${dirid} $data_bnf_dir/${dirid}_sat data/${dirid} \ + exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir/ + + steps/append_feats.sh --cmd "$train_cmd" --nj 4 \ + $data_bnf_dir/${dirid}_bnf $data_bnf_dir/${dirid}_sat $data_bnf_dir/${dirid} \ + $exp_dir/append_feats/log $param_bnf_dir/ + steps/compute_cmvn_stats.sh --fake $data_bnf_dir/${dirid} $exp_dir/make_fmllr_feats $param_bnf_dir + rm -r $data_bnf_dir/${dirid}_sat + if ! $skip_kws ; then + cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true + fi + touch $data_bnf_dir/${dirid}/.done +fi +if ! $skip_kws ; then + rm -rf $data_bnf_dir/${dirid}/*kws* + cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true +fi + + +if $data_only ; then + echo "Exiting, as data-only was requested... " + exit 0 +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +decode=$exp_dir/tri6/decode_${dirid} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Decoding with SAT models on top of bottleneck features on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms + #Therefore, we use smaller beam sizes to reduce the decoding times + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \ + --acwt $bnf_decode_acwt \ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + $exp_dir/tri6/graph ${datadir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip\ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test ${decode} + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test ${decode}.si +fi + +#################################################################### +## SGMM2 decoding +#################################################################### +decode=$exp_dir/sgmm7/decode_fmllr_${dirid} +if [ !
-f $decode/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning $decode on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test $exp_dir/sgmm7 $exp_dir/sgmm7/graph |tee $exp_dir/sgmm7/mkgraph.log + + mkdir -p $decode + steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \ + --acwt $bnf_decode_acwt \ + --cmd "$decode_cmd" --transform-dir $exp_dir/tri6/decode_${dirid} "${decode_extra_opts[@]}"\ + $exp_dir/sgmm7/graph ${datadir} $decode |tee $decode/decode.log + touch $decode/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $exp_dir/sgmm7/decode_fmllr_${dirid} +fi + +#################################################################### +## +## SGMM_MMI rescoring +## +#################################################################### + +for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter + if [ ! -f $decode/.done ]; then + + mkdir -p $decode + steps/decode_sgmm2_rescore.sh --skip-scoring true \ + --cmd "$decode_cmd" --iter $iter --transform-dir $exp_dir/tri6/decode_${dirid} \ + data/langp_test ${datadir} $exp_dir/sgmm7/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log + + touch $decode/.done + fi +done + +#We are done -- all lattices have been generated. We still have to +#a) run MBR decoding +#b) run KW search +for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $decode +done + + +if [ -f $exp_dir/tri7_nnet/.done ] && + [[ ( ! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $datadir/.done ) || \ + ( ! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $exp_dir/tri7_nnet/.done ) ]]; then + + echo --------------------------------------------------------------------- + echo "Decoding hybrid system on top of bottleneck features on" `date` + echo --------------------------------------------------------------------- + + # We use the graph from tri6. + utils/mkgraph.sh \ + data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log + + decode=$exp_dir/tri7_nnet/decode_${dirid} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj $my_nj \ + --acwt $bnf_decode_acwt \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --feat-type raw \ + $exp_dir/tri6/graph ${datadir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + decode=$exp_dir/tri7_nnet/decode_${dirid} + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $decode + +fi + +echo "$0: Everything looking good...."
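+# (A minimal sketch of the freshness-check idiom used throughout this script; +# the names stageA/stageB are illustrative and not part of this recipe. A +# stage is redone whenever its .done marker is missing or older than the +# .done marker of one of its inputs: +#   if [ ! stageB/.done -nt stageA/.done ]; then +#     ... redo stage B ... +#     touch stageB/.done +#   fi +# )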
+exit 0 diff --git a/egs/babel/s5d/run-6-combine.sh b/egs/babel/s5d/run-6-combine.sh new file mode 100755 index 00000000000..81dc42caca3 --- /dev/null +++ b/egs/babel/s5d/run-6-combine.sh @@ -0,0 +1,73 @@ +#!/bin/bash + + +. conf/common_vars.sh +. ./lang.conf +. ./cmd.sh + +set -e +set -o pipefail +set -u + +function best_system_path_kws { + path_to_outputs=$1 + + best_out=`(find $path_to_outputs -name "sum.txt" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | sort -r -n -k 3 | head -n 1| awk '{print $1}'` + echo `dirname $best_out` +} + +function best_system_path_stt { + path_to_outputs=$1 + best_out=` (find $path_to_outputs -name "*.ctm.sys" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1| awk '{print $1}' ` + echo `dirname $best_out` +} +# Wait till the main run.sh gets to the stage where it's +# finished aligning the tri5 model. + +function lm_offsets { + min=999 + for dir in "$@" ; do + lmw=${dir##*score_} + + [ $lmw -le $min ] && min=$lmw + done + + lat_offset_str="" + for dir in "$@" ; do + latdir_dir=`dirname $dir` + lmw=${dir##*score_} + + offset=$(( $lmw - $min )) + if [ $offset -gt 0 ] ; then + lat_offset_str="$lat_offset_str ${latdir_dir}:$offset " + else + lat_offset_str="$lat_offset_str ${latdir_dir} " + fi + done + + echo $lat_offset_str + +} + +plp_kws=`best_system_path_kws "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"` +plp_stt=`best_system_path_stt "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*"` + +dnn_kws=`best_system_path_kws "exp/tri6_nnet/decode_dev10h/kws_*"` +dnn_stt=`best_system_path_stt "exp/tri6_nnet/decode_dev10h/"` + +bnf_kws=`best_system_path_kws "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"` +bnf_stt=`best_system_path_stt "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*"` + + + +echo local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h +#local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h + +echo local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws $dnn_kws $bnf_kws +#local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws/kwslist.xml $dnn_kws/kwslist.xml $bnf_kws/kwslist.xml exp/combine/dev10h/ + +mkdir -p exp/combine/kws_rescore +#local/rescoring/rescore_repeats.sh --cmd "$decode_cmd" \ +# exp/combine/dev10h/ data/dev10h data/train/text exp/combine/kws_rescore + +exit 0 diff --git a/egs/babel/s5d/steps b/egs/babel/s5d/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/babel/s5d/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/babel/s5d/utils b/egs/babel/s5d/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/babel/s5d/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/egs/bn_music_speech/v1/local/run_dnn_music_id.sh b/egs/bn_music_speech/v1/local/run_dnn_music_id.sh new file mode 100755 index 00000000000..bd30387ae2f --- /dev/null +++ b/egs/bn_music_speech/v1/local/run_dnn_music_id.sh @@ -0,0 +1,130 @@ +#!
/bin/bash + +set -e +set -o pipefail +set -u + +stage=-1 +segmentation_config=conf/segmentation.conf +cmd=run.pl +nj=40 + +# Viterbi options +min_silence_duration=3 # minimum number of frames for silence +min_speech_duration=3 # minimum number of frames for speech +min_music_duration=3 # minimum number of frames for music +frame_subsampling_factor=1 +music_transition_probability=0.1 +sil_transition_probability=0.1 +speech_transition_probability=0.1 +sil_prior=0.3 +speech_prior=0.4 +music_prior=0.3 + +# Decoding options +acwt=1 +beam=10 +max_active=7000 + +. utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "Usage: $0 <data-dir> <sad-likes-dir> <music-likes-dir> <dir>" + echo " e.g.: $0 data/bn exp/nnet3_sad_snr/tdnn_b_n4/sad_bn_whole exp/nnet3_sad_snr/tdnn_b_n4/music_bn_whole exp/dnn_music_id" + exit 1 +fi + +data=$1 +sad_likes_dir=$2 +music_likes_dir=$3 +dir=$4 + +min_silence_duration=`perl -e "print (int($min_silence_duration / $frame_subsampling_factor))"` +min_speech_duration=`perl -e "print (int($min_speech_duration / $frame_subsampling_factor))"` +min_music_duration=`perl -e "print (int($min_music_duration / $frame_subsampling_factor))"` + +lang=$dir/lang + +if [ $stage -le 1 ]; then + mkdir -p $lang + + # Create a lang directory with phones.txt and topo with + # silence, music and speech phones. + steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \ + --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$speech_transition_probability" \ + --phone-transition-parameters="--phone-list=3 --min-duration=$min_music_duration --end-transition-probability=$music_transition_probability" \ + $lang + + cp $lang/phones.txt $lang/words.txt +fi + +feat_dim=2 # dummy. We don't need this.
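+# (Illustrative note: stage 2 below keeps only the transition model -- the +# dummy monophone GMM that gmm-init-mono writes is never used for scoring, +# since the acoustic scores come from the network log-likelihoods; that is +# why feat_dim above is a dummy value. Likewise, the duration conversions +# above work in output frames, e.g. min_speech_duration=3 with +# frame_subsampling_factor=1 enforces at least int(3 / 1) = 3 consecutive +# speech frames.)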
+if [ $stage -le 2 ]; then + $cmd $dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $dir/tree \| \ + copy-transition-model --binary=false - $dir/trans.mdl || exit 1 +fi + +# Make unigram G.fst +if [ $stage -le 3 ]; then + cat > $lang/word2prior <<EOF +1 $sil_prior +2 $speech_prior +3 $music_prior +EOF + steps/segmentation/internal/make_G_fst.py --word2prior-map $lang/word2prior | \ + fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt > $lang/G.fst +fi + +graph_dir=$dir/graph_test + +if [ $stage -le 4 ]; then + $cmd $dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $dir $dir/graph_test || exit 1 +fi + +if [ $stage -le 5 ]; then + utils/split_data.sh $data $nj + sdata=$data/split$nj + + nj_sad=`cat $sad_likes_dir/num_jobs` + sad_likes= + for n in `seq $nj_sad`; do + sad_likes="$sad_likes $sad_likes_dir/log_likes.$n.gz" + done + + nj_music=`cat $music_likes_dir/num_jobs` + music_likes= + for n in `seq $nj_music`; do + music_likes="$music_likes $music_likes_dir/log_likes.$n.gz" + done + + decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + paste-feats "ark:gunzip -c $sad_likes | extract-feature-segments ark,s,cs:- $sdata/JOB/segments ark:- |" \ + "ark,s,cs:gunzip -c $music_likes | extract-feature-segments ark,s,cs:- $sdata/JOB/segments ark:- | select-feats 1 ark:- ark:- |" \ + ark:- \| decode-faster-mapped ${decoder_opts[@]} \ + $dir/trans.mdl $graph_dir/HCLG.fst ark:- \ + ark:/dev/null ark:- \| \ + ali-to-phones --per-frame $dir/trans.mdl ark:- \ + "ark:|gzip -c > $dir/ali.JOB.gz" +fi + +include_silence=true +if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/get_class_id.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz |" ark:- \| \ + post-to-feats --post-dim=4 ark:- ark:- \| \ + matrix-sum-rows --do-average ark:- ark,t:- \| \ + sid/vector_to_music_labels.pl ${include_silence:+--include-silence-in-music} '>' $dir/ratio.JOB +fi + +for n in `seq $nj`; do + cat $dir/ratio.$n +done > $dir/ratio + +cat $dir/ratio | local/print_scores.py /dev/stdin | compute-eer - diff --git a/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh b/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh new file mode 100644 index 00000000000..d96acdabaaa --- /dev/null +++ b/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh @@ -0,0 +1,217 @@ +#!/bin/bash + +set -e +set -o pipefail +set -u + +. path.sh +. cmd.sh + +feat_affix=bp_vh +affix= +reco_nj=32 + +stage=-1 + +# SAD network config +iter=final +extra_left_context=100 # Set to some large value +extra_right_context=20 + + +# Configs +frame_subsampling_factor=1 + +min_silence_duration=3 # minimum number of frames for silence +min_speech_duration=3 # minimum number of frames for speech +min_music_duration=3 # minimum number of frames for music +music_transition_probability=0.1 +sil_transition_probability=0.1 +speech_transition_probability=0.1 +sil_prior=0.3 +speech_prior=0.4 +music_prior=0.3 + +# Decoding options +acwt=1 +beam=10 +max_active=7000 + +mfcc_config=conf/mfcc_hires_bp.conf + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 3 ]; then + echo "Usage: $0 <data-dir> <sad-nnet-dir> <dir>" + echo " e.g.: $0 data/bn exp/nnet3_sad_snr/tdnn_j_n4 exp/dnn_music_id" + exit 1 +fi + +# Set to true if the test data has > 8kHz sampling frequency. +do_downsampling=true + +data_dir=$1 +sad_nnet_dir=$2 +dir=$3 + +data_id=`basename $data_dir` + +export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" +[ ! -z `which sph2pipe` ] + +for f in $sad_nnet_dir/$iter.raw $sad_nnet_dir/post_output-speech.vec $sad_nnet_dir/post_output-music.vec; do + if [ ! -f $f ]; then + echo "$0: Could not find $f.
See the local/segmentation/run_train_sad.sh" + exit 1 + fi +done + +mkdir -p $dir + +new_data_dir=$dir/${data_id} +if [ $stage -le 0 ]; then + utils/data/convert_data_dir_to_whole.sh $data_dir ${new_data_dir}_whole + + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + sox=`which sox` + + cat $data_dir/wav.scp | python -c "import sys +for line in sys.stdin.readlines(): + splits = line.strip().split() + if splits[-1] == '|': + out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |' + else: + out_line = 'cat {0} {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:])) + print (out_line)" > ${new_data_dir}_whole/wav.scp + + utils/copy_data_dir.sh ${new_data_dir}_whole ${new_data_dir}_whole_bp_hires +fi + +test_data_dir=${new_data_dir}_whole_bp_hires + +if [ $stage -le 1 ]; then + steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \ + ${new_data_dir}_whole_bp_hires exp/make_hires/${data_id}_whole_bp mfcc_hires + steps/compute_cmvn_stats.sh ${new_data_dir}_whole_bp_hires exp/make_hires/${data_id}_whole_bp mfcc_hires +fi + +if [ $stage -le 2 ]; then + output_name=output-speech + post_vec=$sad_nnet_dir/post_${output_name}.vec + steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \ + --post-vec "$post_vec" \ + --iter $iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk 150 \ + --output-name $output_name \ + --frame-subsampling-factor $frame_subsampling_factor \ + --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $dir/sad_${data_id}_whole_bp +fi + +if [ $stage -le 3 ]; then + output_name=output-music + post_vec=$sad_nnet_dir/post_${output_name}.vec + steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \ + --post-vec "$post_vec" \ + --iter $iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk 150 \ + --output-name $output_name \ + --frame-subsampling-factor $frame_subsampling_factor \ + --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $dir/music_${data_id}_whole_bp +fi + +if [ $stage -le 4 ]; then + $train_cmd JOB=1:$reco_nj $dir/get_average_likes.JOB.log \ + paste-feats \ + "ark:gunzip -c $dir/sad_${data_id}_whole_bp/log_likes.JOB.gz | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + "ark:gunzip -c $dir/music_${data_id}_whole_bp/log_likes.JOB.gz | select-feats 1 ark:- ark:- | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + ark:- \| \ + matrix-sum-rows --do-average ark:- ark,t:$dir/average_likes.JOB.ark + + for n in `seq $reco_nj`; do + cat $dir/average_likes.$n.ark + done | awk '{print $1" "( exp($3) + exp($5) + 0.01) / (exp($4) + 0.01)}' | \ + local/print_scores.py /dev/stdin | compute-eer - +fi + +lang=$dir/lang + +if [ $stage -le 5 ]; then + mkdir -p $lang + + # Create a lang directory with phones.txt and topo with + # silence, music and speech phones. 
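+  # (For example, the call below defines three whole-word phones -- 1 for +  # silence, 2 for speech and 3 for music -- whose minimum durations and +  # end-transition probabilities come from the min_*_duration and +  # *_transition_probability variables at the top of this script; words.txt +  # is then just a copy of phones.txt.)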
+ steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \ + --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$speech_transition_probability" \ + --phone-transition-parameters="--phone-list=3 --min-duration=$min_music_duration --end-transition-probability=$music_transition_probability" \ + $lang + + cp $lang/phones.txt $lang/words.txt +fi + +feat_dim=2 # dummy. We don't need this. +if [ $stage -le 6 ]; then + $train_cmd $dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $dir/tree \| \ + copy-transition-model --binary=false - $dir/trans.mdl || exit 1 +fi + +# Make unigram G.fst +if [ $stage -le 7 ]; then + cat > $lang/word2prior <<EOF +1 $sil_prior +2 $speech_prior +3 $music_prior +EOF + steps/segmentation/internal/make_G_fst.py --word2prior-map $lang/word2prior | \ + fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt > $lang/G.fst +fi + +graph_dir=$dir/graph_test + +if [ $stage -le 8 ]; then + $train_cmd $dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $dir $dir/graph_test || exit 1 +fi + +seg_dir=$dir/segmentation_${data_id}_whole_bp +mkdir -p $seg_dir + +if [ $stage -le 9 ]; then + decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) + $train_cmd JOB=1:$reco_nj $dir/decode.JOB.log \ + paste-feats \ + "ark:gunzip -c $dir/sad_${data_id}_whole_bp/log_likes.JOB.gz | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + "ark:gunzip -c $dir/music_${data_id}_whole_bp/log_likes.JOB.gz | select-feats 1 ark:- ark:- | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + ark:- \| decode-faster-mapped ${decoder_opts[@]} \ + $dir/trans.mdl $graph_dir/HCLG.fst ark:- \ + ark:/dev/null ark:- \| \ + ali-to-phones --per-frame $dir/trans.mdl ark:- \ + "ark:|gzip -c > $seg_dir/ali.JOB.gz" +fi + +include_silence=true +if [ $stage -le 10 ]; then + $train_cmd JOB=1:$reco_nj $dir/log/get_class_id.JOB.log \ + ali-to-post "ark:gunzip -c $seg_dir/ali.JOB.gz |" ark:- \| \ + post-to-feats --post-dim=4 ark:- ark:- \| \ + matrix-sum-rows --do-average ark:- ark,t:- \| \ + sid/vector_to_music_labels.pl ${include_silence:+--include-silence-in-music} '>' $dir/ratio.JOB + + for n in `seq $reco_nj`; do + cat $dir/ratio.$n + done > $dir/ratio + + cat $dir/ratio | local/print_scores.py /dev/stdin | compute-eer - +fi + +# LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 0.860585%, at threshold 1.99361 diff --git a/egs/callhome_egyptian/s5/run.sh b/egs/callhome_egyptian/s5/run.sh index 9d1fa692da0..4d1359bea98 100755 --- a/egs/callhome_egyptian/s5/run.sh +++ b/egs/callhome_egyptian/s5/run.sh @@ -29,7 +29,7 @@ local/callhome_prepare_dict.sh $eca_lexicon utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang # Make sure that you do not use your test and your dev sets to train the LM -# Some form of cross validation is possible where you decode your dev/set based on an +# Some form of cross validation is possible where you decode your dev/set based on an # LM that is trained on everything but that that conversation local/callhome_train_lms.sh $split local/callhome_create_test_lang.sh @@ -100,7 +100,7 @@ steps/train_lda_mllt.sh --cmd "$train_cmd" \ exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; )& -# Next we'll use fMLLR and train with SAT (i.e. on +# Next we'll use fMLLR and train with SAT (i.e.
on # fMLLR features) steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ @@ -108,7 +108,7 @@ steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 2200 25000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1; - + ( utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -140,9 +140,9 @@ steps/train_sat.sh --cmd "$train_cmd" \ )& dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 1G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 1G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -153,17 +153,17 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & # Decode test sets ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_test exp/tri5a/graph data/test exp/tri6a_dnn/decode_test - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_sup exp/tri5a/graph data/sup exp/tri6a_dnn/decode_sup - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_h5 exp/tri5a/graph data/h5 exp/tri6a_dnn/decode_h5 ) & diff --git a/egs/chime3/s5/cmd.sh b/egs/chime3/s5/cmd.sh index 7ee5fbcd73d..cf2570db1a9 100755 --- a/egs/chime3/s5/cmd.sh +++ b/egs/chime3/s5/cmd.sh @@ -6,9 +6,9 @@ # the number of cpus on your machine. #a) JHU cluster options -#export train_cmd="queue.pl -l arch=*64" -#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" -#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" +#export train_cmd="queue.pl" +#export decode_cmd="queue.pl --mem 4G" +#export mkgraph_cmd="queue.pl --mem 4G" #export cuda_cmd="..." 
diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh index bf3976b8a1f..e02f19bb680 100755 --- a/egs/csj/s5/local/csj_run_rnnlm.sh +++ b/egs/csj/s5/local/csj_run_rnnlm.sh @@ -3,7 +3,7 @@ # Copyright 2016 Tokyo Institute of Technology (Authors: Tomohiro Tanaka, Takafumi Moriya and Takahiro Shinozaki) # 2016 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) # Apache 2.0 -# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. +# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. [ -f ./path.sh ] && . ./path.sh . utils/parse_options.sh @@ -21,7 +21,7 @@ echo h30 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" data/local/rnnlm.h30 sleep 20; # wait till tools compiled. -echo h100 Begin +echo h100 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ --hidden 100 --nwords 10000 --class 200 \ --direct 0 data/local/rnnlm.h100 @@ -44,7 +44,7 @@ local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ echo h500 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ --hidden 500 --nwords 10000 --class 200 \ - --direct 0 data/local/rnnlm.h400 + --direct 0 data/local/rnnlm.h500 #SKIP @@ -60,9 +60,9 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.5" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --N 100 --cmd "queue -l mem_free=1G" --inv-acwt $acwt 0.5 \ + --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.5 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.5 - + rm -rf ${resultsdir}_L0.25 rm -rf ${resultsdir}_L0.75 cp -rp ${resultsdir}_L0.5 ${resultsdir}_L0.25 @@ -70,12 +70,12 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.25" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.25 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.25 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.25 echo "rnnlm0.75" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.75 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.75 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.75 done done diff --git a/egs/csj/s5/local/nnet/run_lstm.sh b/egs/csj/s5/local/nnet/run_lstm.sh index 3cc330c55a8..dc0f40dec24 100755 --- a/egs/csj/s5/local/nnet/run_lstm.sh +++ b/egs/csj/s5/local/nnet/run_lstm.sh @@ -34,10 +34,10 @@ stage=0 steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 $dir $dir/log $dir/data || exit 1; steps/compute_cmvn_stats.sh $dir $dir/log $dir/data || exit 1; done - + # Training set utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp - steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd -tc 10" \ + steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \ $train $train/log $train/data || exit 1; steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; # Split the training set diff --git a/egs/fame/README.txt b/egs/fame/README.txt new file mode 100644 index 00000000000..d2ed39eef75 --- /dev/null +++ b/egs/fame/README.txt @@ -0,0 +1,15 @@ +The FAME! Speech Corpus + +The components of the Frisian data collection are speech and language resources gathered for building a large vocabulary ASR system for the Frisian language. 
Firstly, a new broadcast database is created by collecting recordings from the archives of the regional broadcaster Omrop Fryslân and annotating them with information such as language switches and speaker details. The second component of this collection is a language model created on a text corpus with a diverse vocabulary. Thirdly, a Frisian phonetic dictionary with the mappings between Frisian words and phones is built to make ASR viable for this under-resourced language. Finally, an ASR recipe is provided which uses all of the previous resources to perform recognition and report the recognition performance. + +The Corpus consists of short utterances extracted from 203 audio segments, each approximately 5 minutes long, which are parts of various radio programs covering a time span of almost 50 years (1966-2015), adding a longitudinal dimension to the database. The content of the recordings is very diverse, including radio programs about culture, history, literature, sports, nature, agriculture, politics, society and languages. The total duration of the manually annotated radio broadcasts adds up to 18 hours, 33 minutes and 57 seconds. The stereo audio data has a sampling frequency of 48 kHz and a 16-bit resolution per sample. The available meta-information helped the annotators identify the speakers and mark each of them either by name or, if the name is not known, with a consistent label. There are 309 identified speakers in the FAME! Speech Corpus, 21 of whom appear at least 3 times in the database. These speakers are mostly program presenters and celebrities appearing multiple times in different recordings over the years. There are 233 unidentified speakers due to a lack of meta-information. The total number of word- and sentence-level code-switching cases in the FAME! Speech Corpus is 3837. Music portions have been removed, except where they overlap with speech. + +A full description of the FAME! Speech Corpus is provided in: + +Yilmaz, E., Heuvel, H. van den, Van de Velde, H., Kampstra, F., Algra, J., Leeuwen, D. van: + +Open Source Speech and Language Resources for Frisian Language. + +In: Proceedings Interspeech 2016, pp. 1536--1540, 8-12 September 2016, San Francisco + +Please check http://www.ru.nl/clst/datasets/ to get the FAME!
Speech Corpus diff --git a/egs/fame/s5/RESULTS b/egs/fame/s5/RESULTS new file mode 100644 index 00000000000..a8541fba6b5 --- /dev/null +++ b/egs/fame/s5/RESULTS @@ -0,0 +1,28 @@ +%WER 41.10 [ 4974 / 12101, 522 ins, 1223 del, 3229 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 38.10 [ 4909 / 12886, 527 ins, 1220 del, 3162 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 41.06 [ 4969 / 12101, 514 ins, 1277 del, 3178 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 40.38 [ 4886 / 12101, 515 ins, 1225 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.0 +%WER 40.15 [ 4859 / 12101, 514 ins, 1177 del, 3168 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5 +%WER 37.86 [ 4879 / 12886, 596 ins, 1083 del, 3200 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0 +%WER 37.16 [ 4789 / 12886, 592 ins, 1056 del, 3141 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_10_0.0 +%WER 36.92 [ 4757 / 12886, 618 ins, 1010 del, 3129 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_10_0.0 +%WER 42.38 [ 5129 / 12101, 576 ins, 1171 del, 3382 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 39.14 [ 5043 / 12886, 536 ins, 1172 del, 3335 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 42.05 [ 5088 / 12101, 525 ins, 1282 del, 3281 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 41.41 [ 5011 / 12101, 461 ins, 1345 del, 3205 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5 +%WER 40.97 [ 4958 / 12101, 485 ins, 1279 del, 3194 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5 +%WER 38.79 [ 4998 / 12886, 512 ins, 1194 del, 3292 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.0 +%WER 38.16 [ 4917 / 12886, 544 ins, 1128 del, 3245 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 +%WER 37.68 [ 4856 / 12886, 564 ins, 1068 del, 3224 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_11_0.0 +%WER 70.85 [ 8574 / 12101, 414 ins, 2596 del, 5564 sub ] exp/mono/decode_devel/wer_9_0.0 +%WER 68.17 [ 8785 / 12886, 413 ins, 2704 del, 5668 sub ] exp/mono/decode_test/wer_9_0.0 +%WER 44.05 [ 5330 / 12101, 560 ins, 1467 del, 3303 sub ] exp/sgmm2/decode_devel/wer_10_0.0 +%WER 40.22 [ 5183 / 12886, 680 ins, 1142 del, 3361 sub ] exp/sgmm2/decode_test/wer_9_0.0 +%WER 54.39 [ 6582 / 12101, 695 ins, 1595 del, 4292 sub ] exp/tri1/decode_devel/wer_10_0.0 +%WER 51.60 [ 6649 / 12886, 630 ins, 1706 del, 4313 sub ] exp/tri1/decode_test/wer_11_0.0 +%WER 51.53 [ 6236 / 12101, 659 ins, 1675 del, 3902 sub ] exp/tri2/decode_devel/wer_11_0.0 +%WER 48.32 [ 6226 / 12886, 643 ins, 1669 del, 3914 sub ] exp/tri2/decode_test/wer_12_0.0 +%WER 47.15 [ 5706 / 12101, 580 ins, 1537 del, 3589 sub ] exp/tri3/decode_devel/wer_13_0.0 +%WER 52.13 [ 6308 / 12101, 623 ins, 1706 del, 3979 sub ] exp/tri3/decode_devel.si/wer_11_0.5 +%WER 43.71 [ 5632 / 12886, 594 ins, 1538 del, 3500 sub ] exp/tri3/decode_test/wer_14_0.0 +%WER 48.21 [ 6212 / 12886, 825 ins, 1358 del, 4029 sub ] exp/tri3/decode_test.si/wer_10_0.0 diff --git a/egs/fame/s5/cmd.sh b/egs/fame/s5/cmd.sh new file mode 120000 index 00000000000..19f7e836644 --- /dev/null +++ b/egs/fame/s5/cmd.sh @@ -0,0 +1 @@ +../../wsj/s5/cmd.sh \ No newline at end of file diff --git a/egs/fame/s5/conf/decode_dnn.config b/egs/fame/s5/conf/decode_dnn.config new file mode 100644 index 00000000000..89dd9929a62 --- /dev/null +++ 
b/egs/fame/s5/conf/decode_dnn.config @@ -0,0 +1,2 @@ +beam=18.0 # beam for decoding. Was 13.0 in the scripts. +lattice_beam=10.0 # this has most effect on size of the lattices. diff --git a/egs/fame/s5/conf/fbank.conf b/egs/fame/s5/conf/fbank.conf new file mode 100644 index 00000000000..c4b73674cab --- /dev/null +++ b/egs/fame/s5/conf/fbank.conf @@ -0,0 +1,2 @@ +# No non-default options for now. + diff --git a/egs/fame/s5/conf/mfcc.conf b/egs/fame/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/fame/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/fame/s5/conf/mfcc_hires.conf b/egs/fame/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/fame/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/fame/s5/conf/online_cmvn.conf b/egs/fame/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..cbdaf5f281c --- /dev/null +++ b/egs/fame/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh diff --git a/egs/fame/s5/local/fame_data_prep.sh b/egs/fame/s5/local/fame_data_prep.sh new file mode 100755 index 00000000000..2c2d1e79238 --- /dev/null +++ b/egs/fame/s5/local/fame_data_prep.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +set -e -o pipefail +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" && exit 1 +fi + +echo "Preparing train, development and test data" +mkdir -p data data/local data/train data/devel data/test + +for x in train devel test; do + echo "Copy spk2utt, utt2spk, wav.scp, text for $x" + cp $corpus/data/$x/text data/$x/text || exit 1; + cp $corpus/data/$x/spk2utt data/$x/spk2utt || exit 1; + cp $corpus/data/$x/utt2spk data/$x/utt2spk || exit 1; + + # the corpus wav.scp contains physical paths, so we just re-generate + # the file from scratch instead of figuring out how to edit it + for rec in $(awk '{print $1}' $corpus/data/$x/text) ; do + spk=${rec%_*} + filename=$corpus/fame/wav/${x}/${rec:8}.wav + if [ ! -f "$filename" ] ; then + echo >&2 "The file $filename could not be found ($rec)" + exit 1 + fi + # we might want to store physical paths as a general rule + filename=$(readlink -f $filename) + echo "$rec $filename" + done > data/$x/wav.scp + + # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, + # duplicate entries and so on).
Also, it regenerates the spk2utt from + # utt2spk + utils/fix_data_dir.sh data/$x +done + +echo "Copying language model" +if [ -f $corpus/lm/LM_FR_IKN3G ] ; then + gzip -c $corpus/lm/LM_FR_IKN3G > data/local/LM.gz +fi + +echo "Data preparation completed." + diff --git a/egs/fame/s5/local/fame_dict_prep.sh b/egs/fame/s5/local/fame_dict_prep.sh new file mode 100755 index 00000000000..c6530217a67 --- /dev/null +++ b/egs/fame/s5/local/fame_dict_prep.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" && exit 1 +fi + +mkdir -p data/lang data/local/dict + + +cat $corpus/lexicon/lex.asr $corpus/lexicon/lex.oov > data/local/dict/lexicon.txt +echo "!SIL SIL" >> data/local/dict/lexicon.txt +echo "<unk> SPN" >> data/local/dict/lexicon.txt +env LC_ALL=C sort -u -o data/local/dict/lexicon.txt data/local/dict/lexicon.txt +cat data/local/dict/lexicon.txt | \ + perl -ane 'print join("\n", @F[1..$#F]) . "\n"; ' | \ + sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt + + +touch data/local/dict/extra_questions.txt +touch data/local/dict/optional_silence.txt + +echo "SIL" > data/local/dict/optional_silence.txt +echo "SIL" > data/local/dict/silence_phones.txt +echo "<unk>" > data/local/dict/oov.txt + +echo "Dictionary preparation succeeded" diff --git a/egs/fame/s5/local/nnet/run_dnn.sh b/egs/fame/s5/local/nnet/run_dnn.sh new file mode 100755 index 00000000000..ca1efa5e0ac --- /dev/null +++ b/egs/fame/s5/local/nnet/run_dnn.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) +# Apache 2.0 + +# This example script trains a DNN on top of fMLLR features. +# The training is done in 3 stages, +# +# 1) RBM pre-training: +# in this unsupervised stage we train a stack of RBMs, +# a good starting point for frame cross-entropy training. +# 2) frame cross-entropy training: +# the objective is to classify frames to correct pdfs. +# 3) sequence-training optimizing sMBR: +# the objective is to emphasize state-sequences with better +# frame accuracy w.r.t. reference alignment. + +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. ./path.sh ## Source the tools/utils (import the queue.pl) + +set -eu + +# Config: +gmm=exp/tri3 +data_fmllr=data-fmllr-tri3 +stage=0 # resume training with --stage=N +# End of config. +. utils/parse_options.sh +# + +[ !
-e $data_fmllr/test ] && if [ $stage -le 0 ]; then + # Store fMLLR features, so we can train on them easily, + # devel + dir=$data_fmllr/devel + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_devel \ + $dir data/devel $gmm $dir/log $dir/data + # test + dir=$data_fmllr/test + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_test \ + $dir data/test $gmm $dir/log $dir/data + # train + dir=$data_fmllr/train + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir ${gmm}_ali \ + $dir data/train $gmm $dir/log $dir/data + # split the data : 90% train 10% cross-validation (held-out) + utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4b_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh --hid-dim 2048 --rbm-iter 10 $data_fmllr/train $dir +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy. + dir=exp/dnn4b_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4b_pretrain-dbn/final.feature_transform + dbn=exp/dnn4b_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/devel $dir/decode_devel + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/test $dir/decode_test +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. +dir=exp/dnn4b_pretrain-dbn_dnn_smbr +srcdir=exp/dnn4b_pretrain-dbn_dnn +acwt=0.1 + +if [ $stage -le 3 ]; then + # First we generate lattices and alignments: + steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \ + $data_fmllr/train data/lang $srcdir ${srcdir}_ali + steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \ + $data_fmllr/train data/lang $srcdir ${srcdir}_denlats +fi + +if [ $stage -le 4 ]; then + # Re-train the DNN by 6 iterations of sMBR + steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \ + $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir + # Decode + for ITER in 6 3 1; do + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $data_fmllr/devel $dir/decode_devel_it${ITER} + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $data_fmllr/test $dir/decode_test_it${ITER} + done +fi + +echo Success +exit 0 + +# Getting results [see RESULTS file] +# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done + +# to see how model conversion to nnet2 works, run run_dnn_convert_nnet2.sh at this point. 
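Note: run_dnn.sh above is organized around its stage variable, so an interrupted run can be resumed without redoing completed work. A short usage sketch (the stage numbers follow the script's own if-blocks; the invocations themselves are illustrative):

# full run from the top:
local/nnet/run_dnn.sh
# resume at sMBR sequence training, skipping fMLLR feature storage, DBN
# pre-training and the cross-entropy DNN (stages 0-2):
local/nnet/run_dnn.sh --stage 3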
+ diff --git a/egs/fame/s5/local/nnet/run_dnn_fbank.sh b/egs/fame/s5/local/nnet/run_dnn_fbank.sh new file mode 100755 index 00000000000..a81449ffbcf --- /dev/null +++ b/egs/fame/s5/local/nnet/run_dnn_fbank.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) +# Apache 2.0 + +# This example script trains a DNN on top of FBANK features. +# The training is done in 3 stages, +# +# 1) RBM pre-training: +# in this unsupervised stage we train a stack of RBMs, +# a good starting point for frame cross-entropy training. +# 2) frame cross-entropy training: +# the objective is to classify frames to correct pdfs. +# 3) sequence-training optimizing sMBR: +# the objective is to emphasize state-sequences with better +# frame accuracy w.r.t. reference alignment. + +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. ./path.sh ## Source the tools/utils (import the queue.pl) + +dev=data-fbank/devel +tst=data-fbank/test +train=data-fbank/train + +dev_original=data/devel +tst_original=data/test +train_original=data/train + +gmm=exp/tri3 + +stage=0 +. utils/parse_options.sh || exit 1; + +set -eu + +# Make the FBANK features +[ ! -e $dev ] && if [ $stage -le 0 ]; then + # Dev set + utils/copy_data_dir.sh $dev_original $dev || exit 1; rm $dev/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $dev $dev/log $dev/data || exit 1; + steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1; + # Test set + utils/copy_data_dir.sh $tst_original $tst || exit 1; rm $tst/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $tst $tst/log $tst/data || exit 1; + steps/compute_cmvn_stats.sh $tst $tst/log $tst/data || exit 1; + # Training set + utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $train $train/log $train/data || exit 1; + steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; + # Split the training set + utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4d-fbank_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh \ + --cmvn-opts "--norm-means=true --norm-vars=true" \ + --delta-opts "--delta-order=2" --splice 5 \ + --hid-dim 2048 --rbm-iter 10 $train $dir || exit 1; +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy.
+ dir=exp/dnn4d-fbank_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4d-fbank_pretrain-dbn/final.feature_transform + dbn=exp/dnn4d-fbank_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1; + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $dev $dir/decode_devel || exit 1; + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $tst $dir/decode_test || exit 1; +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. +dir=exp/dnn4d-fbank_pretrain-dbn_dnn_smbr +srcdir=exp/dnn4d-fbank_pretrain-dbn_dnn +acwt=0.1 + +if [ $stage -le 3 ]; then + # First we generate lattices and alignments: + steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \ + $train data/lang $srcdir ${srcdir}_ali || exit 1; + steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \ + $train data/lang $srcdir ${srcdir}_denlats || exit 1; +fi + +if [ $stage -le 4 ]; then + # Re-train the DNN by 6 iterations of sMBR + steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \ + $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1 + # Decode + for ITER in 6 3 1; do + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $dev $dir/decode_devel_it${ITER} || exit 1 + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $tst $dir/decode_test_it${ITER} || exit 1 + done +fi + +echo Success +exit 0 + +# Getting results [see RESULTS file] +# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done diff --git a/egs/fame/s5/local/score.sh b/egs/fame/s5/local/score.sh new file mode 120000 index 00000000000..0afefc3158c --- /dev/null +++ b/egs/fame/s5/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/fame/s5/local/wer_hyp_filter b/egs/fame/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_hyp_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:<unk>::g diff --git a/egs/fame/s5/local/wer_output_filter b/egs/fame/s5/local/wer_output_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_output_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:<unk>::g diff --git a/egs/fame/s5/local/wer_ref_filter b/egs/fame/s5/local/wer_ref_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_ref_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:<unk>::g diff --git a/egs/fame/s5/path.sh b/egs/fame/s5/path.sh new file mode 100755 index 00000000000..2d17b17a84a --- /dev/null +++ b/egs/fame/s5/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ !
-f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/fame/s5/run.sh b/egs/fame/s5/run.sh new file mode 100755 index 00000000000..26a8485ff7d --- /dev/null +++ b/egs/fame/s5/run.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +stage=0 +feat_nj=10 +train_nj=10 +decode_nj=10 +famecorpus=./corpus + +if [ -d $famecorpus ] ; then + echo "Fame corpus present. OK." +elif [ -f ./fame.tar.gz ] ; then + echo "Unpacking..." + tar xzf fame.tar.gz +elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then + echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ " + echo " and download the corpus and put it at $famecorpus" && exit 1 +fi + +numLeavesTri1=5000 +numGaussTri1=25000 +numLeavesMLLT=5000 +numGaussMLLT=25000 +numLeavesSAT=5000 +numGaussSAT=25000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=20000 + +if [ $stage -le 1 ]; then + local/fame_data_prep.sh $famecorpus || exit 1; + local/fame_dict_prep.sh $famecorpus || exit 1; + utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang || exit 1; + utils/format_lm.sh data/lang data/local/LM.gz data/local/dict/lexicon.txt data/lang_test || exit 1; +fi + +if [ $stage -le 2 ]; then + # Feature extraction + for x in train devel test; do + steps/make_mfcc.sh --nj $feat_nj --cmd "$train_cmd" data/$x exp/make_mfcc/$x mfcc || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1; + done +fi + +if [ $stage -le 3 ]; then + ### Monophone + echo "Starting monophone training." + steps/train_mono.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono || exit 1; + echo "Mono training done." + + echo "Decoding the development and test sets using monophone models." + utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/devel exp/mono/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode_test || exit 1; + echo "Monophone decoding done." +fi + + +if [ $stage -le 4 ]; then + ### Triphone + echo "Starting triphone training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali || exit 1; + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1 || exit 1; + echo "Triphone training done." + + echo "Decoding the development and test sets using triphone models." + utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/devel exp/tri1/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode_test || exit 1; + echo "Triphone decoding done." +fi + +if [ $stage -le 5 ]; then + ### Triphone + LDA and MLLT + echo "Starting LDA+MLLT training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2 || exit 1; + echo "LDA+MLLT training done." + + echo "Decoding the development and test sets using LDA+MLLT models."
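+ # (Note, a hedged aside rather than a line of the original recipe: with the
+ # --splice-opts "--left-context=3 --right-context=3" used above, 7 consecutive
+ # 13-dim MFCC frames are spliced into a 91-dim vector, which train_lda_mllt.sh
+ # then projects with LDA+MLLT down to its default of 40 dimensions.)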
+ utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/devel exp/tri2/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode_test || exit 1; + echo "LDA+MLLT decoding done." +fi + + +if [ $stage -le 6 ]; then + ### Triphone + LDA and MLLT + SAT and FMLLR + echo "Starting SAT+FMLLR training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali || exit 1; + steps/train_sat.sh --cmd "$train_cmd" $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3 || exit 1; + echo "SAT+FMLLR training done." + + echo "Decoding the development and test sets using SAT+FMLLR models." + utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/devel exp/tri3/decode_devel || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode_test || exit 1; + echo "SAT+FMLLR decoding done." +fi + + +if [ $stage -le 7 ]; then + echo "Starting SGMM training." + steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri3 exp/tri3_ali || exit 1; + steps/train_ubm.sh --cmd "$train_cmd" $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm || exit 1; + steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM data/train data/lang exp/tri3_ali exp/ubm/final.ubm exp/sgmm2 || exit 1; + echo "SGMM training done." + + echo "Decoding the development and test sets using SGMM models" + utils/mkgraph.sh data/lang_test exp/sgmm2 exp/sgmm2/graph || exit 1; + steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_devel exp/sgmm2/graph data/devel exp/sgmm2/decode_devel || exit 1; + steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_test exp/sgmm2/graph data/test exp/sgmm2/decode_test || exit 1; + echo "SGMM decoding done." +fi + +if [ $stage -le 8 ]; then + echo "Starting DNN training and decoding." 
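+ # (Note, a sketch assuming the standard Kaldi flow, not part of the original
+ # file: every GMM stage above repeats the same pattern -- compile the decoding
+ # graph once per model, then decode each set with it; for a hypothetical model
+ # directory exp/foo this would read:
+ #   utils/mkgraph.sh data/lang_test exp/foo exp/foo/graph
+ #   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/foo/graph data/devel exp/foo/decode_devel)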
+ local/nnet/run_dnn.sh || exit 1; + local/nnet/run_dnn_fbank.sh || exit 1; +fi + +#score +for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done diff --git a/egs/fame/s5/steps b/egs/fame/s5/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/fame/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/fame/s5/utils b/egs/fame/s5/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/fame/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 380a8aec936..ad650cd390e 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -256,7 +256,7 @@ steps/train_mmi_sgmm2.sh \ ( utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph -steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ +steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\ exp/tri5a/graph data/dev exp/tri5a/decode_dev utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph @@ -274,9 +274,9 @@ done dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 2G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 2G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -287,7 +287,7 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & wait diff --git a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh index eae5f7b8581..210d0f5646f 100755 --- a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh +++ b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh @@ -21,7 +21,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ "$USER" == dpovey ]; then diff --git a/egs/fisher_english/s5/local/online/run_nnet2.sh b/egs/fisher_english/s5/local/online/run_nnet2.sh index 0b9adb7d315..de4d56bb52e 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2.sh @@ -21,7 +21,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a machine where "nvcc" is installed.
EOF fi -parallel_opts="-l gpu=1" +parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 dir=exp/nnet2_online/nnet_a diff --git a/egs/fisher_english/s5/local/online/run_nnet2_b.sh b/egs/fisher_english/s5/local/online/run_nnet2_b.sh index 7eac7cf0a7d..e1491a10c0b 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2_b.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2_b.sh @@ -19,22 +19,22 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat <0){ seen[$1]=1; } } +cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex)>0){ seen[$1]=1; } } {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<unk> ");} } printf("\n");}' \ > $cleantext || exit 1; @@ -75,7 +59,7 @@ train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; train_lm.sh --arpa --lmtype 4gram-mincount $dir || exit 1; # note: output is -# data/local/lm/3gram-mincount/lm_unpruned.gz +# data/local/lm/3gram-mincount/lm_unpruned.gz exit 0 @@ -97,7 +81,7 @@ cat $dir/word_map | awk '{print $1}' | cat - <(echo "<s>"; echo "</s>" ) > $sdir/wordlist ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \ -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz -ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout # data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for <unk> in closed-vocabulary LM # file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs @@ -106,7 +90,7 @@ ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout # Note: perplexity SRILM gives to Kaldi-LM model is similar to what kaldi-lm reports above. # Difference in WSJ must have been due to different treatment of <unk>. -ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout +ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout # data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for <unk> in closed-vocabulary LM # file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs diff --git a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh index 4d083d61d0e..43cdd2cef65 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh @@ -92,7 +92,7 @@ for line in sys.stdin.readlines(): # Take the first 30k utterances (about 1/8th of the data) this will be used # for the diagubm training utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires - local/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr + utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr fi # ivector extractor training diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index 4afa867503a..324061aa5ac 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -8,7 +8,7 @@ set -o pipefail # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# .
./cmd.sh @@ -38,27 +38,21 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.00000125 max_param_change=1 num_jobs_nnet=4 num_epochs=2 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 - ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. if $use_gpu; then if ! cuda-compiled; then - cat <" + echo "Usage: $0 " + echo "e.g.: $0 /export/corpora/LDC/LDC2007S10" echo "See comments in the script for more details" exit 1 fi @@ -19,7 +20,7 @@ sdir=$1 [ ! -d $sdir/data/references/eval03/english/cts ] \ && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1; -. path.sh +. path.sh dir=data/local/rt03 mkdir -p $dir @@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{ # sw02001-A_000098-001156 sw02001-A 0.98 11.56 #pem=$sdir/english/hub5e_00.pem #[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 #grep -v ';;' $pem \ @@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \ | sort -u > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -77,7 +78,7 @@ cat $tdir/*.stm | \ grep -v inter_segment_gap | \ awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\ - > $dir/stm + > $dir/stm #$tdir/reference/hub5e00.english.000405.stm > $dir/stm cp $rtroot/data/trans_rules/en20030506.glm $dir/glm @@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. 
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -110,4 +111,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/fisher_swbd/s5/path.sh b/egs/fisher_swbd/s5/path.sh index e14c6074f6b..2d17b17a84a 100755 --- a/egs/fisher_swbd/s5/path.sh +++ b/egs/fisher_swbd/s5/path.sh @@ -1,6 +1,6 @@ -export KALDI_ROOT=`pwd`/../../../ -export PWD=`pwd` -export PATH=$KALDI_ROOT/src/ivectorbin:$PWD/stanford-utils:$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$KALDI_ROOT/tools/kaldi_lm:$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$PATH +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C diff --git a/egs/fisher_swbd/s5/run.sh b/egs/fisher_swbd/s5/run.sh index 5addefc5fe1..0b0d7b9401d 100755 --- a/egs/fisher_swbd/s5/run.sh +++ b/egs/fisher_swbd/s5/run.sh @@ -1,12 +1,18 @@ #!/bin/bash # It's best to run the commands in this one by one. - . ./cmd.sh . ./path.sh mfccdir=mfcc set -e rescore=true + +# check for kaldi_lm +which get_word_map.pl > /dev/null +if [ $? -ne 0 ]; then + echo "This recipe requires installation of tools/kaldi_lm. Please run extras/kaldi_lm.sh in tools/" && exit 1; +fi + # prepare fisher data and put it under data/train_fisher local/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \ /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13 @@ -40,7 +46,7 @@ for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do done # LM for train_all -local/fisher_train_lms.sh +local/fisher_train_lms.sh #local/fisher_create_test_lang.sh # Compiles G for trigram LM LM=data/local/lm/3gram-mincount/lm_unpruned.gz @@ -58,7 +64,7 @@ fi #local/eval2000_data_prep.sh /scail/group/deeplearning/speech/datasets/LDC2002S09/hub5e_00/ /scail/group/deeplearning/speech/datasets/LDC2002T43 || exit 1 local/eval2000_data_prep.sh /export/corpora/LDC/LDC2002S09/hub5e_00 /export/corpora/LDC/LDC2002T43 || exit 1 - + #local/rt03_data_prep.sh /scail/group/deeplearning/speech/datasets/rt_03 || exit 1 local/rt03_data_prep.sh /export/corpora/LDC/LDC2007S10 || exit 1 @@ -66,6 +72,12 @@ utils/fix_data_dir.sh data/train_all # Make MFCCs for the training set +# spread the mfccs over various machines, as this data-set is quite large. +if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename. 
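+ # (Note, a hedged aside: utils/create_split_dir.pl below creates $mfccdir/storage
+ # with numbered subdirectories symlinked round-robin across the listed disks;
+ # steps/make_mfcc.sh then places its .ark files through those links, spreading
+ # the I/O load over the machines.)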
+ utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/fisher_swbd/s5/$mfcc/storage \ + $mfccdir/storage +fi steps/make_mfcc.sh --nj 100 --cmd "$train_cmd" data/train_all exp/make_mfcc/train_all $mfccdir || exit 1; utils/fix_data_dir.sh data/train_all utils/validate_data_dir.sh data/train_all @@ -111,31 +123,31 @@ utils/data/remove_dup_utts.sh 200 data/train_30k data/train_30k_nodup utils/data/remove_dup_utts.sh 200 data/train_100k data/train_100k_nodup utils/data/remove_dup_utts.sh 300 data/train data/train_nodup -# The next commands are not necessary for the scripts to run, but increase -# efficiency of data access by putting the mfcc's of the subset +# The next commands are not necessary for the scripts to run, but increase +# efficiency of data access by putting the mfcc's of the subset # in a contiguous place in a file. -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. - cp data/train_10k_nodup/feats.scp{,.bak} + cp data/train_10k_nodup/feats.scp{,.bak} copy-feats scp:data/train_10k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_10k_nodup.ark,$mfccdir/kaldi_fish_10k_nodup.scp \ && cp $mfccdir/kaldi_fish_10k_nodup.scp data/train_10k_nodup/feats.scp ) -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. - cp data/train_30k_nodup/feats.scp{,.bak} + cp data/train_30k_nodup/feats.scp{,.bak} copy-feats scp:data/train_30k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_30k_nodup.ark,$mfccdir/kaldi_fish_30k_nodup.scp \ && cp $mfccdir/kaldi_fish_30k_nodup.scp data/train_30k_nodup/feats.scp ) -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. - cp data/train_100k_nodup/feats.scp{,.bak} + cp data/train_100k_nodup/feats.scp{,.bak} copy-feats scp:data/train_100k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_100k_nodup.ark,$mfccdir/kaldi_fish_100k_nodup.scp \ && cp $mfccdir/kaldi_fish_100k_nodup.scp data/train_100k_nodup/feats.scp ) # Start training on the Switchboard subset, which has cleaner alignments steps/train_mono.sh --nj 3 --cmd "$train_cmd" \ - data/train_10k_nodup data/lang_nosp exp/mono0a + data/train_10k_nodup data/lang_nosp exp/mono0a steps/align_si.sh --nj 10 --cmd "$train_cmd" \ data/train_30k_nodup data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1; @@ -171,8 +183,8 @@ steps/align_si.sh --nj 50 --cmd "$train_cmd" \ steps/train_deltas.sh --cmd "$train_cmd" \ 5500 90000 data/train_100k_nodup data/lang_nosp exp/tri1b_ali exp/tri2 || exit 1; #used to be 2500 20000 on 30k -( - graph_dir=exp/tri2/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri2/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri2 $graph_dir || exit 1; steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri2/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -180,7 +192,7 @@ steps/train_deltas.sh --cmd "$train_cmd" \ $graph_dir data/rt03 exp/tri2/decode_rt03_nosp_fsh_sw1_tg || exit 1; )& -# Train tri3a, the last speaker-independent triphone stage, +# Train tri3a, the last speaker-independent triphone stage, # on the whole Switchboard training set steps/align_si.sh --nj 100 --cmd "$train_cmd" \ data/train_swbd data/lang_nosp exp/tri2 exp/tri2_ali || exit 1; @@ -189,8 +201,8 @@ steps/train_deltas.sh --cmd "$train_cmd" \ 11500 200000 data/train_swbd data/lang_nosp exp/tri2_ali exp/tri3a || exit 1; #used to be 2500 20000 -( - graph_dir=exp/tri3a/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri3a/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri3a $graph_dir || exit 1; 
steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3a/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -205,8 +217,8 @@ steps/align_si.sh --nj 100 --cmd "$train_cmd" \ steps/train_lda_mllt.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ 11500 400000 data/train_nodup data/lang_nosp exp/tri3a_ali exp/tri3b || exit 1; -( - graph_dir=exp/tri3b/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri3b/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri3b $graph_dir || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3b/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -232,16 +244,16 @@ if [ $rescore ]; then utils/build_const_arpa_lm.sh $LM_fg data/lang data/lang_fsh_sw1_fg fi -( +( graph_dir=exp/tri3b/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri3b $graph_dir || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3b/decode_eval2000_fsh_sw1_tg || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/rt03 exp/tri3b/decode_rt03_fsh_sw1_tg || exit 1; -) & +)& -# Next we'll use fMLLR and train with SAT (i.e. on +# Next we'll use fMLLR and train with SAT (i.e. on # fMLLR features) steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ @@ -250,7 +262,7 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 11500 800000 data/train_nodup data/lang exp/tri3b_ali exp/tri4a || exit 1; -( +( graph_dir=exp/tri4a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri4a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -272,11 +284,10 @@ fi steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ data/train_nodup data/lang exp/tri4a exp/tri4a_ali || exit 1; - steps/train_sat.sh --cmd "$train_cmd" \ 11500 1600000 data/train_nodup data/lang exp/tri4a_ali exp/tri5a || exit 1; -( +( graph_dir=exp/tri5a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri5a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -308,7 +319,7 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 11500 3200000 data/train_nodup data/lang exp/tri5a_ali exp/tri6a || exit 1; -( +( graph_dir=exp/tri6a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri6a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -331,9 +342,6 @@ fi #steps/align_fmllr.sh --nj 200 --cmd "$train_cmd" \ # data/train_nodup data/lang exp/tri6a exp/tri6a_ali || exit 1; - -# # The following is the current online-nnet2 recipe, with "multi-splice". +# The following is the current online-nnet2 recipe, with "multi-splice". # local/online/run_nnet2_ms.sh local/online/run_nnet2_ms.sh - - diff --git a/egs/gale_arabic/README.txt b/egs/gale_arabic/README.txt index db436f11e8c..928fca8fdf3 100644 --- a/egs/gale_arabic/README.txt +++ b/egs/gale_arabic/README.txt @@ -10,8 +10,13 @@ GALE Phase 2 Arabic Broadcast Conversation Speech was developed by the Linguisti The data has two types of speech: conversational and report. 
This script trains and tests on all of them, and results are reported for each type; the training data is 320 hours, with 9.3 hours for testing -The dictionary, and scripts can be obtained from QCRI portal: http://alt.qcri.org/ +The dictionaries and scripts can be obtained from the QCRI portal: http://alt.qcri.org/ +The experiments here are based on the above corpus -s5: The experiments here are based on the above corpus +s5: Phoneme based: +s5b: Grapheme based: This is the recommended setup, including nnet3 and chain modeling + +[1] "A Complete Kaldi Recipe For Building Arabic Speech Recognition Systems", A. Ali, Y. Zhang, P. Cardinal, N. Dahak, S. Vogel, J. Glass. SLT 2014 +[2] "QCRI Advanced Transcription Systems (QATS) for the Arabic Multi-Dialect Broadcast Media Recognition: MGB-2 Challenge", S. Khurana, A. Ali. SLT 2016 \ No newline at end of file diff --git a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh index 8e42128726f..8b93a234eec 100755 --- a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh +++ b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh @@ -59,3 +59,5 @@ awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' #rm -fr $txtdir cd $top_pwd echo data prep text succeeded + +exit 0 diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh index 6675dd20f71..9f03b9224cf 100755 --- a/egs/gale_arabic/s5/local/gale_format_data.sh +++ b/egs/gale_arabic/s5/local/gale_format_data.sh @@ -56,3 +56,5 @@ fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ echo gale_format_data succeeded. + +exit 0 \ No newline at end of file diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh index b46d5d5fa29..74ef789eda7 100755 --- a/egs/gale_arabic/s5/local/gale_prep_dict.sh +++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh @@ -30,3 +30,4 @@ sort -u > $dir/nonsilence_phones.txt || exit 1; echo Dictionary preparation succeeded +exit 0 diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh index 838e7a26136..1b5d4665a19 100755 --- a/egs/gale_arabic/s5/local/gale_train_lms.sh +++ b/egs/gale_arabic/s5/local/gale_train_lms.sh @@ -112,3 +112,5 @@ fi echo train lm succeeded + +exit 0 \ No newline at end of file diff --git a/egs/gale_arabic/s5/local/online/run_nnet2.sh b/egs/gale_arabic/s5/local/online/run_nnet2.sh index 8ccbda5a8dc..0db62242459 100644 --- a/egs/gale_arabic/s5/local/online/run_nnet2.sh +++ b/egs/gale_arabic/s5/local/online/run_nnet2.sh @@ -18,23 +18,23 @@ decode_nj=30 if $use_gpu; then if ! cuda-compiled; then - cat <" data/local/lang data/lang +utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1; # LM training -local/gale_train_lms.sh +local/gale_train_lms.sh || exit 1; -local/gale_format_data.sh +local/gale_format_data.sh || exit 1; # G compilation, check LG composition # Now make MFCC features.
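Note: gale_prep_dict.sh and utils/prepare_lang.sh above follow the standard Kaldi dictionary-directory contract. A sketch of the expected inputs (the file names are the standard ones; the contents shown are illustrative, not taken from this recipe):

# data/local/dict/lexicon.txt             one pronunciation per line: <word> <phone1> <phone2> ...
# data/local/dict/nonsilence_phones.txt   all phones that are not silence
# data/local/dict/silence_phones.txt      e.g. SIL
# data/local/dict/optional_silence.txt    e.g. SIL
# data/local/dict/extra_questions.txt     may be left empty
# the second argument is the OOV entry, which must also appear in lexicon.txt:
utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang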
diff --git a/egs/gale_arabic/s5b/RESULTS b/egs/gale_arabic/s5b/RESULTS new file mode 100644 index 00000000000..2260a106654 --- /dev/null +++ b/egs/gale_arabic/s5b/RESULTS @@ -0,0 +1,72 @@ +## +# This file is generated using local/split_wer.sh $galeData //galeData is a local folder to keep intermediate gale data +# look at the end of run.sh in the same folder +## +##### RESULTS generated by amali at 2017-01-01-08-05-59 + +Report Results WER: +%WER 9.50 [ 2124 / 22363, 160 ins, 275 del, 1689 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_report_9 +%WER 10.72 [ 2398 / 22363, 163 ins, 313 del, 1922 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_report_9 +%WER 12.04 [ 2693 / 22363, 226 ins, 271 del, 2196 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_report_9 +%WER 12.29 [ 2749 / 22363, 273 ins, 266 del, 2210 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_report_10 +%WER 17.82 [ 3986 / 22363, 315 ins, 618 del, 3053 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_report_12 +%WER 18.15 [ 4059 / 22363, 335 ins, 589 del, 3135 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_report_11 +%WER 18.42 [ 4119 / 22363, 346 ins, 590 del, 3183 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_report_11 +%WER 18.69 [ 4179 / 22363, 304 ins, 640 del, 3235 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_report_13 +%WER 19.06 [ 4263 / 22363, 348 ins, 611 del, 3304 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_report_12 +%WER 19.24 [ 4302 / 22363, 315 ins, 580 del, 3407 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_report_12 +%WER 19.37 [ 4331 / 22363, 319 ins, 553 del, 3459 sub ] exp/tri2b_mmi/decode_it4/wer_report_12 +%WER 19.61 [ 4386 / 22363, 348 ins, 563 del, 3475 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_report_12 +%WER 19.71 [ 4408 / 22363, 301 ins, 607 del, 3500 sub ] exp/tri2b_mmi/decode_it3/wer_report_13 +%WER 19.81 [ 4429 / 22363, 349 ins, 667 del, 3413 sub ] exp/sgmm_5a/decode/wer_report_14 +%WER 20.14 [ 4503 / 22363, 399 ins, 647 del, 3457 sub ] exp/tri2b_mpe/decode_it4/wer_report_14 +%WER 20.58 [ 4603 / 22363, 408 ins, 658 del, 3537 sub ] exp/tri2b_mpe/decode_it3/wer_report_14 +%WER 21.64 [ 4839 / 22363, 498 ins, 614 del, 3727 sub ] exp/tri3b/decode/wer_report_17 +%WER 23.32 [ 5214 / 22363, 470 ins, 727 del, 4017 sub ] exp/tri2b/decode/wer_report_16 +%WER 23.54 [ 5265 / 22363, 444 ins, 794 del, 4027 sub ] exp/tri3b/decode.si/wer_report_17 +%WER 25.66 [ 5738 / 22363, 478 ins, 838 del, 4422 sub ] exp/tri2a/decode/wer_report_14 +%WER 26.38 [ 5900 / 22363, 435 ins, 929 del, 4536 sub ] exp/tri1/decode/wer_report_15 +Conversational Results WER: +%WER 21.59 [ 10213 / 47305, 944 ins, 3092 del, 6177 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_conversational_9 +%WER 24.77 [ 11716 / 47305, 1098 ins, 3579 del, 7039 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_conversational_9 +%WER 26.78 [ 12670 / 47305, 1741 ins, 2434 del, 8495 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_conversational_9 +%WER 27.55 [ 13032 / 47305, 1800 ins, 2666 del, 8566 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_conversational_11 +%WER 34.10 [ 16133 / 47305, 1903 ins, 3245 del, 10985 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_conversational_11 +%WER 34.81 [ 16466 / 47305, 2077 ins, 3037 del, 11352 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_conversational_10 +%WER 35.19 [ 16648 / 47305, 1933 ins, 3264 del, 11451 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_conversational_11 +%WER 35.63 [ 16857 / 47305, 1988 ins, 3247 del, 11622 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_conversational_11 +%WER 36.23 [ 17137 / 47305, 2091 ins, 3256 del, 11790 sub ] 
exp/sgmm_5a_mmi_b0.1/decode1/wer_conversational_11 +%WER 37.40 [ 17691 / 47305, 2150 ins, 3362 del, 12179 sub ] exp/sgmm_5a/decode/wer_conversational_12 +%WER 37.95 [ 17951 / 47305, 1738 ins, 3892 del, 12321 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_conversational_11 +%WER 37.97 [ 17960 / 47305, 1890 ins, 4212 del, 11858 sub ] exp/tri2b_mpe/decode_it4/wer_conversational_13 +%WER 38.16 [ 18050 / 47305, 1678 ins, 4083 del, 12289 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_conversational_12 +%WER 38.47 [ 18200 / 47305, 1804 ins, 3698 del, 12698 sub ] exp/tri2b_mmi/decode_it4/wer_conversational_11 +%WER 38.50 [ 18213 / 47305, 1958 ins, 4156 del, 12099 sub ] exp/tri2b_mpe/decode_it3/wer_conversational_13 +%WER 38.51 [ 18215 / 47305, 1993 ins, 3476 del, 12746 sub ] exp/tri2b_mmi/decode_it3/wer_conversational_11 +%WER 39.26 [ 18574 / 47305, 2319 ins, 3963 del, 12292 sub ] exp/tri3b/decode/wer_conversational_17 +%WER 41.40 [ 19586 / 47305, 2140 ins, 4216 del, 13230 sub ] exp/tri3b/decode.si/wer_conversational_15 +%WER 42.23 [ 19979 / 47305, 2153 ins, 4354 del, 13472 sub ] exp/tri2b/decode/wer_conversational_15 +%WER 45.92 [ 21724 / 47305, 1995 ins, 5213 del, 14516 sub ] exp/tri2a/decode/wer_conversational_14 +%WER 46.86 [ 22166 / 47305, 2212 ins, 4819 del, 15135 sub ] exp/tri1/decode/wer_conversational_13 +Combined Results for Reports and Conversational WER: +%WER 17.64 [ 12286 / 69668, 1310 ins, 2807 del, 8169 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_8 +%WER 20.26 [ 14114 / 69668, 1261 ins, 3892 del, 8961 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_9 +%WER 22.05 [ 15363 / 69668, 1967 ins, 2705 del, 10691 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_9 +%WER 22.66 [ 15786 / 69668, 2047 ins, 2955 del, 10784 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_11 +%WER 28.89 [ 20127 / 69668, 2244 ins, 3829 del, 14054 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_11 +%WER 29.48 [ 20541 / 69668, 2243 ins, 3860 del, 14438 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_11 +%WER 29.81 [ 20767 / 69668, 2279 ins, 3854 del, 14634 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_11 +%WER 30.22 [ 21056 / 69668, 2165 ins, 4095 del, 14796 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_12 +%WER 30.74 [ 21417 / 69668, 2273 ins, 4099 del, 15045 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_12 +%WER 31.78 [ 22142 / 69668, 2547 ins, 3990 del, 15605 sub ] exp/sgmm_5a/decode/wer_12 +%WER 31.95 [ 22259 / 69668, 2092 ins, 4413 del, 15754 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_11 +%WER 32.20 [ 22436 / 69668, 2026 ins, 4646 del, 15764 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_12 +%WER 32.25 [ 22471 / 69668, 2315 ins, 4797 del, 15359 sub ] exp/tri2b_mpe/decode_it4/wer_13 +%WER 32.36 [ 22542 / 69668, 2156 ins, 4184 del, 16202 sub ] exp/tri2b_mmi/decode_it4/wer_11 +%WER 32.50 [ 22640 / 69668, 2393 ins, 3956 del, 16291 sub ] exp/tri2b_mmi/decode_it3/wer_11 +%WER 32.79 [ 22847 / 69668, 2407 ins, 4760 del, 15680 sub ] exp/tri2b_mpe/decode_it3/wer_13 +%WER 33.61 [ 23413 / 69668, 2817 ins, 4577 del, 16019 sub ] exp/tri3b/decode/wer_17 +%WER 35.73 [ 24894 / 69668, 2630 ins, 4944 del, 17320 sub ] exp/tri3b/decode.si/wer_15 +%WER 36.17 [ 25196 / 69668, 2429 ins, 5393 del, 17374 sub ] exp/tri2b/decode/wer_16 +%WER 39.42 [ 27462 / 69668, 2473 ins, 6051 del, 18938 sub ] exp/tri2a/decode/wer_14 +%WER 40.35 [ 28113 / 69668, 2713 ins, 5635 del, 19765 sub ] exp/tri1/decode/wer_13 diff --git a/egs/gale_arabic/s5b/cmd.sh b/egs/gale_arabic/s5b/cmd.sh new file mode 100755 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/gale_arabic/s5b/cmd.sh @@ -0,0 +1,15 @@ +# you can 
change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/gale_arabic/s5b/conf/decode.config b/egs/gale_arabic/s5b/conf/decode.config new file mode 100644 index 00000000000..6f503eab35e --- /dev/null +++ b/egs/gale_arabic/s5b/conf/decode.config @@ -0,0 +1 @@ +link decode_dnn.config \ No newline at end of file diff --git a/egs/gale_arabic/s5b/conf/mfcc.conf b/egs/gale_arabic/s5b/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/gale_arabic/s5b/conf/mfcc_hires.conf b/egs/gale_arabic/s5b/conf/mfcc_hires.conf new file mode 100644 index 00000000000..c45f2b691a9 --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. 
+--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/gale_arabic/s5b/conf/online_cmvn.conf b/egs/gale_arabic/s5b/conf/online_cmvn.conf new file mode 100644 index 00000000000..cbdaf5f281c --- /dev/null +++ b/egs/gale_arabic/s5b/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh diff --git a/egs/gale_arabic/s5b/local/bad_segments b/egs/gale_arabic/s5b/local/bad_segments new file mode 100644 index 00000000000..c3413f0714c --- /dev/null +++ b/egs/gale_arabic/s5b/local/bad_segments @@ -0,0 +1,10 @@ +ARABIYA_FROMIRAQ_ARB_20070302_175801_2326286_2327450 +ARABIYA_BILARABI_ARB_20061005_201400_221375_223694 +LBC_NAHAR_ARB_20060911_142800_3683267_3685290 +LBC_NAHAR_ARB_20070303_145800_3249800_3251128 +LBC_NAHAR_ARB_20070303_145800_3623646_3624152 +LBC_NAHAR_ARB_20070305_035800_481003_484069 +ALAM_WITHEVENT_ARB_20070227_205800_3141876_3144152 +ALAM_NEWSRPT_ARB_20070130_015801_2875054_2876396 +ALJZ_TODHARV_ARB_20060914_155800_2947717_2949041 +ALJZ_TODHARV_ARB_20070107_145800_2417848_2419238 diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..f897827461c --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +# started from the tedlium recipe with a few edits + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train +gmm=tri2b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=0 # default is -10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1b # affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology.
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 2 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..e604dc7e714 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,223 @@ +#!/bin/bash + +# started from the tedlium recipe with a few edits + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri2b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1a # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 3 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh new file mode 100755 index 00000000000..0fc667ac53a --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + + +galeData=$(readlink -f "${@: -1}" ); # last argument; the local folder +audio_dvds=${@:1:${#}-1} # all the audio dvds for the GALE corpus; check audio=( in ../run.sh + +mkdir -p $galeData + +# check that sox is installed +which sox &>/dev/null +if [[ $? != 0 ]]; then + echo "sox is not installed"; exit 1 +fi + +for dvd in $audio_dvds; do + dvd_full_path=$(readlink -f $dvd) + if [[ !
-e $dvd_full_path ]]; then + echo missing $dvd_full_path; exit 1; + fi + find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do + id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}') + echo "$id sox $file -r 16000 -t wav - |" + done +done | sort -u > $galeData/wav.scp + +echo data prep audio succeeded + +exit 0 + diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_split.sh b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh new file mode 100755 index 00000000000..a62904a3b57 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "Arguments should be the <gale folder>"; exit 1 +fi + + +# data will be in data/local + +galeData=$(readlink -f $1) +mkdir -p data/local +dir=$(readlink -f data/local) + + +grep -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.test +grep -v -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.train + +for x in test train; do + outdir=$dir/$x + file=$galeData/all.$x + mkdir -p $outdir + awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk + cp -pr $outdir/utt2spk $outdir/spk2utt + awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments + awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text +done + + +grep -f local/test_list $galeData/wav.scp > $dir/test/wav.scp + +cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline < seg) > 0) {seen[$2]=1;}} + {if (seen[$1]) { print $0}}' > $dir/train/wav.scp + +echo data prep split succeeded + +exit 0 \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh new file mode 100755 index 00000000000..14d7241d4c1 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +galeData=$(readlink -f "${@: -1}" ); # last argument; the local folder +txt_dvds=${@:1:${#}-1} # all the txt cds corresponding to the audio corpus; check text=( in ../run.sh + + +top_pwd=`pwd` +txtdir=$galeData/txt +mkdir -p $txtdir; cd $txtdir + +for cdx in $txt_dvds; do + echo "Preparing $cdx" + if [[ $cdx == *.tgz ]] ; then + tar -xvf $cdx + elif [ -d "$cdx" ]; then + ln -s $cdx `basename $cdx` + else + echo "I don't really know what I shall do with $cdx " >&2 + fi +done + +find -L .
-type f -name "*.tdf" | while read file; do +sed '1,3d' $file # delete the first 3 lines +done > all.tmp$$ + +perl -e ' + ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0]; + open(IN, "$inFile"); + open(ID, ">$idFile"); + open(TXT, ">$txtFile"); + while (<IN>) { + @arr= split /\t/,$_; + $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning + $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//; + if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";} + $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n"; + next if ($rStart == $rEnd); + $id =~ s/\.sph//g; + print ID $id; + print TXT "$arr[7]\n"; + }' "all.tmp$$ allid.tmp$$ contentall.tmp$$" + + +perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$ + +paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$ + +awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $galeData/all +awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/report +awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/conversational + +cd ..; +rm -fr $txtdir +cd $top_pwd +echo data prep text succeeded + +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_format_data.sh b/egs/gale_arabic/s5b/local/gale_format_data.sh new file mode 100755 index 00000000000..a572b8194a3 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_format_data.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ -f path.sh ]; then + . path.sh; else + echo "$0: missing path.sh"; exit 1; +fi + +for dir in test train; do + cp -pr data/local/$dir data/$dir +done + + +mkdir -p data/lang_test + +arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz +[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; + +rm -r data/lang_test +cp -r data/lang data/lang_test + +gunzip -c "$arpa_lm" | \ + arpa2fst --disambig-symbol=#0 \ + --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst + + +echo "$0: Checking how stochastic G is (the first of these numbers should be small):" +fstisstochastic data/lang_test/G.fst + +## Check lexicon. +## just have a look and make sure it seems sane. +echo "$0: First few lines of lexicon FST:" +fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head + +echo "$0: Performing further checks" + +# Checking that G.fst is determinizable. +fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G. + +# Checking that L_disambig.fst is determinizable. +fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L. + +# Checking that disambiguated lexicon times G is determinizable +# Note: we do this with fstdeterminizestar not fstdeterminize, as +# fstdeterminize was taking forever (presumably relates to a bug +# in this version of OpenFst that makes determinization slow for +# some cases). +fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ + fstdeterminizestar >/dev/null || echo Error + +# Checking that LG is stochastic: +fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ + fstisstochastic || echo LG is not stochastic + + +echo gale_format_data succeeded.
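# Note: if any of the checks above fail, OpenFst's fstinfo tool is a quick way to inspect G; an illustrative example (not part of the original script): fstinfo data/lang_test/G.fst | head -- this prints the number of states and arcs and the FST's properties, which usually makes the failure mode obvious.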
+ +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh new file mode 100755 index 00000000000..0162eb49330 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright 2017 QCRI (author: Ahmed Ali) +# Apache 2.0 + + +# run this from ../ +dir=$(readlink -f data/local/dict) +mkdir -p $dir + + +# (1) Get all available dictionaries; since this is a grapheme model, we mainly need the most frequent word lists +wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2 || exit 1; +wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2 || exit 1; +bzcat ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > tmp$$ +bzcat ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> tmp$$ +# (2) Now we add all the words that appear in the training data +cat data/local/train/text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> tmp$$ +grep -v [0-9] tmp$$ | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > tmp1.$$ # remove vowels and rare alef wasla +cat tmp1.$$ | sed 's:\(.\):\1 :g' | sed -e 's: : :g' -e 's: : :g' -e 's:\s*: :g' -e 's:\*:V:g' > tmp2.$$ +paste -d ' ' tmp1.$$ tmp2.$$ > $dir/lexicon.txt + +# (3) Dictionary preparation: + +# silence phones, one per line. +echo SIL > $dir/silence_phones.txt +echo SIL > $dir/optional_silence.txt + +# nonsilence phones; on each line is a list of phones that correspond +# really to the same base phone. +cat tmp2.$$ | tr -s ' ' '\n' | grep -v ^$ | sort -u > $dir/nonsilence_phones.txt || exit 1; + +sed -i '1i <UNK> SIL' $dir/lexicon.txt # insert the <UNK> word with phone SIL at the beginning of the dictionary + +rm -fr ar-ar_lexicon_2014-03-17.txt.bz2 ar-ar_grapheme_lexicon_2016-02-09.bz2 tmp$$ tmp1.$$ tmp2.$$ +echo Dictionary preparation succeeded + +# The script is still missing dates and numbers + +exit 0 + diff --git a/egs/gale_arabic/s5b/local/gale_train_lms.sh b/egs/gale_arabic/s5b/local/gale_train_lms.sh new file mode 100755 index 00000000000..3988ec3818f --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_train_lms.sh @@ -0,0 +1,81 @@ +#!/bin/bash + + +# To be run from one directory above this script. + + +lexicon=data/local/dict/lexicon.txt +[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1; + + +# This script takes no arguments. It assumes you have already run +# previous steps successfully +# It takes as input the files +#data/local/train.*/text +#data/local/dict/lexicon.txt + + +export LC_ALL=C # You'll get errors about things being not sorted, if you +# have a different locale. +export PATH=$PATH:./../../../tools/kaldi_lm +( # First make sure the kaldi_lm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d kaldi_lm ]; then + echo Not installing the kaldi_lm toolkit since it is already there. + else + echo Downloading and installing the kaldi_lm tools + if [ ! -f kaldi_lm.tar.gz ]; then + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; + fi + tar -xvzf kaldi_lm.tar.gz || exit 1; + cd kaldi_lm + make || exit 1; + echo Done making the kaldi_lm tools + fi +) || exit 1; + + +dir=data/local/lm + mkdir -p $dir + text=data/local/train/text + [ !
-f $text ] && echo "$0: No such file $text" && exit 1; + + cleantext=$dir/text.no_oov + + cat $text | awk -v lex=$lexicon 'BEGIN{while((getline < lex) > 0){ seen[$1]=1; } } + {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<unk> ",$n);} } printf("\n");}' \ + > $cleantext || exit 1; + + + cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ + sort -nr > $dir/word.counts || exit 1; + + +# Get counts from acoustic training transcripts, and add one-count +# for each word in the lexicon (but not silence, we don't want it +# in the LM-- we'll add it optionally later). + cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ + cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ + sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; + +# note: we probably won't really make use of <unk> as there aren't any OOVs + cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<unk>" > $dir/word_map \ + || exit 1; + +# note: ignore 1st field of train.txt, it's the utterance-id. + cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline < wmap) > 0)map[$1]=$2;} + { for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz \ + || exit 1; + + train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; + +# LM is small enough that we don't need to prune it (only about 0.7M N-grams). +# Perplexity over 128254.000000 words is 90.446690 + +# note: output is +# data/local/lm/3gram-mincount/lm_unpruned.gz + + +echo train lm succeeded + +exit 0 diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..d9fc3385a42 --- /dev/null +++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh @@ -0,0 +1,237 @@ +#!/bin/bash + +set -e -o pipefail + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=100 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train # you might set this to e.g. train. +gmm=tri2b # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." + exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations.
You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp test; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp test; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null || true + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/test_hires ${dir}/decode_test ${dir}/decode_test_rescore || exit 1 +fi + +exit 0; diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..a6cc6e2dec8 --- /dev/null +++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +# started from the tedlium recipe with a few edits + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train +gmm=tri2b # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix= # affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+ +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < \n"; + exit (1); + } + +# </check usage> +my $inFile = shift (@ARGV); +my $ouFile = shift(@ARGV); + + +open INFILE, "<$inFile" or die "unable to open the input file $inFile\n"; +binmode INFILE, ":encoding(utf8)"; + + +open OUTPUTFILE, ">$ouFile" or die "unable to open the output file $ouFile\n"; +binmode OUTPUTFILE, ":encoding(utf8)"; + + +while (<INFILE>) { + s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers + my $BW = convertUTF8ToBuckwalter ($_); + print OUTPUTFILE "$BW"."\n"; +} +close INFILE; +close OUTPUTFILE; + + + +# this function is copied from MADATools.pm: MADA Tools + sub convertUTF8ToBuckwalter { + + my ($line)= (@_); + #$line = $UTF8_ENCODING_OBJ->decode($line); ## Same as Encode::decode("utf8",$line), but faster since object already created + $line =~ s/\x{0621}/\'/g; ## HAMZA + $line =~ s/\x{0622}/\|/g; ## ALEF WITH MADDA ABOVE + $line =~ s/\x{0623}/\>/g; ## ALEF WITH HAMZA ABOVE + $line =~ s/\x{0624}/\&/g; ## WAW WITH HAMZA ABOVE + $line =~ s/\x{0625}/\ " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum Bayes risk decoding (confusion network)." + echo " --min_lmwt # minimum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/gale_arabic/s5b/local/split_wer.sh b/egs/gale_arabic/s5b/local/split_wer.sh new file mode 100755 index 00000000000..70c97ae5d19 --- /dev/null +++ b/egs/gale_arabic/s5b/local/split_wer.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Report WER for reports and conversational +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "Arguments should be the gale folder, see ../run.sh for example." + exit 1; +fi + +[ -f ./path.sh ] && .
./path.sh + + +galeFolder=$(readlink -f $1) +symtab=./data/lang/words.txt +find exp/ -maxdepth 3 -type d -name decode\* > list_decode$$ + +#split the test set per type: +awk '{print $2}' $galeFolder/all.test | sort -u > $galeFolder/test_id$$ + +# generate the report test set +awk '{print $2}' $galeFolder/report | sort -u > $galeFolder/report_id$$ +comm -1 -2 $galeFolder/test_id$$ $galeFolder/report_id$$ > $galeFolder/report.test + +# generate the conversational test set +awk '{print $2}' $galeFolder/conversational | sort -u > $galeFolder/conversational_id$$ + +comm -1 -2 $galeFolder/test_id$$ $galeFolder/conversational_id$$ > $galeFolder/conversational.test + +rm -fr $galeFolder/test_id$$ $galeFolder/report_id$$ $galeFolder/conversational_id$$ + +min_lmwt=7 +max_lmwt=20 +cat list_decode$$ | while read dir; do + for type in report conversational; do + #echo "Processing: $dir $type" + rm -fr $dir/scoring_$type + cp -pr $dir/scoring $dir/scoring_$type + ( cd $dir/scoring_$type; + for x in *.tra test_filt.txt; do + sort -u $x > tmp$$ + join tmp$$ $galeFolder/${type}.test > $x + rm -fr tmp$$ + done + ) + +utils/run.pl LMWT=$min_lmwt:$max_lmwt $dir/scoring_$type/log/score.LMWT.log \ + cat $dir/scoring_${type}/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_${type}/test_filt.txt ark,p:- ">&" $dir/wer_${type}_LMWT +done +done + + +time=$(date +"%Y-%m-%d-%H-%M-%S") +echo "RESULTS generated by $USER at $time" + +echo "Report Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_report_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Conversational Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_conversational_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Combined Results for Reports and Conversational WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_?? $x/wer_?| utils/best_wer.sh; done | sort -n -k2 + +rm list_decode$$ + + + diff --git a/egs/gale_arabic/s5b/local/test_list b/egs/gale_arabic/s5b/local/test_list new file mode 100644 index 00000000000..d82cf498804 --- /dev/null +++ b/egs/gale_arabic/s5b/local/test_list @@ -0,0 +1,11 @@ +ALAM_WITHEVENT_ARB_20070116_205800 +ALAM_WITHEVENT_ARB_20070130_205800 +ALAM_WITHEVENT_ARB_20070206_205801 +ALAM_WITHEVENT_ARB_20070213_205800 +ALAM_WITHEVENT_ARB_20070227_205800 +ALAM_WITHEVENT_ARB_20070306_205800 +ALAM_WITHEVENT_ARB_20070313_205800 +ARABIYA_FROMIRAQ_ARB_20070216_175800 +ARABIYA_FROMIRAQ_ARB_20070223_175801 +ARABIYA_FROMIRAQ_ARB_20070302_175801 +ARABIYA_FROMIRAQ_ARB_20070309_175800 diff --git a/egs/gale_arabic/s5b/path.sh b/egs/gale_arabic/s5b/path.sh new file mode 100755 index 00000000000..be11b34cbc6 --- /dev/null +++ b/egs/gale_arabic/s5b/path.sh @@ -0,0 +1,5 @@ +export KALDI_ROOT=$(pwd)/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/gale_arabic/s5b/run.sh b/egs/gale_arabic/s5b/run.sh new file mode 100755 index 00000000000..9cc72d31a95 --- /dev/null +++ b/egs/gale_arabic/s5b/run.sh @@ -0,0 +1,167 @@ +#!/bin/bash -e + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +. path.sh +. cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. 
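# Note: as the comments in cmd.sh explain, on a single machine with no grid you can point all of these commands at run.pl instead of queue.pl. A minimal local cmd.sh would be, e.g.: export train_cmd=run.pl ; export decode_cmd=run.pl ; export mkgraph_cmd=run.pl -- run.pl executes the jobs directly on the local machine.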
+num_jobs=120 +num_decode_jobs=40 + +#NB: You can add whatever number of corpora you like. The supported extensions +#NB: (formats) are wav and flac. Flac will be converted using sox; in contrast +#NB: with the old approach, the conversion will be on-the-fly and one-time-only, +#NB: during the parametrization. + +#NB: Text corpora specification. We support either tgz files, which are unpacked, +#NB: or just plain (already unpacked) directories. The list of transcripts is then +#NB: obtained using the find command + +#This is the CLSP configuration. We add the 2014 GALE data. We got around 2% +#improvement just by including it. The gain might be larger if someone tweaked +# the number of leaves and states and so on. + +#Make sure you edit this section to reflect where you keep the LDC data on your cluster +audio=( + /data/sls/scratch/amali/data/GALE/LDC2013S02 + /data/sls/scratch/amali/data/GALE/LDC2013S07 + /data/sls/scratch/amali/data/GALE/LDC2014S07 +) +text=( + /data/sls/scratch/amali/data/GALE/LDC2013T17.tgz + /data/sls/scratch/amali/data/GALE/LDC2013T04.tgz + /data/sls/scratch/amali/data/GALE/LDC2014T17.tgz +) + +galeData=GALE +#prepare the data +#split train dev test +#prepare lexicon and LM + +# You can run the script from here automatically, but it is recommended to run the data preparation +# and feature extraction manually, and only once, +# by copying and pasting into your shell. + +#copy the audio files to local folder wav and convert flac files to wav +local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1; + +#get the transcription and remove empty prompts and all noise markers +local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1; + +# split the data into reports and conversational; each class will have train/dev and test +local/gale_data_prep_split.sh $galeData || exit 1; + +# get all Arabic grapheme dictionaries and add silence and UNK +local/gale_prep_grapheme_dict.sh || exit 1; + + +#prepare the language resources +utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1; + +# LM training +local/gale_train_lms.sh || exit 1; + +local/gale_format_data.sh || exit 1; +# G compilation, check LG composition + +# Now make MFCC features. +# mfccdir should be some place with a largish disk where you +# want to store MFCC features. +mfccdir=mfcc + +for x in train test ; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \ + data/$x exp/make_mfcc/$x $mfccdir + utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir +done + + +# Here we start the AM + +# Let's create a subset with 10k segments to make quick flat-start training: +utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1; + +# Train monophone models on a subset of the data, 10K segments +# Note: the --boost-silence option should probably be omitted by default +steps/train_mono.sh --nj 40 --cmd "$train_cmd" \ + data/train.10K data/lang exp/mono || exit 1; + + +# Get alignments from monophone system.
+steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/mono exp/mono_ali || exit 1; + +# train tri1 [first triphone pass] +steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; + +# First triphone decoding +utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri1/graph data/test exp/tri1/decode + +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + +# Train tri2a, which is deltas + delta-deltas +steps/train_deltas.sh --cmd "$train_cmd" \ + 3000 40000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; + +# tri2a decoding +utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2a/graph data/test exp/tri2a/decode + +# train and decode tri2b [LDA+MLLT] +steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ + data/train data/lang exp/tri1_ali exp/tri2b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b/decode + +# Align all data with LDA+MLLT system (tri2b) +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; + + +# From 2b system, train 3b which is LDA + MLLT + SAT. +steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph +steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ + "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode + +# From 3b system, align all data. +steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; + + +# nnet3 cross-entropy +local/nnet3/run_tdnn.sh #tdnn recipe: +local/nnet3/run_lstm.sh #lstm recipe: + +# chain lattice-free +local/chain/run_tdnn.sh #tdnn recipe: +local/chain/run_tdnn_lstm.sh #tdnn-lstm recipe: + +time=$(date +"%Y-%m-%d-%H-%M-%S") + +#get detailed WER; reports, conversational and combined +local/split_wer.sh $galeData > RESULTS.details.$USER.$time # to make sure you keep the results timed and owned + +echo training succeeded +exit 0 + +#TODO: +#LM (4-gram and RNN) rescoring +#combine lattices +#dialect detection + + + + + diff --git a/egs/gale_arabic/s5b/steps b/egs/gale_arabic/s5b/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/gale_arabic/s5b/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/gale_arabic/s5b/utils b/egs/gale_arabic/s5b/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/gale_arabic/s5b/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/gale_mandarin/s5/local/gale_prep_dict.sh b/egs/gale_mandarin/s5/local/gale_prep_dict.sh index f1e39fb452e..bf2391d3bd7 100755 --- a/egs/gale_mandarin/s5/local/gale_prep_dict.sh +++ b/egs/gale_mandarin/s5/local/gale_prep_dict.sh @@ -77,7 +77,8 @@ if [ ! -f conf/g2p_model ]; then fi echo "--- Preparing pronunciations for OOV words ..." -if [ ! -x g2p.py ]; then +g2p=`which g2p.py` +if [ ! -x $g2p ]; then echo "g2p.py is not found. Checkout tools/extra/install_sequitur.sh."
exit 1 fi diff --git a/egs/gale_mandarin/s5/local/score.sh b/egs/gale_mandarin/s5/local/score.sh deleted file mode 100755 index 96b1e12a5f6..00000000000 --- a/egs/gale_mandarin/s5/local/score.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -orig_args= -for x in "$@"; do orig_args="$orig_args '$x'"; done - -# begin configuration section. we include all the options that score_sclite.sh or -# score_basic.sh might need, or parse_options.sh will die. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score.sh [options] " && exit; - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 - -if [ -f $data/stm ]; then # use sclite scoring. - echo "$data/stm exists: using local/score_sclite.sh" - eval local/score_sclite.sh $orig_args -else - echo "$data/stm does not exist: using local/score_basic.sh" - eval local/score_basic.sh $orig_args -fi diff --git a/egs/gale_mandarin/s5/local/score.sh b/egs/gale_mandarin/s5/local/score.sh new file mode 120000 index 00000000000..df664a0f1f1 --- /dev/null +++ b/egs/gale_mandarin/s5/local/score.sh @@ -0,0 +1 @@ +../steps/scoring/score_kaldi_cer.sh \ No newline at end of file diff --git a/egs/gale_mandarin/s5/local/score_basic.sh b/egs/gale_mandarin/s5/local/score_basic.sh deleted file mode 100755 index 171701820b7..00000000000 --- a/egs/gale_mandarin/s5/local/score_basic.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_basic.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/text $lang/words.txt $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. 
eval2000 - -mkdir -p $dir/scoring/log - - -function filter_text { - perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } - while() { @A = split(" ", $_); $id = shift @A; print "$id "; - foreach $a (@A) { if (!defined $bad{$a}) { print "$a "; }} print "\n"; }' \ - '[NOISE]' '[LAUGHTER]' '[VOCALIZED-NOISE]' '' '%HESITATION' -} - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-best-path --lm-scale=LMWT --word-symbol-table=$lang/words.txt \ - "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; - -for lmwt in `seq $min_lmwt $max_lmwt`; do - utils/int2sym.pl -f 2- $lang/words.txt <$dir/scoring/$lmwt.tra | \ - filter_text > $dir/scoring/$lmwt.txt || exit 1; -done - -filter_text <$data/text >$dir/scoring/text.filt - -unset LC_ALL -#for character error rate -cat $dir/scoring/text.filt | awk '{ print $1}' > $dir/scoring/utt_id -cat $dir/scoring/text.filt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' | sed -e 's/\(\S\)/\1 /g' > $dir/scoring/utt_tra -paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/char.filt - -for lmwt in `seq $min_lmwt $max_lmwt`; do - cat $dir/scoring/$lmwt.txt | awk '{ print $1}' > $dir/scoring/utt_id - cat $dir/scoring/$lmwt.txt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' | sed -e 's/\(\S\)/\1 /g' > $dir/scoring/utt_tra - paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/${lmwt}.char -done - -rm $dir/scoring/utt_tra $dir/scoring/utt_id - -export LC_ALL=C - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/text.filt ark:$dir/scoring/LMWT.txt ">&" $dir/wer_LMWT || exit 1; - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.cer.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/char.filt ark:$dir/scoring/LMWT.char ">&" $dir/cer_LMWT || exit 1; - -exit 0 diff --git a/egs/gale_mandarin/s5/local/score_sclite.sh b/egs/gale_mandarin/s5/local/score_sclite.sh deleted file mode 100755 index e7fcd8ad07a..00000000000 --- a/egs/gale_mandarin/s5/local/score_sclite.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. 
eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - nbest-to-ctm ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/gale_mandarin/s5/local/score_sclite_conf.sh b/egs/gale_mandarin/s5/local/score_sclite_conf.sh deleted file mode 100755 index a6a2759629d..00000000000 --- a/egs/gale_mandarin/s5/local/score_sclite_conf.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - # the escaping gets a bit crazy here, sorry... - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \ - lattice-align-words $lang/phones/word_boundary.int $model "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. 
- for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/gale_mandarin/s5/local/wer_hyp_filter b/egs/gale_mandarin/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/gale_mandarin/s5/local/wer_hyp_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while(<STDIN>) { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/gale_mandarin/s5/local/wer_ref_filter b/egs/gale_mandarin/s5/local/wer_ref_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/gale_mandarin/s5/local/wer_ref_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while(<STDIN>) { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/gale_mandarin/s5/run.sh b/egs/gale_mandarin/s5/run.sh index 74e69e9d12a..fe9fdbdd483 100755 --- a/egs/gale_mandarin/s5/run.sh +++ b/egs/gale_mandarin/s5/run.sh @@ -54,7 +54,7 @@ mfccdir=mfcc # spread the mfccs over various machines, as this data-set is quite large. if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then - mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename. + mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename. utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/gale_mandarin/s5/$mfcc/storage \ $mfccdir/storage fi @@ -203,5 +203,3 @@ local/split_wer_per_corpus.sh $galeData >> RESULTS echo training succedded exit 0 - - diff --git a/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh b/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh deleted file mode 100755 index dfe1f211d6c..00000000000 --- a/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh +++ /dev/null @@ -1,359 +0,0 @@ -#!/bin/bash -u - -# Copyright 2012 Arnab Ghoshal -# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is Subspace Gaussian Mixture Model (SGMM) training-- -# see "The subspace Gaussian mixture model--A structured model for speech recognition" -# by D. Povey et al, Computer Speech and Language, 2011.
- -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function readint () { - local retval=${1/#*=/}; # In case --switch=ARG format was used -# retval=${retval#0*} # Strip any leading 0's - [[ "$retval" =~ ^-?[0-9][0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not an integer." - echo $retval -} - -function est_alimodel () { -# If we have speaker vectors, we need an alignment model. This function gets -# the Gaussian-level alignments with the speaker vectors but accumulates stats -# without any speaker vectors; we re-estimate M, w, c and S to get a model -# that's compatible with not having speaker vectors. Note that the transitions -# are not updated since the decoding graph will be shared with the normal model. - local lx=$1 - for L in $LANGUAGES; do - wdir=$dir/$L - local lspkdim=`sgmm-info $wdir/$lx.mdl | grep speaker | awk '{print $NF}'` - if [ "$lspkdim" -le 0 ]; then - echo "est_alimodel: No speaker space in model '$wdir/$lx.mdl'. Returning." - return - fi - done - - local y=0; - local lflags=MwcS # First time don't update v - while [ $y -lt $numiters_alimdl ]; do - [ $y -gt 0 ] && lflags=vMwcS - echo "Pass $y of building alignment model, flags = '$lflags'" - local lmulti_est_opts='' # model, acc, model-out, occs-out tuples - for L in $LANGUAGES; do - ( - data=data/$L/train - lang=data/$L/lang - wdir=$dir/$L - local cur_alimdl=$wdir/tmp$y.alimdl - [ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|" - spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs" - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc_ali${lx}_$y.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk $wdir/$lx.mdl \ - "$feats" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --update-flags=$lflags $cur_alimdl "$feats" \ - ark,s,cs:- $wdir/$y.TASK_ID.aliacc \ - || { touch $dir/err; \ - error_exit "$L; Align model iter $y: Error accumulating stats"; } - - # Summing accs is quite fast; run locally - sgmm-sum-accs $wdir/sum.aliacc $wdir/$y.*.aliacc || \ - { touch $dir/err; \ - error_exit "$L; Align model iter $y: Error summing stats"; } - )& # Accumulate in parallel for different languages - wdir=$dir/$L - local cur_alimdl=$wdir/tmp$y.alimdl - [ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl - lmulti_est_opts="$lmulti_est_opts $cur_alimdl $wdir/sum.aliacc $wdir/tmp$[$y+1].alimdl $wdir/tmp$[$y+1].occs" - done - wait - - submit_jobs.sh "$qcmd" --log=$dir/log/update_ali.$y.log $sjopts \ - sgmm-est-multi --update-flags=$lflags --remove-speaker-space=true \ - $lmulti_est_opts \ - || error_exit "Error estimating alignment models on iter $y"; - - rm -f $dir/??/$y.*.aliacc $dir/??/sum.aliacc || exit 1; - [ $y -gt 0 ] && rm $dir/??/tmp$y.{alimdl,occs} - y=$[$y+1] - done - - for L in $LANGUAGES; do - mv $dir/$L/tmp$y.alimdl $dir/$L/$lx.alimdl - done -} - -nj=4 # Default number of jobs -stage=-5 # Default starting stage (start with tree building) -qcmd="" # Options for the submit_jobs.sh script -sjopts="" # Options for the submit_jobs.sh script -LANGUAGES='GE PO SP SW' # Languages processed - -PROG=`basename $0`; -usage="Usage: $PROG [options] <phn-dim> <spk-dim> <ubm-file> <exp-dir>\n -e.g.: $PROG 40 39 exp/ubm3c/final.ubm exp/sgmm3c\n\n -Options:\n - --help\t\tPrint this message and exit\n - --lang
STR\tList of languages to process (default = '$LANGUAGES')\n - --num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n - --qcmd STR\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n - --sjopts STR\tOptions for the 'submit_jobs.sh' script\n - --stage INT\tStarting stage (e.g. -4 for SGMM init; 2 for iter 2; default=$stage)\n -"; - -echo "$PROG $@" -while [ $# -gt 0 ]; do - case "${1# *}" in # ${1# *} strips any leading spaces from the arguments - --help) echo -e $usage; exit 0 ;; - --lang) LANGUAGES="$2"; shift 2 ;; - --num-jobs) - shift; nj=`readint $1`; - [ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."; - shift ;; - --qcmd) - shift; qcmd=" --qcmd=${1}"; shift ;; - --sjopts) - shift; sjopts="$1"; shift ;; - --stage) - shift; stage=`readint $1`; shift ;; - -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;; - *) break ;; # end of options: interpreted as num-leaves - esac -done - -if [ $# != 4 ]; then - error_exit $usage; -fi - -[ -f path.sh ] && . path.sh - -# This is SGMM with speaker vectors, on top of LDA+[something] features. -# Any speaker-specific transforms are obtained from the alignment directory. -# To be run from .. - -phndim=$1 -spkdim=$2 -ubm=$3 -dir=$4 - -[ -f $ubm ] || error_exit "UBM file '$ubm' does not exist" -mkdir -p $dir/log || error_exit "Cannot create '$dir/log'" - -# (1): Model initialization; training graph and initial alignment generation. -for L in $LANGUAGES; do -( - data=data/$L/train - lang=data/$L/lang - alidir=exp/$L/tri2a_ali - wdir=$dir/$L - oov_sym=`cat $lang/oov.txt` - mkdir -p $wdir/log || error_exit "Cannot create working directory '$wdir'" - - # Initialize the model (removed the --spk-space-dim option) - if [ $stage -le -5 ]; then - echo "$L: Initializing model" - submit_jobs.sh "$qcmd" --log=$wdir/log/init_sgmm.log $sjopts \ - sgmm-init --phn-space-dim=$phndim $lang/topo $wdir/tree $ubm \ - $wdir/0.mdl || { touch $dir/err; error_exit "$L: SGMM init failed."; } - fi - - # Make training graphs - if [ $stage -le -4 ]; then - echo "$L: Compiling training graphs" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/mkgraphs.TASK_ID.log \ - $sjopts compile-train-graphs $wdir/tree $wdir/0.mdl $lang/L.fst \ - "ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \ - "ark:|gzip -c >$wdir/TASK_ID.fsts.gz" \ - || { touch $dir/err; error_exit "$L: Error compiling training graphs"; } - fi - - if [ $stage -le -3 ]; then - echo "$L: Converting alignments" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/convert.TASK_ID.log \ - $sjopts convert-ali $alidir/final.mdl $wdir/0.mdl $wdir/tree \ - "ark:gunzip -c $alidir/TASK_ID.ali.gz|" \ - "ark:|gzip -c >$wdir/TASK_ID.ali.gz" \ - || { touch $dir/err; error_exit "$L: Convert alignment failed."; } - fi - - if [ $stage -le -2 ]; then - echo "$L: Computing cepstral mean and variance statistics" - submit_jobs.sh "$qcmd" --njobs=$nj $sjopts --log=$wdir/log/cmvn.TASK_ID.log \ - compute-cmvn-stats --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - scp:$data/split$nj/TASK_ID/feats.scp ark:$wdir/TASK_ID.cmvn \ - || { touch $dir/err; error_exit "$L: Computing CMN/CVN stats failed."; } - fi - - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - - if [ $stage -le -1 ]; then - echo "$L: Doing Gaussian selection" - submit_jobs.sh "$qcmd" --njobs=$nj 
--log=$wdir/log/gselectTASK_ID.log \ - $sjopts sgmm-gselect $wdir/0.mdl "$feats" "ark,t:|gzip -c > $wdir/TASK_ID.gselect.gz" \ - || { touch $dir/err; error_exit "$L: Error doing Gaussian selection"; } - fi -)& # Run the language-specific initializations in parallel -done -wait -[ -f $dir/err ] && { rm $dir/err; error_exit "Error initializing models."; } - -# Language independent constants -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -numiters_alimdl=3 # Number of iterations for estimating alignment model. -incsub_interval=8 # increase substates every 8 iterations -# total substates after each such increment -total_substates=( 5000 7000 9000 12000 16000 20000 25000 30000 35000 40000 ) -# For a given number of substates, iterate for $incsub_interval iterations -numiters=$[(${#total_substates[@]}+1)*$incsub_interval] -realign_interval=4 # realign every 4 iterations -spkvec_start=8 # use speaker subspace *after* 8 iterations -spkvec_interval=2 # reestimate the speaker vectors every 2 iterations -randprune=0.1 - -# Initially don't have speaker vectors, but change this after we estimate them. -spkvecs_gen=0 - -x=0 -while [ $x -lt $numiters ]; do - if [ $x -eq 0 ]; then - flags=v # On first iter, don't update M or N. - elif [ $spkdim -gt 0 -a $[$x%2] -eq 0 -a $x -gt $spkvec_start ]; then - # Update N on odd iterations after 1st spkvec iter, if we have spk-space. - flags=NwSvct - else # Else update M but not N. - flags=MwSvct - fi - - if [ $stage -le $x ]; then - echo "Pass $x: update flags = '$flags' " - multi_est_opts='' # Will contain model, acc, model-out, occs-out tuples - for L in $LANGUAGES; do - ( - data=data/$L/train - lang=data/$L/lang - wdir=$dir/$L - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|" - if [ $spkdim -gt 0 -a $spkvecs_gen -eq 1 ]; then - spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs" - else - spkvecs_opt='' - fi - silphonelist=`cat $lang/silphones.csl` -# numsubstates=`cat $wdir/numleaves` # Initial #-substates. 
- - if [ $[$x%$realign_interval] -eq 0 -a $x -gt 0 ]; then - echo "$L; iter $x: Aligning data" - submit_jobs.sh "$qcmd" $sjopts --log=$wdir/log/align.$x.TASK_ID.log \ - --njobs=$nj sgmm-align-compiled $spkvecs_opt $scale_opts \ - "$gselect_opt" --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --beam=8 --retry-beam=40 $wdir/$x.mdl \ - "ark:gunzip -c $wdir/TASK_ID.fsts.gz|" "$feats" \ - "ark:|gzip -c >$wdir/TASK_ID.ali.gz" || \ - { touch $dir/err; error_exit "$L, it $x: Error realigning data"; } - fi - - if [ $spkdim -gt 0 -a $x -gt $spkvec_start \ - -a $[$x%$spkvec_interval] -eq 0 ]; then - echo "$L; iter $x: Computing speaker vectors" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/spkvecs.$x.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $wdir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - $spkvecs_opt "$gselect_opt" --rand-prune=$randprune $wdir/$x.mdl \ - "$feats" ark,s,cs:- ark:$wdir/tmpTASK_ID.vecs || \ - { touch $dir/err; error_exit "$L, it $x: Error computing spkvecs"; } - for n in `seq 1 $nj`; do - mv $wdir/tmp${n}.vecs $wdir/${n}.vecs; - done - spkvecs_gen=1 - fi - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc.$x.TASK_ID.log \ - $sjopts sgmm-acc-stats --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --update-flags=$flags --rand-prune=$randprune $spkvecs_opt \ - "$gselect_opt" $wdir/$x.mdl "$feats" \ - "ark,s,cs:ali-to-post 'ark:gunzip -c $wdir/TASK_ID.ali.gz|' ark:-|" \ - $wdir/$x.TASK_ID.acc || \ - { touch $dir/err; error_exit "$L, it $x: Error accumulating stats"; } - - # Summing accs is quite fast; run locally - sgmm-sum-accs $wdir/sum.acc $wdir/$x.*.acc || \ - { touch $dir/err; error_exit "$L, it $x: Error summing stats"; } - ) & # Accumulate in parallel for different languages - wdir=$dir/$L - multi_est_opts="$multi_est_opts $wdir/$x.mdl $wdir/sum.acc $wdir/$[$x+1].mdl $wdir/$[$x+1].occs" - done - wait - [ -f $dir/err ] && \ - { rm $dir/err; error_exit "Iter $x: Error in accumulation"; } - - add_dim_opts='' - if [ $x -eq $spkvec_start ]; then - add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim" - elif [ $x -eq $[$spkvec_start*2] ]; then - add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim" - fi - split_opts='' - if [ $[$x%$incsub_interval] -eq 1 -a $x -gt 1 ]; then - index=$[($x/$incsub_interval)-1] - numsubstates=${total_substates[$index]} - split_opts="--split-substates=$numsubstates" - fi - - submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \ - sgmm-est-multi --update-flags=$flags $split_opts $add_dim_opts \ - $multi_est_opts || error_exit "Error in pass $x estimation." 
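[Aside: the --update-flags letters passed to sgmm-acc-stats and sgmm-est-multi above select which SGMM parameter sets are re-estimated: v = state vectors, M = phonetic-subspace projections, N = speaker-subspace projections, w = weight projections, c = sub-state weights, S = covariances, t = transitions. The sub-state growing schedule is also worth tracing once; a sketch of the arithmetic as written, assuming the defaults declared earlier (incsub_interval=8, total_substates=(5000 7000 9000 12000 16000 20000 25000 30000 35000 40000)):

  # splits fire on iterations with x % incsub_interval == 1 and x > 1:
  #   x=9   ->  index=(9/8)-1=0   ->  --split-substates=5000
  #   x=17  ->  index=(17/8)-1=1  ->  --split-substates=7000
  #   ...
  #   x=81  ->  index=(81/8)-1=9  ->  --split-substates=40000
  # numiters=(10+1)*8=88, so the last few iterations train at 40000 sub-states.
]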
- - # If using speaker vectors, estimate alignment model without spkvecs - if [ $[$x%$incsub_interval] -eq 0 -a $x -gt 0 ]; then - chmod -w $dir/??/$x.mdl $dir/??/$x.occs # Preserve for scoring - [ $spkdim -gt 0 ] && est_alimodel $x; - else - rm -f $dir/??/$x.mdl $dir/??/$x.occs - fi - rm -f $dir/??/$x.*.acc $dir/??/sum.acc - fi # End of current stage - x=$[$x+1]; -done - -for L in $LANGUAGES; do - ( - wdir=$dir/$L - rm -f $wdir/final.mdl $wdir/final.occs; - chmod -w $wdir/$x.mdl $wdir/$x.occs # Preserve for scoring - ln -s $wdir/$x.mdl $wdir/final.mdl; - ln -s $wdir/$x.occs $wdir/final.occs; - # If using speaker vectors, estimate alignment model without spkvecs - [ $spkdim -gt 0 ] && est_alimodel $wdir/$x.mdl; - rm -f $wdir/final.alimdl; - ln -sf $wdir/$x.alimdl $wdir/final.alimdl; - - # Print out summary of the warning messages. - for x in $wdir/log/*.log; do - n=`grep WARNING $x | wc -l`; - if [ $n -ne 0 ]; then echo "$n warnings in $x"; fi; - done - ) -done - -echo Done diff --git a/egs/gp/s1/path.sh b/egs/gp/s1/path.sh index a38149ac899..8a3b9a84d98 100644 --- a/egs/gp/s1/path.sh +++ b/egs/gp/s1/path.sh @@ -5,9 +5,9 @@ KALDIROOT=/exports/home/aghoshal/kaldi/trunk [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh KALDISRC=$KALDIROOT/src -KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin +KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lmbin FSTBIN=$KALDIROOT/tools/openfst/bin LMBIN=$KALDIROOT/tools/irstlm/bin @@ -34,4 +34,3 @@ export LC_ALL=C # Site-specific configs: [ `hostname -y` == ecdf ] && { . path_ed.sh; } - diff --git a/egs/gp/s1/steps/decode_sgmm_deltas.sh b/egs/gp/s1/steps/decode_sgmm_deltas.sh deleted file mode 100755 index 0e15ef5aef5..00000000000 --- a/egs/gp/s1/steps/decode_sgmm_deltas.sh +++ /dev/null @@ -1,162 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Arnab Ghoshal -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# Decoding script for SGMM using standard MFCC/PLP + delta + acceleration -# features. - -# assumes you are using speaker vectors [for no vectors, see -# decode_sgmm_novec_lda_etc.sh, if it exists already]. -# if this includes speaker-specific transforms, you have to provide an "old" -# decoding directory where the transforms are located. The data decoded in -# that directory must be split up in the same way as the current directory. - -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function file_exists () { - [ -f $1 ] || error_exit "$PROG: no such file '$1'" -} - -function readposint () { # Strictly speaking, reading non-negative integers - local retval=${1/#*=/}; # In case --switch=ARG format was used - [[ "$retval" =~ ^[0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not a non-negative integer." 
- echo $retval -} - -beam=13.0 -nj=1 # Default total number of jobs -jobid=0 # Default job number -qcmd="" # Options for the submit_jobs.sh script -sjopts="" # Options for the submit_jobs.sh script -use_spkvecs='' # Not expecting a model with speaker vectors, by default. - -PROG=`basename $0`; -usage="Usage: $PROG [options] <graph-dir> <data-dir> <decode-dir> [<old-decode-dir>]\n -e.g.: $PROG -j 10 0 exp/sgmm3c/graph_tgpr data/test_dev93 exp/sgmm3c/decode_dev93_tgpr exp/tri2b/decode_dev93_tgpr\n\n -Options:\n - --help\t\tPrint this message and exit.\n - --beam FLOAT\tDecoding beam (default=$beam).\n - -j INT INT\tNumber of parallel jobs to run (default=$nj) and current jobid.\n - --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n - --sjopts STRING\tOptions for the 'submit_jobs.sh' script.\n - --with-spkvecs\tModel has speaker vectors; do 2-pass decoding.\n -"; - -while [ $# -gt 0 ]; do - case "${1# *}" in # ${1# *} strips any leading spaces from the arguments - --help) echo -e $usage; exit 0 ;; - --beam) beam=$2; shift 2 ;; - -j) nj=`readposint $2`; jobid=`readposint $3`; shift 3 ;; - --qcmd) qcmd=" --qcmd=${2}"; shift 2 ;; - --sjopts) sjopts="$2"; shift 2 ;; - --with-spkvecs) use_spkvecs=1; shift ;; - -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;; - *) break ;; # end of options: interpreted as num-leaves - esac -done - -if [ $# -lt 3 -o $# -gt 4 ]; then - error_exit $usage; -fi - -[ -f path.sh ] && . path.sh - -graphdir=$1 -data=$2 -dir=$3 -transdir=$4 -acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. - -srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. - -mkdir -p $dir - -if [ $nj -gt 1 ]; then - mydata=$data/split$nj/$jobid -else - mydata=$data -fi - -requirements="$mydata/feats.scp $srcdir/final.mdl $graphdir/HCLG.fst" -[ -z "$use_spkvecs" ] || requirements=$requirements" $srcdir/final.alimdl" -for f in $requirements; do - file_exists $f -done - -if [ ! -z "$transdir" ]; then # "$transdir" nonempty.. - file_exists $transdir/$n.trans -fi - -feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | add-deltas ark:- ark:- |" - -[ ! -z "$transdir" ] && feats="$feats transform-feats --utt2spk=ark:$mydata/utt2spk ark:$transdir/$jobid.trans ark:- ark:- |" - - -# Do Gaussian selection, since we'll have two decoding passes and don't want to -# redo this. Note: it doesn't make a difference if we use final.mdl or -# final.alimdl, they have the same UBM. -sgmm-gselect $srcdir/final.mdl "$feats" "ark:|gzip -c >$dir/$jobid.gselect.gz" \ - 2>$dir/gselect$jobid.log \ - || error_exit "Error in Gaussian selection."; -gselect_opt="--gselect=ark:gunzip -c $dir/$jobid.gselect.gz|" - -target_lat="$dir/lat.$jobid.gz" -[ -z "$use_spkvecs" ] || target_lat="$dir/pre_lat.$jobid.gz" -align_model="$srcdir/final.mdl" -[ -z "$use_spkvecs" ] || align_model="$srcdir/final.alimdl" - -# Generate a state-level lattice for rescoring, with the alignment model and no -# speaker vectors. - -sgmm-latgen-faster --max-active=7000 --beam=$beam --lattice-beam=6.0 \ - --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ - --word-symbol-table=$graphdir/words.txt "$gselect_opt" $align_model \ - $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $target_lat" \ - 2> $dir/decode_pass1.$jobid.log \ - || error_exit "Error in 1st-pass decoding."; - -# Do a second pass "decoding" if using speaker vectors. -if [ !
-z "$use_spkvecs" ]; then - silphonelist=`cat $graphdir/silphones.csl` || exit 1 - ( lattice-determinize --acoustic-scale=$acwt --prune=true --beam=4.0 \ - "ark:gunzip -c $dir/pre_lat.$jobid.gz|" ark:- \ - | lattice-to-post --acoustic-scale=$acwt ark:- ark:- \ - | weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \ - | sgmm-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- \ - ark:- \ - | sgmm-est-spkvecs-gpost --spk2utt=ark:$mydata/spk2utt $srcdir/final.mdl \ - "$feats" ark:- "ark:$dir/$jobid.vecs" - ) 2> $dir/vecs.$jobid.log \ - || error_exit "Error estimating speaker vectors."; - - # Now rescore the state-level lattices with the adapted features and the - # corresponding model. Prune and determinize the lattices to limit their size. - - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$mydata/utt2spk \ - --spk-vecs=ark:$dir/$jobid.vecs $srcdir/final.mdl \ - "ark:gunzip -c $dir/pre_lat.$jobid.gz|" "$feats" \ - "ark:|lattice-determinize --acoustic-scale=$acwt --prune=true --beam=6.0 ark:- ark:- | gzip -c > $dir/lat.$jobid.gz" \ - 2>$dir/rescore.$jobid.log \ - || error_exit "Error in 2nd-pass rescoring."; - - rm $dir/pre_lat.$jobid.gz - # The top-level decoding script rescores "lat.$jobid.gz" to get final output. -fi - diff --git a/egs/gp/s1/steps/train_sgmm_deltas.sh b/egs/gp/s1/steps/train_sgmm_deltas.sh deleted file mode 100755 index e68a1757308..00000000000 --- a/egs/gp/s1/steps/train_sgmm_deltas.sh +++ /dev/null @@ -1,270 +0,0 @@ -#!/bin/bash - -# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is Subspace Gaussian Mixture Model (SGMM) training-- -# see "The subspace Gaussian mixture model--A structured model for speech recognition" -# by D. Povey et al, Computer Speech and Language, 2011. - -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function readint () { - local retval=${1/#*=/}; # In case --switch=ARG format was used - retval=${retval#0*} # Strip any leading 0's - [[ "$retval" =~ ^-?[1-9][0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not an integer." - echo $retval -} - -nj=4 # Default number of jobs -stage=-4 # Default starting stage (start with tree building) -qcmd="" # Options for the submit_jobs.sh script -sjopts="" # Options for the submit_jobs.sh script - -PROG=`basename $0`; -usage="Usage: $PROG [options] \n -e.g.: $PROG 10000 40 39 data/train data/lang exp/tri2a_ali exp/ubm3c/final.ubm exp/sgmm3c\n\n -Options:\n - --help\t\tPrint this message and exit\n - --num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n - --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n - --sjopts STRING\tOptions for the 'submit_jobs.sh' script\n - --stage INT\tStarting stage (e.g. 
-4 for SGMM init; 2 for iter 2; default=$stage)\n -"; - -while [ $# -gt 0 ]; do - case "${1# *}" in # ${1# *} strips any leading spaces from the arguments - --help) echo -e $usage; exit 0 ;; - --num-jobs) - shift; nj=`readint $1`; - [ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."; - shift ;; - --qcmd) - shift; qcmd=" --qcmd=${1}"; shift ;; - --sjopts) - shift; sjopts="$1"; shift ;; - --stage) - shift; stage=`readint $1`; shift ;; - -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;; - *) break ;; # end of options: interpreted as num-leaves - esac -done - -if [ $# != 8 ]; then - error_exit $usage; -fi - -[ -f path.sh ] && . path.sh - -# This is SGMM with speaker vectors, on top of LDA+[something] features. -# Any speaker-specific transforms are obtained from the alignment directory. -# To be run from .. - -totsubstates=$1 -phndim=$2 -spkdim=$3 -data=$4 -lang=$5 -alidir=$6 -ubm=$7 -dir=$8 - -mkdir -p $dir || exit 1; - -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" - -numiters=25 # Total number of iterations -numiters_alimdl=3 # Number of iterations for estimating alignment model. -maxiterinc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; -spkvec_iters="5 8 12 17" -add_dim_iters="6 8 10 12"; # Iters on which to increase phn dim and/or spk dim, - # if necessary, In most cases, either none of these or only the first of these - # will have any effect (we increase in increments of [feature dim]) - -oov_sym=`cat $lang/oov.txt` -silphonelist=`cat $lang/silphones.csl` - -numsubstates=`cat $dir/numleaves` # Initial #-substates. -# per-iter increment for #substates -incsubstates=$[($totsubstates-$numsubstates)/$maxiterinc] - -# Initially don't have speaker vectors, but change this after we estimate them. -spkvecs_opt= -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/TASK_ID.gselect.gz|" - -randprune=0.1 -mkdir -p $dir/log - -featspart="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$alidir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - -if [ ! -f $ubm ]; then - echo "No UBM in $ubm" - exit 1; -fi - -if [ $stage -le -4 ]; then - submit_jobs.sh "$qcmd" --log=$dir/log/init_sgmm.log $sjopts \ - sgmm-init --phn-space-dim=$phndim --spk-space-dim=$spkdim $lang/topo \ - $dir/tree $ubm $dir/0.mdl || error_exit "SGMM init failed." -fi - -if [ $stage -le -3 ]; then -# Make training graphs (this is split in $nj parts). - echo "Compiling training graphs" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/compile_graphsTASK_ID.log \ - $sjopts compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \ - "ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \ - "ark:|gzip -c >$dir/TASK_ID.fsts.gz" \ - || error_exit "Error compiling training graphs" -fi - -if [ $stage -le -2 ]; then - echo "Doing Gaussian selection" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/gselectTASK_ID.log \ - $sjopts sgmm-gselect $dir/0.mdl "$featspart" "ark,t:|gzip -c > $dir/TASK_ID.gselect.gz" \ - || error_exit "Error doing Gaussian selection" -fi - - -if [ $stage -le -1 ]; then - echo "Converting alignments" # don't bother parallelizing; very fast. 
- for n in `seq 1 $nj`; do - convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree \ - "ark:gunzip -c $alidir/$n.ali.gz|" "ark:|gzip -c >$dir/$n.ali.gz" \ - 2>$dir/log/convert.$n.log - done -fi - -x=0 -while [ $x -lt $numiters ]; do - if [ $x -eq 0 ]; then - flags=vwcSt # On first iter, don't update M or N. - elif [ $spkdim -gt 0 -a $[$x%2] -eq 1 -a \ - $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then - # Update N on odd iterations after 1st spkvec iter, if we have spk-space. - flags=vNwcSt - else # Else update M but not N. - flags=vMwcSt - fi - - if [ $stage -le $x ]; then - echo "Pass $x: update flags = '$flags' " - if echo $realign_iters | grep -w $x >/dev/null; then - echo "Aligning data" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/align.$x.TASK_ID.log \ - $sjopts sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk --beam=8 --retry-beam=40 \ - $dir/$x.mdl "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \ - "ark:|gzip -c >$dir/TASK_ID.ali.gz" \ - || error_exit "Error realigning data on iter $x" - fi - - if [ $spkdim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/spkvecs.$x.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - $spkvecs_opt "$gselect_opt" --rand-prune=$randprune $dir/$x.mdl \ - "$featspart" ark,s,cs:- ark:$dir/tmpTASK_ID.vecs \ - || error_exit "Error computing speaker vectors on iter $x" - for n in `seq 1 $nj`; do - mv $dir/tmp${n}.vecs $dir/${n}.vecs; - done - spkvecs_opt="--spk-vecs=ark:$dir/TASK_ID.vecs" - fi - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/acc.$x.TASK_ID.log \ - $sjopts sgmm-acc-stats --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --update-flags=$flags --rand-prune=$randprune $spkvecs_opt \ - "$gselect_opt" $dir/$x.mdl "$featspart" \ - "ark,s,cs:ali-to-post 'ark:gunzip -c $dir/TASK_ID.ali.gz|' ark:-|" \ - $dir/$x.TASK_ID.acc || error_exit "Error accumulating stats on iter $x" - - add_dim_opts= - if echo $add_dim_iters | grep -w $x >/dev/null; then - add_dim_opts="--increase-phn-dim=$phndim --increase-spk-dim=$spkdim" - fi - - submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \ - sgmm-est --update-flags=$flags --split-substates=$numsubstates \ - $add_dim_opts --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ - "sgmm-sum-accs - $dir/$x.*.acc|" $dir/$[$x+1].mdl \ - || error_exit "Error in pass $x estimation." - - rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs - fi - - if [ $x -lt $maxiterinc ]; then - numsubstates=$[$numsubstates+$incsubstates] - fi - x=$[$x+1]; -done - -( cd $dir; rm final.mdl final.occs 2>/dev/null; - ln -s $x.mdl final.mdl; - ln -s $x.occs final.occs ) - -if [ $spkdim -gt 0 ]; then - # If we have speaker vectors, we need an alignment model. - # The point of this last phase of accumulation is to get Gaussian-level - # alignments with the speaker vectors but accumulate stats without - # any speaker vectors; we re-estimate M, w, c and S to get a model - # that's compatible with not having speaker vectors. - - # We do this for a few iters, in this recipe. - cur_alimdl=$dir/$x.mdl - y=0; - while [ $y -lt $numiters_alimdl ]; do - echo "Pass $y of building alignment model" - if [ $y -eq 0 ]; then - flags=MwcS # First time don't update v... - else - flags=vMwcS # don't update transitions-- will probably share graph with normal model. 
- fi - - if [ $stage -le $[$y+100] ]; then - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/acc_ali.$y.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk $dir/$x.mdl \ - "$featspart" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --update-flags=$flags $cur_alimdl "$featspart" \ - ark,s,cs:- $dir/$y.TASK_ID.aliacc \ - || error_exit "Error accumulating stats for alignment model on iter $y" - - submit_jobs.sh "$qcmd" --log=$dir/log/update_ali.$y.log $sjopts \ - sgmm-est --update-flags=$flags --remove-speaker-space=true \ - $cur_alimdl "sgmm-sum-accs - $dir/$y.*.aliacc|" $dir/$[$y+1].alimdl \ - || error_exit "Error estimating alignment model on iter $y"; - rm $dir/$y.*.aliacc || exit 1; - [ $y -gt 0 ] && rm $dir/$y.alimdl - fi - cur_alimdl=$dir/$[$y+1].alimdl - y=$[$y+1] - done - (cd $dir; rm final.alimdl 2>/dev/null; ln -s $y.alimdl final.alimdl ) -fi - -# Print out summary of the warning messages. -for x in $dir/log/*.log; do - n=`grep WARNING $x | wc -l`; - if [ $n -ne 0 ]; then echo $n warnings in $x; fi; -done - -echo Done diff --git a/egs/gp/s5/path.sh b/egs/gp/s5/path.sh index af75fa50c1b..fcf365ec8b6 100644 --- a/egs/gp/s5/path.sh +++ b/egs/gp/s5/path.sh @@ -7,9 +7,9 @@ KALDI_ROOT=/homes/eva/q/qghoshal/src/kaldi/trunk [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh KALDISRC=$KALDI_ROOT/src -KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin +KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lmbin FSTBIN=$KALDI_ROOT/tools/openfst/bin LMBIN=$KALDI_ROOT/tools/irstlm/bin @@ -25,10 +25,9 @@ SCRIPTS=$kaldi_local:$kaldi_utils:$kaldi_steps export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS -# If the correct version of shorten and sox are not on the path, +# If the correct version of shorten and sox are not on the path, # the following will be set by local/gp_check_tools.sh SHORTEN_BIN= # e.g. $PWD/tools/shorten-3.6.1/bin SOX_BIN= # e.g. 
$PWD/tools/sox-14.3.2/bin - diff --git a/egs/gp/s5/run.sh b/egs/gp/s5/run.sh index e563bdff0d1..8054d02988d 100755 --- a/egs/gp/s5/run.sh +++ b/egs/gp/s5/run.sh @@ -347,12 +347,12 @@ for L in $GP_LANGUAGES; do num_states=$(grep "^$L" conf/sgmm.conf | cut -f2) num_substates=$(grep "^$L" conf/sgmm.conf | cut -f3) mkdir -p exp/$L/sgmm2a - steps/train_sgmm.sh --cmd "$train_cmd" --cluster-thresh 100 --spk-dim 0 \ + steps/train_sgmm2.sh --cmd "$train_cmd" --cluster-thresh 100 --spk-dim 0 \ $num_states $num_substates data/$L/train data/$L/lang exp/$L/tri1_ali \ exp/$L/ubm2a/final.ubm exp/$L/sgmm2a >& exp/$L/sgmm2a/train.log mkdir -p exp/$L/sgmm2b - steps/train_sgmm.sh --cmd "$train_cmd" --cluster-thresh 100 \ + steps/train_sgmm2.sh --cmd "$train_cmd" --cluster-thresh 100 \ $num_states $num_gauss data/$L/train data/$L/lang exp/$L/tri1_ali \ exp/$L/ubm2a/final.ubm exp/$L/sgmm2b >& exp/$L/sgmm2b/train.log ) & @@ -370,7 +370,7 @@ for L in $GP_LANGUAGES; do $highmem_cmd $graph_dir/mkgraph.log \ utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/$sgmm $graph_dir - steps/decode_sgmm.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ + steps/decode_sgmm2.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ exp/$L/$sgmm/decode_dev_${lm_suffix} ) & done # loop over LMs diff --git a/egs/hkust/README.txt b/egs/hkust/README.txt index 5dbde98b539..752ae0e0897 100644 --- a/egs/hkust/README.txt +++ b/egs/hkust/README.txt @@ -6,6 +6,3 @@ LDC2005S15 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005 LDC2005T32 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005T32 s5: The experiments here were based on the above corpus - - - diff --git a/egs/hkust/s5/RESULTS b/egs/hkust/s5/RESULTS index 3c4933bbbb0..6886d21f975 100644 --- a/egs/hkust/s5/RESULTS +++ b/egs/hkust/s5/RESULTS @@ -1,15 +1,30 @@ # for x in exp/*/decode; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done -%WER 80.89 [ 45422 / 56154, 1530 ins, 11018 del, 32874 sub ] exp/mono0a/decode/cer_9 -%WER 60.01 [ 33698 / 56154, 2528 ins, 5961 del, 25209 sub ] exp/tri1/decode/cer_12 -%WER 59.68 [ 33514 / 56154, 2574 ins, 5752 del, 25188 sub ] exp/tri2/decode/cer_12 -%WER 57.25 [ 32148 / 56154, 2484 ins, 5811 del, 23853 sub ] exp/tri3a/decode/cer_13 -%WER 53.47 [ 30026 / 56154, 2789 ins, 5115 del, 22122 sub ] exp/tri4a/decode/cer_13 -%WER 49.72 [ 27921 / 56154, 2833 ins, 4568 del, 20520 sub ] exp/tri5a/decode/cer_13 -%WER 43.95 [ 24681 / 56154, 2106 ins, 3890 del, 18685 sub ] exp/tri5a_mmi_b0.1/decode/cer_10 -%WER 44.60 [ 25044 / 56154, 2121 ins, 4040 del, 18883 sub ] exp/tri5a_mpe/decode/cer_11 -%WER 43.81 [ 24602 / 56154, 2843 ins, 3751 del, 18008 sub ] exp/sgmm2_5a/decode/cer_10 -exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ] +%WER 80.72 [ 45327 / 56154, 1609 ins, 10856 del, 32862 sub ] exp/mono0a/decode/cer_9 +%WER 58.86 [ 33054 / 56154, 2651 ins, 6240 del, 24163 sub ] exp/tri1/decode/cer_13 +%WER 58.32 [ 32748 / 56154, 2491 ins, 6279 del, 23978 sub ] exp/tri2/decode/cer_14 +%WER 56.49 [ 31719 / 56154, 2601 ins, 5979 del, 23139 sub ] exp/tri3a/decode/cer_13 +%WER 51.75 [ 29060 / 56154, 2879 ins, 5088 del, 21093 sub ] exp/tri4a/decode/cer_13 +%WER 47.36 [ 26596 / 56154, 2740 ins, 4577 del, 19279 sub ] exp/tri5a/decode/cer_13 +%WER 42.55 [ 23894 / 56154, 1877 ins, 4437 del, 17580 sub ] exp/tri5a_mpe/decode/cer_13 +%WER 42.19 [ 23693 / 56154, 2138 ins, 3871 del, 17684 sub ] exp/tri5a_mmi_b0.1/decode/cer_10 +%WER 41.11 [ 23086 / 56154, 2863 ins, 3608 del, 16615 sub ] 
exp/sgmm2_5a/decode/cer_10 +# nnet2 online results +%WER 38.32 [ 21518 / 56154, 2344 ins, 4273 del, 14901 sub ] exp/nnet2_online/nnet_ms/decode/cer_12 +%WER 38.01 [ 21345 / 56154, 2555 ins, 4173 del, 14617 sub ] exp/nnet2_online/nnet_ms_online/decode/cer_12 +%WER 37.10 [ 20832 / 56154, 2399 ins, 3936 del, 14497 sub ] exp/nnet2_online/nnet_ms_online/decode_per_utt/cer_12 + +# nnet3 online results +%WER 32.77 [ 18400 / 56154, 1971 ins, 3525 del, 12904 sub ] exp/nnet3/tdnn_sp/decode/cer_10 +%WER 33.02 [ 18540 / 56154, 2335 ins, 3251 del, 12954 sub ] exp/nnet3/tdnn_sp_online/decode/cer_9 +%WER 34.01 [ 19098 / 56154, 2195 ins, 3482 del, 13421 sub ] exp/nnet3/tdnn_sp_online/decode_per_utt/cer_10 + +# chain online results +%WER 28.24 [ 15858 / 56154, 1454 ins, 3415 del, 10989 sub ] exp/chain/tdnn_7h_sp/decode/cer_10 +%WER 28.16 [ 15812 / 56154, 1648 ins, 2824 del, 11340 sub ] exp/chain/tdnn_7h_sp_online/decode/cer_9 +%WER 29.55 [ 16594 / 56154, 1547 ins, 3437 del, 11610 sub ] exp/chain/tdnn_7h_sp_online/decode_per_utt/cer_10 + +## results before adding pitch # nnet1 results exp/dnn5b_pretrain-dbn_dnn/decode/cer_10:%WER 39.42 [ 22134 / 56154, 2507 ins, 3730 del, 15897 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode/cer_11:%WER 36.50 [ 20499 / 56154, 1915 ins, 3312 del, 15272 sub ] @@ -18,11 +33,11 @@ exp/cnn5c/decode/cer_10:%WER 40.13 [ 22536 / 56154, 2329 ins, 3962 del, 16245 su exp/cnn5c_pretrain-dbn_dnn/decode/cer_10:%WER 38.80 [ 21790 / 56154, 2470 ins, 3582 del, 15738 sub ] exp/lstm5e/decode/cer_10:%WER 37.61 [ 21121 / 56154, 1829 ins, 3941 del, 15351 sub ] -# nnet2 results +# nnet2 mfcc results exp/nnet2_5d/decode/cer_10:%WER 38.59 [ 21669 / 56154, 2498 ins, 3581 del, 15590 sub ] -# ConvNet with 2 convolutional layers and 2 ReLU layers +# ConvNet using fbank, with 2 convolutional layers and 2 ReLU layers exp/nnet2_convnet/decode/cer_10:%WER 41.19 [ 23129 / 56154, 2599 ins, 3782 del, 16748 sub ] -# nnet3 results (using speed perturbed data) +# nnet3 mfcc results (using speed perturbed data) exp/nnet3/tdnn_sp/decode_dev/cer_10:%WER 33.79 [ 18977 / 56154, 2027 ins, 3485 del, 13465 sub ] -exp/nnet3/lstm_sp_ld5/decode_dev/cer_9:%WER 33.51 [ 18815 / 56154, 1813 ins, 3249 del, 13753 sub ] \ No newline at end of file +exp/nnet3/lstm_sp_ld5/decode_dev/cer_9:%WER 33.51 [ 18815 / 56154, 1813 ins, 3249 del, 13753 sub ] diff --git a/egs/hkust/s5/conf/mfcc_hires.conf b/egs/hkust/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/hkust/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. 
+--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800) diff --git a/egs/hkust/s5/conf/online_cmvn.conf b/egs/hkust/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/hkust/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/hkust/s5/conf/online_pitch.conf b/egs/hkust/s5/conf/online_pitch.conf new file mode 100644 index 00000000000..926bcfca92a --- /dev/null +++ b/egs/hkust/s5/conf/online_pitch.conf @@ -0,0 +1 @@ +--sample-frequency=8000 diff --git a/egs/hkust/s5/conf/pitch.conf b/egs/hkust/s5/conf/pitch.conf new file mode 100644 index 00000000000..926bcfca92a --- /dev/null +++ b/egs/hkust/s5/conf/pitch.conf @@ -0,0 +1 @@ +--sample-frequency=8000 diff --git a/egs/hkust/s5/local/chain/run_tdnn.sh b/egs/hkust/s5/local/chain/run_tdnn.sh new file mode 100755 index 00000000000..4829e9736ca --- /dev/null +++ b/egs/hkust/s5/local/chain/run_tdnn.sh @@ -0,0 +1,211 @@ +#!/bin/bash + +# This script is based on run_tdnn_7h.sh in swbd chain recipe. + +set -e + +# configs for 'chain' +affix= +stage=12 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn_7h # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= + +# training options +num_epochs=4 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +leftmost_questions_truncate=-1 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=2 +num_jobs_final=12 +minibatch_size=128 +frames_per_eg=150 +remove_egs=true +common_egs_dir= +xent_regularize=0.1 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes.
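[Gloss on the comment above, not part of the patch: 'chain' models evaluate the network output at one third of the input frame rate and use a different HMM topology, so the decision tree from the conventional GMM system cannot be reused; the next call builds a fresh tree over the existing alignments with --frame-subsampling-factor 3. The result can be inspected with the same tree-info binary the script itself queries a few lines below:

  tree-info $treedir/tree   # prints num-pdfs, context-width, central-position
]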
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 5000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=625 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=625 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=625 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=625 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=625 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=625 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_dev \ + $graph_dir data/dev_hires $dir/decode || exit 1; +fi + +if [ $stage -le 16 ]; then + steps/online/nnet3/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \ + --add-pitch true \ + data/lang exp/nnet2_online/extractor "$dir" ${dir}_online || exit 1; +fi + +if [ $stage -le 17 ]; then + # do the actual online decoding with iVectors, carrying info forward from + # previous utterances of the same speaker. + steps/online/nnet3/decode.sh --config conf/decode.config \ + --cmd "$decode_cmd" --nj 10 --acwt 1.0 --post-decode-acwt 10.0 \ + "$graph_dir" data/dev_hires \ + ${dir}_online/decode || exit 1; +fi + +if [ $stage -le 18 ]; then + # this version of the decoding treats each utterance separately + # without carrying forward speaker information. + steps/online/nnet3/decode.sh --config conf/decode.config \ + --cmd "$decode_cmd" --nj 10 --per-utt true --acwt 1.0 --post-decode-acwt 10.0 \ + "$graph_dir" data/dev_hires \ + ${dir}_online/decode_per_utt || exit 1; +fi diff --git a/egs/hkust/s5/local/character_tokenizer b/egs/hkust/s5/local/character_tokenizer new file mode 100755 index 00000000000..a3d8098d17f --- /dev/null +++ b/egs/hkust/s5/local/character_tokenizer @@ -0,0 +1,32 @@ +#!/usr/bin/env perl +# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 +use utf8; + +use open qw(:encoding(utf8)); +binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + +while (<>) { + @F = split " "; + print $F[0] . 
" "; + foreach $s (@F[1..$#F]) { + if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) { + print " $s"; + } else { + @chars = split "", $s; + foreach $c (@chars) { + if ($c =~ /\p{InCJK_Unified_Ideographs}/) { + print " $c"; + } else { + print "$c"; + } + } + } + print " "; + } + print "\n"; +} + + diff --git a/egs/hkust/s5/local/create_oov_char_lexicon.pl b/egs/hkust/s5/local/create_oov_char_lexicon.pl old mode 100644 new mode 100755 index aaf5d3bcb9b..0c146c9a123 --- a/egs/hkust/s5/local/create_oov_char_lexicon.pl +++ b/egs/hkust/s5/local/create_oov_char_lexicon.pl @@ -1,5 +1,5 @@ -#!/usr/bin/perl -# Copyright 2016 LeSpeech (Author: Xingyu Na) +#!/usr/bin/env perl +# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na) # # A script for char-based Chinese OOV lexicon generation. # diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh index 9fb6fe07cbb..207f03af36b 100755 --- a/egs/hkust/s5/local/hkust_data_prep.sh +++ b/egs/hkust/s5/local/hkust_data_prep.sh @@ -1,98 +1,73 @@ #!/bin/bash - -. path.sh +. ./path.sh || exit 1; if [ $# != 2 ]; then - echo "Usage: hkust_data_prep.sh AUDIO_PATH TEXT_PATH" - exit 1; + echo "Usage: $0 " + echo " $0 /export/corpora/LDC03S04 /export/corpora/LDC03T19" + exit 1; fi -HKUST_AUDIO_DIR=$1 -HKUST_TEXT_DIR=$2 +hkust_audio_dir=$1 +hkust_text_dir=$2 train_dir=data/local/train dev_dir=data/local/dev - -case 0 in #goto here - 1) -;; #here: -esac - mkdir -p $train_dir mkdir -p $dev_dir #data directory check -if [ ! -d $HKUST_AUDIO_DIR ] || [ ! -d $HKUST_TEXT_DIR ]; then - echo "Error: run.sh requires two directory arguments" +if [ ! -d $hkust_audio_dir ] || [ ! -d $hkust_text_dir ]; then + echo "Error: $0 requires two directory arguments" exit 1; fi #find sph audio file for train dev resp. 
-find $HKUST_AUDIO_DIR -iname "*.sph" | grep -i "audio/train" > $train_dir/sph.flist -find $HKUST_AUDIO_DIR -iname "*.sph" | grep -i "audio/dev" > $dev_dir/sph.flist +find $hkust_audio_dir -iname "*.sph" | grep -i "audio/train" > $train_dir/sph.flist || exit 1; +find $hkust_audio_dir -iname "*.sph" | grep -i "audio/dev" > $dev_dir/sph.flist || exit 1; n=`cat $train_dir/sph.flist $dev_dir/sph.flist | wc -l` [ $n -ne 897 ] && \ echo Warning: expected 897 data data files, found $n - #Transcriptions preparation #collect all trans, convert encodings to utf-8, -find $HKUST_TEXT_DIR -iname "*.txt" | grep -i "trans/train" | xargs cat |\ +find $hkust_text_dir -iname "*.txt" | grep -i "trans/train" | xargs cat |\ iconv -f GBK -t utf-8 - | perl -e ' while (<STDIN>) { @A = split(" ", $_); if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } + if ($A[0] eq "#") { $utt_id = $A[1]; } if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; + $A[2] =~ s:^([AB])\:$:$1:; + printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; + for($n = 3; $n < @A; $n++) { print " $A[$n]" }; + print "\n"; } } - ' | sort -k1 > $train_dir/transcripts.txt + ' | sort -k1 > $train_dir/transcripts.txt || exit 1; -find $HKUST_TEXT_DIR -iname "*.txt" | grep -i "trans/dev" | xargs cat |\ +find $hkust_text_dir -iname "*.txt" | grep -i "trans/dev" | xargs cat |\ iconv -f GBK -t utf-8 - | perl -e ' while (<STDIN>) { @A = split(" ", $_); if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } + if ($A[0] eq "#") { $utt_id = $A[1]; } if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; + $A[2] =~ s:^([AB])\:$:$1:; + printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; + for($n = 3; $n < @A; $n++) { print " $A[$n]" }; + print "\n"; } } - ' | sort -k1 > $dev_dir/transcripts.txt + ' | sort -k1 > $dev_dir/transcripts.txt || exit 1; - - -#transcripts normalization and segmentation +#transcripts normalization and segmentation #(this needs external tools), -#Download and configure segment tools -pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` -export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages -if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then - echo "--- Downloading mmseg-1.3.0 ..." - echo "NOTE: it assumes that you have Python, Setuptools installed on your system!" - wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz - tar xf tools/mmseg-1.3.0.tar.gz -C tools - cd tools/mmseg-1.3.0 - mkdir -p lib/python${pyver}/site-packages - python setup.py build - python setup.py install --prefix=. - cd ../.. - if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then - echo "mmseg is not found - installation failed?" - exit 1 - fi -fi +python -c "import mmseg" 2>/dev/null || \ + (echo "mmseg is not found.
Checkout tools/extra/install_mmseg.sh" && exit 1;) cat $train_dir/transcripts.txt |\ sed -e 's// /g' |\ @@ -101,7 +76,7 @@ cat $train_dir/transcripts.txt |\ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ local/hkust_normalize.pl |\ python local/hkust_segment.py |\ - awk '{if (NF > 1) print $0;}' > $train_dir/text + awk '{if (NF > 1) print $0;}' > $train_dir/text || exit 1; cat $dev_dir/transcripts.txt |\ sed -e 's// /g' |\ @@ -110,11 +85,11 @@ cat $dev_dir/transcripts.txt |\ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ local/hkust_normalize.pl |\ python local/hkust_segment.py |\ - awk '{if (NF > 1) print $0;}' > $dev_dir/text + awk '{if (NF > 1) print $0;}' > $dev_dir/text || exit 1; # some data is corrupted. Delete them cat $train_dir/text | grep -v 20040527_210939_A901153_B901154-A-035691-035691 | egrep -v "A:|B:" > tmp -mv tmp $train_dir/text +mv tmp $train_dir/text || exit 1; #Make segment files from transcript #segments file format is: utt-id side-id start-time end-time, e.g.: @@ -129,16 +104,14 @@ awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];e print segment " " audioname "-" side " " startf/100 " " endf/100}' <$dev_dir/text > $dev_dir/segments awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $dev_dir/sph.flist > $dev_dir/sph.scp +sph2pipe=`which sph2pipe` || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; - -sph2pipe=`cd ../../..; echo $PWD/tools/sph2pipe_v2.5/sph2pipe` -[ ! -f $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; - -cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); +cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ sort > $train_dir/wav.scp || exit 1; -cat $dev_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); +cat $dev_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ sort > $dev_dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -162,6 +135,5 @@ cat $train_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $train_dir/sp cat $dev_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $dev_dir/utt2spk || exit 1; cat $dev_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $dev_dir/spk2utt || exit 1; -echo HKUST data preparation succeeded - -exit 1; +echo "$0: HKUST data preparation succeeded" +exit 0 diff --git a/egs/hkust/s5/local/hkust_format_data.sh b/egs/hkust/s5/local/hkust_format_data.sh index 33cf8fa22ef..7fc9b701f49 100755 --- a/egs/hkust/s5/local/hkust_format_data.sh +++ b/egs/hkust/s5/local/hkust_format_data.sh @@ -1,7 +1,7 @@ #!/bin/bash # -if [ -f path.sh ]; then . path.sh; fi +if [ -f ./path.sh ]; then . 
./path.sh; fi silprob=0.5 mkdir -p data/lang_test data/train data/dev diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh index 49d2f8feff8..6aca37586ed 100755 --- a/egs/hkust/s5/local/hkust_prepare_dict.sh +++ b/egs/hkust/s5/local/hkust_prepare_dict.sh @@ -2,74 +2,59 @@ # Copyright 2016 LeSpeech (Author: Xingyu Na) # prepare dictionary for HKUST -# it is done for English and Chinese separately, +# it is done for English and Chinese separately, # For English, we use CMU dictionary, and Sequitur G2P # for OOVs, while all englist phone set will concert to Chinese # phone set at the end. For Chinese, we use an online dictionary, # for OOV, we just produce pronunciation using Charactrt Mapping. - -. path.sh -[ $# != 0 ] && echo "Usage: local/hkust_prepare_dict.sh" && exit 1; +. ./path.sh + +[ $# != 0 ] && echo "Usage: $0" && exit 1; train_dir=data/local/train dev_dir=data/local/dev dict_dir=data/local/dict mkdir -p $dict_dir mkdir -p $dict_dir/lexicon-{en,ch} - + # extract full vocabulary cat $train_dir/text $dev_dir/text | awk '{for (i = 2; i <= NF; i++) print $i}' |\ sed -e 's/ /\n/g' | sort -u | grep -v '\[LAUGHTER\]' | grep -v '\[NOISE\]' |\ - grep -v '\[VOCALIZED-NOISE\]' > $dict_dir/words.txt + grep -v '\[VOCALIZED-NOISE\]' > $dict_dir/words.txt || exit 1; # split into English and Chinese -cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt -cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt +cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt || exit 1; +cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt || exit 1; -##### produce pronunciations for english +##### produce pronunciations for english if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then echo "--- Downloading CMU dictionary ..." svn co -r 13068 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \ $dict_dir/cmudict || exit 1; fi +# format cmudict echo "--- Striping stress and pronunciation variant markers from cmudict ..." perl $dict_dir/cmudict/scripts/make_baseform.pl \ $dict_dir/cmudict/cmudict.0.7a /dev/stdout |\ - sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt + sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt || exit 1; +# extract in-vocab lexicon and oov words echo "--- Searching for English OOV words ..." -gawk 'NR==FNR{words[$1]; next;} !($1 in words)' \ +awk 'NR==FNR{words[$1]; next;} !($1 in words)' \ $dict_dir/cmudict/cmudict-plain.txt $dict_dir/lexicon-en/words-en.txt |\ - egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt + egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt || exit 1; -gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \ +awk 'NR==FNR{words[$1]; next;} ($1 in words)' \ $dict_dir/lexicon-en/words-en.txt $dict_dir/cmudict/cmudict-plain.txt |\ - egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt + egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt || exit 1; wc -l $dict_dir/lexicon-en/words-en-oov.txt wc -l $dict_dir/lexicon-en/lexicon-en-iv.txt -pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` -export PYTHONPATH=$PYTHONPATH:`pwd`/tools/g2p/lib/python${pyver}/site-packages -if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then - echo "--- Downloading Sequitur G2P ..." - echo "NOTE: it assumes that you have Python, NumPy and SWIG installed on your system!" 
-  wget -P tools http://www-i6.informatik.rwth-aachen.de/web/Software/g2p-r1668.tar.gz
-  tar xf tools/g2p-r1668.tar.gz -C tools
-  cd tools/g2p
-  echo '#include <cstdio>' >> Utility.hh # won't compile on my system w/o this "patch"
-  python setup.py build
-  python setup.py install --prefix=.
-  cd ../..
-  if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then
-    echo "Sequitur G2P is not found - installation failed?"
-    exit 1
-  fi
-fi
-
+# setup g2p and generate oov lexicon
 if [ ! -f conf/g2p_model ]; then
   echo "--- Downloading a pre-trained Sequitur G2P model ..."
   wget http://sourceforge.net/projects/kaldi/files/sequitur-model4 -O conf/g2p_model
@@ -80,24 +65,31 @@ if [ ! -f conf/g2p_model ]; then
 fi

 echo "--- Preparing pronunciations for OOV words ..."
-python tools/g2p/lib/python${pyver}/site-packages/g2p.py \
-  --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt > $dict_dir/lexicon-en/lexicon-en-oov.txt
+g2p=`which g2p.py`
+if [ ! -x $g2p ]; then
+  echo "g2p.py is not found. Checkout tools/extras/install_sequitur.sh."
+  exit 1
+fi
+g2p.py --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt \
+  > $dict_dir/lexicon-en/lexicon-en-oov.txt || exit 1;

+# merge in-vocab and oov lexicon
 cat $dict_dir/lexicon-en/lexicon-en-oov.txt $dict_dir/lexicon-en/lexicon-en-iv.txt |\
-  sort > $dict_dir/lexicon-en/lexicon-en-phn.txt
+  sort > $dict_dir/lexicon-en/lexicon-en-phn.txt || exit 1;

+# convert cmu phonemes to pinyin phonemes
 mkdir $dict_dir/map
-cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu
+cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu || exit 1;
 cat conf/pinyin2cmu | awk -v cmu=$dict_dir/map/cmu \
   'BEGIN{while((getline<cmu)>0) {dict[$1]=1;}}
-  {for (i = 2; i <=NF; i++) if (dict[$i]) print $i;}' | sort -u > $dict_dir/map/cmu-used
+  {for (i = 2; i <=NF; i++) if (dict[$i]) print $i;}' | sort -u > $dict_dir/map/cmu-used || exit 1;
 cat $dict_dir/map/cmu | awk -v cmu=$dict_dir/map/cmu-used \
   'BEGIN{while((getline<cmu)>0) {dict[$1]=1;}}
-  {if (!dict[$1]) print $1;}' > $dict_dir/map/cmu-not-used
+  {if (!dict[$1]) print $1;}' > $dict_dir/map/cmu-not-used || exit 1;

-gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
   $dict_dir/map/cmu-not-used conf/cmu2pinyin |\
-  egrep -v '<.?s>' > $dict_dir/map/cmu-py
+  egrep -v '<.?s>' > $dict_dir/map/cmu-py || exit 1;

 cat $dict_dir/map/cmu-py | \
   perl -e '
@@ -116,9 +108,9 @@ cat $dict_dir/map/cmu-py | \
     push(@entry, $W);
     for($i = 0; $i < @A; $i++) { push(@entry, @{$py2ph{$A[$i]}}); }
     print "@entry";
-    print "\n";
-  }
-' conf/pinyin2cmu > $dict_dir/map/cmu-cmu
+    print "\n";
+  }
+' conf/pinyin2cmu > $dict_dir/map/cmu-cmu || exit 1;

 cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \
   perl -e '
@@ -135,45 +127,46 @@ cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \
     @entry = ();
     $W = shift(@A);
     push(@entry, $W);
-    for($i = 0; $i < @A; $i++) {
+    for($i = 0; $i < @A; $i++) {
       if (exists $py2ph{$A[$i]}) { push(@entry, @{$py2ph{$A[$i]}}); }
       else {push(@entry, $A[$i])};
     }
     print "@entry";
-    print "\n";
+    print "\n";
 }
-' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt
+' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt || exit 1;

-##### produce pronunciations for chinese
+##### produce pronunciations for chinese
 if [ !
-f $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt ]; then
+  echo "------------- Downloading cedict dictionary ---------------"
   mkdir -p $dict_dir/cedict
-  wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
+  wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
   gunzip $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
 fi

 cat $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt | grep -v '#' | awk -F '/' '{print $1}' |\
-  perl -e '
+  perl -e '
  while (<STDIN>) {
    @A = split(" ", $_);
    print $A[1];
    for($n = 2; $n < @A; $n++) {
-      $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:;
-      $tmp = uc($A[$n]);
+      $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:;
+      $tmp = uc($A[$n]);
       print " $tmp";
    }
    print "\n";
  }
-  ' | sort -k1 > $dict_dir/cedict/ch-dict.txt
+  ' | sort -k1 > $dict_dir/cedict/ch-dict.txt || exit 1;

 echo "--- Searching for Chinese OOV words ..."
-gawk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
   $dict_dir/cedict/ch-dict.txt $dict_dir/lexicon-ch/words-ch.txt |\
-  egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt
+  egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt || exit 1;

-gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
   $dict_dir/lexicon-ch/words-ch.txt $dict_dir/cedict/ch-dict.txt |\
-  egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt
+  egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt || exit 1;

 wc -l $dict_dir/lexicon-ch/words-ch-oov.txt
 wc -l $dict_dir/lexicon-ch/lexicon-ch-iv.txt
@@ -187,10 +180,10 @@ cat $dict_dir/cedict/ch-dict.txt |\
   while (<STDIN>) {
     @A = split(" ", $_);
     $word_len = length($A[0]);
-    $proun_len = @A - 1 ;
+    $proun_len = @A - 1 ;
     if ($word_len == $proun_len) {print $_;}
   }
-  ' > $dict_dir/cedict/ch-dict-1.txt
+  ' > $dict_dir/cedict/ch-dict-1.txt || exit 1;

 # extract chars
 cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\
@@ -203,12 +196,14 @@ cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\
       print "$_\n";
     }
   }
-  ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt
+  ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt || exit 1;

 # extract individual pinyins
-cat $dict_dir/cedict/ch-dict-1.txt | awk '{for(i=2; i<=NF; i++) print $i}' | sed -e 's/ /\n/g' > $dict_dir/lexicon-ch/ch-char-pinyin.txt
+cat $dict_dir/cedict/ch-dict-1.txt |\
+  awk '{for(i=2; i<=NF; i++) print $i}' |\
+  sed -e 's/ /\n/g' > $dict_dir/lexicon-ch/ch-char-pinyin.txt || exit 1;

-# first make sure number of characters and pinyins
+# first make sure number of characters and pinyins
 # are equal, so that a char-based dictionary can
 # be composed.
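An aside before the length check below: ch-dict-1.txt only keeps CEDICT entries with exactly one pinyin syllable per character, so the character stream and the syllable stream can be zipped positionally into a per-character lexicon. A toy, self-contained sketch of that zip, with made-up file names (chars.txt, pinyin.txt) and example characters in place of the recipe's real files:

    #!/bin/bash
    # Pair each character with its pinyin syllable; refuse to continue if
    # the two streams are out of step, since positional pairing would then
    # produce garbage entries.
    printf '%s\n' 你 好 > chars.txt
    printf '%s\n' NI3 HAO3 > pinyin.txt
    nchars=$(wc -l < chars.txt)
    npinyin=$(wc -l < pinyin.txt)
    if [ "$nchars" -ne "$npinyin" ]; then
      echo "char/pinyin count mismatch: $nchars vs $npinyin"
      exit 1
    fi
    # tab-separated char-level lexicon: "你 NI3" and "好 HAO3"
    paste chars.txt pinyin.txt | sort -u

The hunk that follows performs exactly this check and pairing on ch-char.txt and ch-char-pinyin.txt.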
 nchars=`wc -l < $dict_dir/lexicon-ch/ch-char.txt`
@@ -218,12 +213,13 @@ if [ $nchars -ne $npinyin ]; then
   exit 1
 fi

-paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt | sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt
+paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt |\
+  sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt || exit 1;

 # create a multiple pronunciation dictionary
 cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
   perl -e '
-  my $prev = "";
+  my $prev = "";
   my $out_line = "";
   while (<STDIN>) {
     @A = split(" ", $_);
@@ -232,14 +228,15 @@ cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
     #print length($prev);
     if (length($prev) == 0) { $out_line = $_; chomp($out_line);}
     if (length($prev)>0 && $cur ne $prev) { print $out_line; print "\n"; $out_line = $_; chomp($out_line);}
-    if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
+    if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
     $prev = $cur;
   }
-  print $out_line;
-  ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt
+  print $out_line;
+  ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt || exit 1;

 # get lexicon for Chinese OOV words
-perl local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt
+local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt \
+  $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt || exit 1;

 # separate multiple prons for Chinese OOV lexicon
 cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
@@ -249,8 +246,8 @@ cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
   while (<STDIN>) {
     @A = split(" ", $_);
     @entry = ();
-    push(@entry, $A[0]);
-    for($i = 1; $i < @A; $i++ ) {
+    push(@entry, $A[0]);
+    for($i = 1; $i < @A; $i++ ) {
       @py = split("/", $A[$i]);
       @entry1 = @entry;
       @entry = ();
@@ -258,27 +255,27 @@ cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
       for ($k = 0; $k < @py; $k++) {
         $tmp = $entry1[$j]." ".$py[$k];
         push(@entry, $tmp);
-      }
-    }
+      }
+    }
   }
   for ($i = 0; $i < @entry; $i++) {
-    print $entry[$i];
+    print $entry[$i];
     print "\n";
-  }
+  }
 }
-' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt
+' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt || exit 1;

 # compose IV and OOV lexicons for Chinese
 cat $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt $dict_dir/lexicon-ch/lexicon-ch-iv.txt |\
-  awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt
+  awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt || exit 1;

 # convert Chinese pinyin to CMU format
 cat $dict_dir/lexicon-ch/lexicon-ch.txt | sed -e 's/U:/V/g' | sed -e 's/ R\([0-9]\)/ ER\1/g'|\
-  utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt
+  utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt || exit 1;

 # combine English and Chinese lexicons
 cat $dict_dir/lexicon-en/lexicon-en.txt $dict_dir/lexicon-ch/lexicon-ch-cmu.txt |\
-  sort -u > $dict_dir/lexicon1.txt
+  sort -u > $dict_dir/lexicon1.txt || exit 1;

 cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
   sort -u |\
@@ -287,8 +284,8 @@ cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{f
   while (<STDIN>) {
     $phone = $_;
     chomp($phone);
-    chomp($_);
-    $phone =~ s:([A-Z]+)[0-9]:$1:;
+    chomp($_);
+    $phone =~ s:([A-Z]+)[0-9]:$1:;
     if (exists $ph_cl{$phone}) { push(@{$ph_cl{$phone}}, $_) }
     else { $ph_cl{$phone} = [$_]; }
   }
@@ -314,4 +311,5 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ",
   echo '<UNK> SPN' ) | \
   cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;

-exit 1;
+echo "$0: HKUST dict preparation succeeded"
+exit 0;
diff --git a/egs/hkust/s5/local/hkust_segment.py b/egs/hkust/s5/local/hkust_segment.py
index dff335fc10a..ba5ffc053d5 100755
--- a/egs/hkust/s5/local/hkust_segment.py
+++ b/egs/hkust/s5/local/hkust_segment.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 #coding:utf-8
-#!/usr/bin/env python
+
+from __future__ import print_function
 import sys
 from mmseg import seg_txt
 for line in sys.stdin:
   blks = str.split(line)
@@ -12,4 +13,4 @@
       continue
     for j in seg_txt(blks[i]):
       out_line += " " + j
-  print out_line
+  print(out_line)
diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh
index 4362bdd708f..8520bb26d2d 100755
--- a/egs/hkust/s5/local/hkust_train_lms.sh
+++ b/egs/hkust/s5/local/hkust_train_lms.sh
@@ -5,44 +5,33 @@
 text=data/local/train/text
-lexicon=data/local/dict/lexicon.txt
+lexicon=data/local/dict/lexicon.txt
 for f in "$text" "$lexicon"; do
   [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
 done

 # This script takes no arguments. It assumes you have already run
-# swbd_p1_data_prep.sh.
+# swbd_p1_data_prep.sh.
 # It takes as input the files
 #data/local/train/text
 #data/local/dict/lexicon.txt

 dir=data/local/lm
 mkdir -p $dir
-export LC_ALL=C # You'll get errors about things being not sorted, if you
-# have a different locale.
-export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
-( # First make sure the kaldi_lm toolkit is installed.
-  cd ../../../tools || exit 1;
-  if [ -d kaldi_lm ]; then
-    echo Not installing the kaldi_lm toolkit since it is already there.
-  else
-    echo Downloading and installing the kaldi_lm tools
-    if [ !
-f kaldi_lm.tar.gz ]; then
-      wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
-    fi
-    tar -xvzf kaldi_lm.tar.gz || exit 1;
-    cd kaldi_lm
-    make || exit 1;
-    echo Done making the kaldi_lm tools
-  fi
-) || exit 1;
-
-mkdir -p $dir
+export LC_ALL=C # You'll get errors about things being not sorted, if you
+                # have a different locale.
+kaldi_lm=`which train_lm.sh`
+if [ ! -x $kaldi_lm ]; then
+  echo "$0: train_lm.sh is not found. That might mean it's not installed"
+  echo "$0: or it is not added to PATH"
+  echo "$0: Use the script tools/extras/install_kaldi_lm.sh to install it"
+  exit 1
+fi

 cleantext=$dir/text.no_oov

-cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
 {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<unk> ");} } printf("\n");}' \
   > $cleantext || exit 1;
@@ -73,7 +62,7 @@ train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
 # Perplexity over 128254.000000 words is 90.446690

 # note: output is
-# data/local/lm/3gram-mincount/lm_unpruned.gz
+# data/local/lm/3gram-mincount/lm_unpruned.gz

 exit 0
@@ -94,10 +83,10 @@ cat $dir/word_map | awk '{print $1}' | cat - <(echo "<s>"; echo "</s>" ) > $sdir
 ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \
   -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
-ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout
+ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout
 # 0 zeroprobs, logprob= -250954 ppl= 90.5091 ppl1= 132.482

 # Note: perplexity SRILM gives to Kaldi-LM model is same as kaldi-lm reports above.
 # Difference in WSJ must have been due to different treatment of <unk>.
-ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout
+ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout
 # 0 zeroprobs, logprob= -250913 ppl= 90.4439 ppl1= 132.379
diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
index 046f723ca1e..2ef33e43081 100755
--- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
@@ -1,5 +1,7 @@
 #!/bin/bash

+# This script is based on swbd/s5c/local/nnet3/run_ivector_common.sh
+
 # this script contains some common (shared) parts of the run_nnet*.sh scripts.

 . cmd.sh
@@ -7,31 +9,25 @@ stage=0

 num_threads_ubm=32
-speed_perturb=true
-use_sat_alignments=true
+ivector_extractor=

 set -e
 . cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh

-if [ "$use_sat_alignments" == "true" ] ; then
-  gmm_dir=exp/tri5a
-  align_script=steps/align_fmllr.sh
-else
-  gmm_dir=exp/tri3a
-  align_script=steps/align_si.sh
-fi
+gmm_dir=exp/tri5a
+align_script=steps/align_fmllr.sh

-if [ $stage -le 1 ]; then
-  # Create high-resolution MFCC features (with 40 cepstra instead of 13).
+if [ $stage -le 1 ] && [ -z $ivector_extractor ]; then
+  # Create high-resolution MFCC features (with 40 cepstra instead of 13) with pitch.
   # this shows how you can split across multiple file-systems. we'll split the
   # MFCC dir across multiple locations. You might want to be careful here, if you
   # have multiple copies of Kaldi checked out and run the same recipe, not to let
   # them overwrite each other.
   mfccdir=mfcc_hires
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
-    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
   fi

   for datadir in train dev; do
@@ -51,38 +47,42 @@ for line in sys.stdin.readlines():
       mv $dir/wav.scp_scaled $dir/wav.scp
     fi

-    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+    steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
       --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
     steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
+
+    # make MFCC data dir without pitch to extract iVector
+    utils/data/limit_feature_dim.sh 0:39 data/${datadir}_hires data/${datadir}_hires_nopitch || exit 1;
+    steps/compute_cmvn_stats.sh data/${datadir}_hires_nopitch exp/make_hires/$datadir $mfccdir || exit 1;
   done
 fi

-if [ $stage -le 2 ]; then
+if [ $stage -le 2 ] && [ -z $ivector_extractor ]; then
   # Train a system just for its LDA+MLLT transform. We use --num-iters 13
   # because after we get the transform (12th iter is the last), any further
   # training is pointless.
   steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
     --realign-iters "" \
     --splice-opts "--left-context=3 --right-context=3" \
-    5000 10000 data/train_hires data/lang \
+    5000 10000 data/train_hires_nopitch data/lang \
     ${gmm_dir}_ali exp/nnet3/tri5
 fi

-if [ $stage -le 3 ]; then
+if [ $stage -le 3 ] && [ -z $ivector_extractor ]; then
   steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
     --num-frames 700000 \
-    --num-threads $num_threads_ubm \
-    data/train_hires 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
+    data/train_hires_nopitch 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
 fi

-if [ $stage -le 4 ]; then
+if [ $stage -le 4 ] && [ -z $ivector_extractor ]; then
   # iVector extractors can in general be sensitive to the amount of data, but
   # this one has a fairly small dim (defaults to 100)
   steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
-    data/train_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+    data/train_hires_nopitch exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+  ivector_extractor=exp/nnet3/extractor
 fi

-if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then
+if [ $stage -le 5 ]; then
   # Although the nnet will be trained by high resolution data,
   # we still have to perturb the normal data to get the alignment
   # _sp stands for speed-perturbed
@@ -94,7 +94,7 @@ if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then
   mfccdir=mfcc_perturbed
   for x in train_sp; do
-    steps/make_mfcc.sh --cmd "$train_cmd" --nj 70 \
+    steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 \
       data/$x exp/make_mfcc/$x $mfccdir || exit 1;
     steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
   done
@@ -103,28 +103,31 @@ if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then
   $align_script --nj 30 --cmd "$train_cmd" \
     data/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1

-  # Now perturb the high resolution daa
+  # Now perturb the high resolution data
   utils/copy_data_dir.sh data/train_sp data/train_sp_hires
   mfccdir=mfcc_perturbed_hires
   for x in train_sp_hires; do
-    steps/make_mfcc.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \
+    steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \
      data/$x exp/make_hires/$x $mfccdir || exit 1;
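Worth spelling out, since the same edit repeats in several files above and below: the nnet consumes 40-dim hires MFCCs plus 3 pitch features (43 dims per frame), while the iVector extractor must only ever see the first 40 dims, which is what the _nopitch copies made by limit_feature_dim.sh are for. A condensed sketch of the two steps (nj and the run.pl choice are illustrative; the directory names follow the recipe's conventions):

    #!/bin/bash
    . ./path.sh
    # 40 MFCC dims + 3 pitch dims = 43 per frame, for nnet training.
    steps/make_mfcc_pitch_online.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
      --cmd run.pl data/train_hires exp/make_hires/train mfcc_hires
    steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train mfcc_hires
    # Keep dims 0..39 (the MFCCs) so the iVector extractor never sees pitch.
    utils/data/limit_feature_dim.sh 0:39 data/train_hires data/train_hires_nopitch
    steps/compute_cmvn_stats.sh data/train_hires_nopitch exp/make_hires/train mfcc_hires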
steps/compute_cmvn_stats.sh data/$x exp/make_hires/$x $mfccdir || exit 1; + # create MFCC data dir without pitch to extract iVector + utils/data/limit_feature_dim.sh 0:39 data/$x data/${x}_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/${x}_nopitch exp/make_hires/$x $mfccdir || exit 1; done utils/fix_data_dir.sh data/train_sp_hires fi -if [ "$speed_perturb" == "true" ]; then - train_set=train_sp -else - train_set=train +train_set=train_sp +if [ -z $ivector_extractor ]; then + echo "iVector extractor is not found!" + exit 1; fi if [ $stage -le 6 ]; then rm -f exp/nnet3/.error 2>/dev/null - ivectordir=exp/nnet3/ivectors_${train_set}_hires + ivectordir=exp/nnet3/ivectors_${train_set} if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage fi # We extract iVectors on all the train data, which will be what we train the # system on. With --utts-per-spk-max 2, the script. pairs the utterances @@ -133,11 +136,10 @@ if [ $stage -le 6 ]; then # having a larger number of speakers is helpful for generalization, and to # handle per-utterance decoding well (iVector starts at zero). - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_hires_max2 + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires_nopitch data/${train_set}_hires_nopitch_max2 steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ - data/${train_set}_hires_max2 \ - exp/nnet3/extractor \ - exp/nnet3/ivectors_${train_set}_hires \ + data/${train_set}_hires_nopitch_max2 \ + $ivector_extractor $ivectordir \ || touch exp/nnet3/.error [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1; fi @@ -145,7 +147,7 @@ fi if [ $stage -le 7 ]; then rm -f exp/nnet3/.error 2>/dev/null steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \ - data/dev_hires exp/nnet3/extractor exp/nnet3/ivectors_dev || touch exp/nnet3/.error & + data/dev_hires_nopitch $ivector_extractor exp/nnet3/ivectors_dev || touch exp/nnet3/.error & wait [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1; fi diff --git a/egs/hkust/s5/local/nnet3/run_tdnn.sh b/egs/hkust/s5/local/nnet3/run_tdnn.sh index 11f12ccf394..30611446ee4 100755 --- a/egs/hkust/s5/local/nnet3/run_tdnn.sh +++ b/egs/hkust/s5/local/nnet3/run_tdnn.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh + # this is the standard "tdnn" system, built in nnet3; it's what we use to # call multi-splice. @@ -10,9 +12,7 @@ set -e stage=0 train_stage=-10 -use_sat_alignments=true affix= -speed_perturb=true common_egs_dir= # training options @@ -33,81 +33,102 @@ use_ivectors=true . ./utils/parse_options.sh if ! 
cuda-compiled; then - cat < $data_dir/wav.scp_scaled || exit 1; + mv $data_dir/wav.scp_scaled $data_dir/wav.scp + steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/train_scaled_hires exp/make_hires/train_scaled $mfccdir; + steps/compute_cmvn_stats.sh data/train_scaled_hires exp/make_hires/train_scaled $mfccdir; + + # we need these features for the run_nnet2_ms.sh + steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir; + steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir; + + # Remove the small number of utterances that couldn't be extracted for some + # reason (e.g. too short; no such file). + utils/fix_data_dir.sh data/train_scaled_hires; + utils/fix_data_dir.sh data/train_hires; + + # Create MFCC+pitchs for the dev set + utils/copy_data_dir.sh data/dev data/dev_hires + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ + data/dev_hires exp/make_hires/dev $mfccdir; + steps/compute_cmvn_stats.sh data/dev_hires exp/make_hires/dev $mfccdir; + utils/fix_data_dir.sh data/dev_hires # remove segments with problems + + # Take the MFCCs for training iVector extractors + utils/data/limit_feature_dim.sh 0:39 data/train_scaled_hires data/train_scaled_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/train_scaled_hires_nopitch exp/make_hires/train $mfccdir || exit 1; + utils/data/limit_feature_dim.sh 0:39 data/train_hires data/train_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/train_hires_nopitch exp/make_hires/train $mfccdir || exit 1; + utils/data/limit_feature_dim.sh 0:39 data/dev_hires data/dev_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/dev_hires_nopitch exp/make_hires/dev $mfccdir || exit 1; + + # Take the first 30k utterances (about 1/5th of the data) this will be used + # for the diagubm training + utils/subset_data_dir.sh --first data/train_scaled_hires_nopitch 30000 data/train_scaled_hires_30k + + # create a 100k subset for the lda+mllt training + utils/subset_data_dir.sh --first data/train_scaled_hires_nopitch 100000 data/train_scaled_hires_100k; +fi + +if [ $stage -le 2 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + 5500 90000 data/train_scaled_hires_100k \ + data/lang exp/tri2_ali_100k exp/nnet2_online/tri3b +fi + +if [ $stage -le 3 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/train_scaled_hires_30k 512 exp/nnet2_online/tri3b exp/nnet2_online/diag_ubm +fi + +if [ $stage -le 4 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). 
+ steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/train_scaled_hires_100k exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1; +fi + +if [ $stage -le 5 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_nopitch data/train_hires_nopitch_max2 + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/train_hires_nopitch_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors_train || exit 1; +fi + + +exit 0; diff --git a/egs/hkust/s5/local/online/run_nnet2_ms.sh b/egs/hkust/s5/local/online/run_nnet2_ms.sh new file mode 100755 index 00000000000..c3177e1136e --- /dev/null +++ b/egs/hkust/s5/local/online/run_nnet2_ms.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +train_stage=-10 +use_gpu=true +splice_indexes="layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" +common_egs_dir= +dir=exp/nnet2_online/nnet_ms + +. ./path.sh +. ./utils/parse_options.sh + +if $use_gpu; then + if ! cuda-compiled; then + cat < " && exit; - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 - -if [ -f $data/stm ]; then # use sclite scoring. - echo "$data/stm exists: using local/score_sclite.sh" - eval local/score_sclite.sh $orig_args -else - echo "$data/stm does not exist: using local/score_basic.sh" - eval local/score_basic.sh $orig_args -fi +echo "$0: Done" diff --git a/egs/hkust/s5/local/score_basic.sh b/egs/hkust/s5/local/score_basic.sh deleted file mode 100755 index e54537654be..00000000000 --- a/egs/hkust/s5/local/score_basic.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. -# 2016 LeSpeech (Author: Xingyu Na) - -# begin configuration section. -cmd=run.pl -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_basic.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/text $lang/words.txt $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. 
eval2000 - -mkdir -p $dir/scoring/log - - -function filter_text { - perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } - while() { @A = split(" ", $_); $id = shift @A; print "$id "; - foreach $a (@A) { if (!defined $bad{$a}) { print "$a "; }} print "\n"; }' \ - '[NOISE]' '[LAUGHTER]' '[VOCALIZED-NOISE]' '' '%HESITATION' -} - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-best-path --lm-scale=LMWT --word-symbol-table=$lang/words.txt \ - "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; - -for lmwt in `seq $min_lmwt $max_lmwt`; do - utils/int2sym.pl -f 2- $lang/words.txt <$dir/scoring/$lmwt.tra | \ - filter_text > $dir/scoring/$lmwt.txt || exit 1; -done - -filter_text <$data/text >$dir/scoring/text.filt - -#for character error rate -cat $dir/scoring/text.filt | awk '{ print $1}' > $dir/scoring/utt_id -cat $dir/scoring/text.filt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' |\ - perl -e ' - use encoding utf8; - while () { - @words = split(" ", $_); - foreach (@words) { - @chars = split("", $_); - foreach (@chars) { - print "$_ "; - } - } - print "\n"; - } - ' > $dir/scoring/utt_tra -paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/char.filt - -for lmwt in `seq $min_lmwt $max_lmwt`; do - cat $dir/scoring/$lmwt.txt | awk '{ print $1}' > $dir/scoring/utt_id - cat $dir/scoring/$lmwt.txt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' |\ - perl -e ' - use encoding utf8; - while () { - @words = split(" ", $_); - foreach (@words) { - @chars = split("", $_); - foreach (@chars) { - print "$_ "; - } - } - print "\n"; - } - ' > $dir/scoring/utt_tra - paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/${lmwt}.char -done - -rm $dir/scoring/utt_tra $dir/scoring/utt_id - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/text.filt ark:$dir/scoring/LMWT.txt ">&" $dir/wer_LMWT || exit 1; - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.cer.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/char.filt ark:$dir/scoring/LMWT.char ">&" $dir/cer_LMWT || exit 1; - -exit 0 diff --git a/egs/hkust/s5/local/score_sclite.sh b/egs/hkust/s5/local/score_sclite.sh deleted file mode 100755 index c1062440494..00000000000 --- a/egs/hkust/s5/local/score_sclite.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -iter=final -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../${iter}.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! 
-f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - nbest-to-ctm ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/hkust/s5/local/score_sclite_conf.sh b/egs/hkust/s5/local/score_sclite_conf.sh deleted file mode 100755 index a6a2759629d..00000000000 --- a/egs/hkust/s5/local/score_sclite_conf.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - # the escaping gets a bit crazy here, sorry... 
- $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \ - lattice-align-words $lang/phones/word_boundary.int $model "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/hkust/s5/local/wer_hyp_filter b/egs/hkust/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/hkust/s5/local/wer_hyp_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/hkust/s5/local/wer_output_filter b/egs/hkust/s5/local/wer_output_filter new file mode 100755 index 00000000000..aceeeec41b4 --- /dev/null +++ b/egs/hkust/s5/local/wer_output_filter @@ -0,0 +1,25 @@ +#!/usr/bin/env perl +# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 +use utf8; + +use open qw(:encoding(utf8)); +binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + +while (<>) { + @F = split " "; + print $F[0] . " "; + foreach $s (@F[1..$#F]) { + if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) { + print ""; + } else { + print "$s" + } + print " "; + } + print "\n"; +} + + diff --git a/egs/hkust/s5/local/wer_ref_filter b/egs/hkust/s5/local/wer_ref_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/hkust/s5/local/wer_ref_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/hkust/s5/path.sh b/egs/hkust/s5/path.sh index 5adfbeec7c2..2d17b17a84a 100755 --- a/egs/hkust/s5/path.sh +++ b/egs/hkust/s5/path.sh @@ -1,5 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -#export KALDI_ROOT=/home/dpovey/kaldi-trunk-test +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh diff --git a/egs/hkust/s5/run.sh b/egs/hkust/s5/run.sh index bdd3e7797e8..0129dd45df2 100755 --- a/egs/hkust/s5/run.sh +++ b/egs/hkust/s5/run.sh @@ -1,9 +1,9 @@ #!/bin/bash -# Copyright 2012 Chao Weng +# Copyright 2012 Chao Weng +# 2016 Alibaba Robotics Corp. 
(Author: Xingyu Na) # Apache 2.0 -#exit 1; # This is a shell script, but it's recommended that you run the commands one by # one by copying and pasting into the shell. # Caution: some of the graph creation steps use quite a bit of memory, so you @@ -11,16 +11,13 @@ . cmd.sh -# Data Preparation, +# Data Preparation, local/hkust_data_prep.sh /export/corpora/LDC/LDC2005S15/ /export/corpora/LDC/LDC2005T32/ # Lexicon Preparation, -local/hkust_prepare_dict.sh +local/hkust_prepare_dict.sh || exit 1; - - - -# Phone Sets, questions, L compilation +# Phone Sets, questions, L compilation utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang # LM training @@ -29,35 +26,27 @@ local/hkust_train_lms.sh # G compilation, check LG composition local/hkust_format_data.sh -# Now make MFCC features. +# Now make MFCC plus pitch features. # mfccdir should be some place with a largish disk where you # want to store MFCC features. mfccdir=mfcc -for x in train dev; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$x exp/make_mfcc/$x $mfccdir || exit 1; +for x in train dev; do + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 10 data/$x exp/make_mfcc/$x $mfccdir || exit 1; steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; done # after this, the next command will remove the small number of utterances # that couldn't be extracted for some reason (e.g. too short; no such file). utils/fix_data_dir.sh data/train || exit 1; +utils/subset_data_dir.sh --first data/train 100000 data/train_100k || exit 1; steps/train_mono.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/mono0a || exit 1; - # Monophone decoding utils/mkgraph.sh data/lang_test exp/mono0a exp/mono0a/graph || exit 1 -# note: local/decode.sh calls the command line once for each -# test, and afterwards averages the WERs into (in this case -# exp/mono/decode/ - - - steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ exp/mono0a/graph data/dev exp/mono0a/decode - - # Get alignments from monophone system. steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/mono0a exp/mono_ali || exit 1; @@ -71,8 +60,6 @@ utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1; steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ exp/tri1/graph data/dev exp/tri1/decode - - # align tri1 steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri1 exp/tri1_ali || exit 1; @@ -91,7 +78,10 @@ steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri2 exp/tri2_ali || exit 1; -# Train tri3a, which is LDA+MLLT, +steps/align_si.sh --cmd "$train_cmd" --nj 10 \ + data/train_100k data/lang exp/tri2 exp/tri2_ali_100k || exit 1; + +# Train tri3a, which is LDA+MLLT, steps/train_lda_mllt.sh --cmd "$train_cmd" \ 2500 20000 data/train data/lang exp/tri2_ali exp/tri3a || exit 1; @@ -123,51 +113,28 @@ utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph || exit 1; steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ exp/tri5a/graph data/dev exp/tri5a/decode || exit 1; - -# MMI starting from system in tri5a. Use the same data (100k_nodup). -# Later we'll use all of it. 
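For orientation, the surviving part of run.sh is the standard Kaldi GMM ladder, where each system is trained on alignments produced by its predecessor and then re-aligns the data for the next one. A skeleton of the first rungs (the 2500/20000 leaf and Gaussian counts are an assumption based on common hkust settings, not quoted from this diff):

    #!/bin/bash
    . ./cmd.sh
    . ./path.sh
    # monophone bootstrap
    steps/train_mono.sh --cmd "$train_cmd" --nj 10 data/train data/lang exp/mono0a
    steps/align_si.sh --cmd "$train_cmd" --nj 10 data/train data/lang exp/mono0a exp/mono_ali
    # first triphone pass, trained on the monophone alignments
    steps/train_deltas.sh --cmd "$train_cmd" 2500 20000 \
      data/train data/lang exp/mono_ali exp/tri1
    steps/align_si.sh --cmd "$train_cmd" --nj 10 data/train data/lang exp/tri1 exp/tri1_ali
    # ...and so on through tri2, LDA+MLLT (tri3a) and SAT, up to tri5a as above.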
steps/align_fmllr.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri5a exp/tri5a_ali || exit 1; -steps/make_denlats.sh --cmd "$train_cmd" --nj 10 --transform-dir exp/tri5a_ali \ - --config conf/decode.config \ - data/train data/lang exp/tri5a exp/tri5a_denlats || exit 1; -steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ - data/train data/lang exp/tri5a_ali exp/tri5a_denlats exp/tri5a_mmi_b0.1 || exit 1; -steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/tri5a/graph data/dev exp/tri5a_mmi_b0.1/decode || exit 1 ; - -# Do MPE. -steps/train_mpe.sh --cmd "$train_cmd" data/train data/lang exp/tri5a_ali exp/tri5a_denlats exp/tri5a_mpe || exit 1; - -steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/tri5a/graph data/dev exp/tri5a_mpe/decode || exit 1 ; +# discriminative training +# local/run_discriminative.sh # SGMM system [sgmm5a] -steps/train_ubm.sh --cmd "$train_cmd" \ - 900 data/train data/lang exp/tri5a_ali exp/ubm5a || exit 1; +# local/run_sgmm.sh -steps/train_sgmm2.sh --cmd "$train_cmd" \ - 14000 35000 data/train data/lang exp/tri5a_ali \ - exp/ubm5a/final.ubm exp/sgmm2_5a || exit 1; +# nnet1 dnn +# local/nnet/run_dnn.sh -utils/mkgraph.sh data/lang_test exp/sgmm2_5a exp/sgmm2_5a/graph || exit 1; -steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/sgmm2_5a/graph data/dev exp/sgmm2_5a/decode || exit 1; +# online nnet2 +local/online/run_nnet2_ms.sh -# nnet1 dnn -local/nnet/run_dnn.sh +# online nnet3 +local/nnet3/run_tdnn.sh -# nnet2 -local/nnet2/run_5d.sh -local/nnet2/run_convnet.sh +# online chain +local/chain/run_tdnn.sh # getting results (see RESULTS file) -for x in exp/*/decode; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null for x in exp/*/decode; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null -exit 1; - +exit 0; diff --git a/egs/iban/s5/local/prepare_lm.sh b/egs/iban/s5/local/prepare_lm.sh index a19dc18f566..10d5e276aa3 100755 --- a/egs/iban/s5/local/prepare_lm.sh +++ b/egs/iban/s5/local/prepare_lm.sh @@ -10,7 +10,7 @@ set -e -o pipefail local/train_lms_srilm.sh --train-text data/train/text data/ data/srilm -nl -nrz -w10 corpus/LM/iban-bp-2012.txt | sort -R > data/local/external_text +nl -nrz -w10 corpus/LM/iban-bp-2012.txt | utils/shuffle_list.pl > data/local/external_text local/train_lms_srilm.sh --train-text data/local/external_text data/ data/srilm_external # let's do ngram interpolation of the previous two LMs @@ -21,7 +21,7 @@ for w in 0.9 0.8 0.7 0.6 0.5; do ngram -lm data/srilm/lm.gz -mix-lm data/srilm_external/lm.gz \ -lambda $w -write-lm data/srilm_interp/lm.${w}.gz echo -n "data/srilm_interp/lm.${w}.gz " - ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s + ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s - done | sort -k15,15g > data/srilm_interp/perplexities.txt # for basic decoding, let's use only a trigram LM diff --git a/egs/iban/s5/local/train_lms_srilm.sh b/egs/iban/s5/local/train_lms_srilm.sh index 9ed88842650..f72596e750a 100755 --- a/egs/iban/s5/local/train_lms_srilm.sh +++ b/egs/iban/s5/local/train_lms_srilm.sh @@ -206,9 +206,9 @@ echo "--------------------" echo "Computing perplexity" echo "--------------------" ( - for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; 
done - for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done ) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh index bda883f16c2..aeb0a7164e2 100755 --- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh @@ -10,7 +10,7 @@ set -e # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# stage=0 @@ -44,7 +44,6 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.000001 @@ -59,8 +58,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -103,7 +102,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -112,9 +111,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done train_ivector_dir=${train_ivector_dir}_fs @@ -133,7 +132,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
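A note on the frame-shifting loop above: chain models subsample the output by frame_subsampling_factor, so shifting the input features by every offset in [-factor/2, factor/2] lets the discriminative examples cover all frame phases rather than always the same one. Reduced to its essentials (the factor, nj and data-dir name are illustrative, not quoted from the diff):

    #!/bin/bash
    . ./path.sh
    fsf=3                  # frame_subsampling_factor of the chain model
    data=data/train_hires
    for x in $(seq -$((fsf/2)) $((fsf/2))); do
      # writes ${data}_fs$x, with utterance ids prefixed "fs$x-"
      steps/shift_feats.sh --cmd run.pl --nj 10 $x $data exp/shift_hires mfcc_hires
      utils/fix_data_dir.sh ${data}_fs$x
    done
    utils/combine_data.sh ${data}_fs ${data}_fs-1 ${data}_fs0 ${data}_fs1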
num_threads_denlats=6 @@ -147,16 +146,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -169,16 +165,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $train_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -191,7 +184,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ ${degs_dir} $dir ; fi @@ -202,7 +195,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$[x*frame_subsampling_factor] - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ @@ -219,7 +212,7 @@ if [ $stage -le 5 ]; then done done wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 fi if [ $stage -le 6 ] && $cleanup; then @@ -231,4 +224,3 @@ fi exit 0; - diff --git a/egs/librispeech/s5/local/data_prep.sh b/egs/librispeech/s5/local/data_prep.sh index 5a264a07464..dea93525e28 100755 --- a/egs/librispeech/s5/local/data_prep.sh +++ b/egs/librispeech/s5/local/data_prep.sh @@ -33,7 +33,7 @@ utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender utt2dur=$dst/utt2dur; [[ -f "$utt2dur" ]] && rm $utt2dur -for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do +for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do reader=$(basename $reader_dir) if ! 
[ $reader -eq $reader ]; then # not integer. echo "$0: unexpected subdirectory name $reader" @@ -53,7 +53,7 @@ for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do exit 1; fi - find $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ + find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt diff --git a/egs/librispeech/s5/local/nnet2/run_5c.sh b/egs/librispeech/s5/local/nnet2/run_5c.sh index bf261b93910..956a8f09348 100755 --- a/egs/librispeech/s5/local/nnet2/run_5c.sh +++ b/egs/librispeech/s5/local/nnet2/run_5c.sh @@ -1,7 +1,7 @@ #!/bin/bash # This is neural net training on top of adapted 40-dimensional features. -# +# train_stage=-10 use_gpu=true @@ -16,8 +16,8 @@ test_sets="dev-clean dev-other" if $use_gpu; then if ! cuda-compiled; then - cat < # stage to do partial re-run from." echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" diff --git a/egs/lre/v1/lid/train_ivector_extractor.sh b/egs/lre/v1/lid/train_ivector_extractor.sh index 8e238985f99..18f536a60cb 100755 --- a/egs/lre/v1/lid/train_ivector_extractor.sh +++ b/egs/lre/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -146,7 +146,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh index 740fad7aceb..bc0f8db572d 100755 --- a/egs/lre/v1/run.sh +++ b/egs/lre/v1/run.sh @@ -50,9 +50,9 @@ rm foo local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train # This commented script is an alternative to the above utterance -# splitting method. Here we split the utterance based on the number of +# splitting method. Here we split the utterance based on the number of # frames which are voiced, rather than the total number of frames. -# max_voiced=3000 +# max_voiced=3000 # local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train use_vtln=true @@ -61,7 +61,7 @@ if $use_vtln; then cp -rt data/${t} data/${t}_novtln rm -r data/${t}_novtln/{split,.backup,spk2warp} 2>/dev/null || true steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 100 --cmd "$train_cmd" \ - data/${t}_novtln exp/make_mfcc $mfccdir + data/${t}_novtln exp/make_mfcc $mfccdir lid/compute_vad_decision.sh data/${t}_novtln exp/make_mfcc $mfccdir done # Vtln-related things: @@ -115,7 +115,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ # Alternatively, a diagonal UBM can replace the full UBM used above. # The preceding calls to train_diag_ubm.sh and train_full_ubm.sh # can be commented out and replaced with the following lines. -# +# # This results in a slight degradation but could improve error rate when # there is less training data than used in this example. # @@ -125,12 +125,12 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 diff --git a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh index 972348766b5..aeced4fb273 100755 --- a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh +++ b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh @@ -12,6 +12,7 @@ nj=40 cmd="run.pl" stage=-2 +cleanup=true # End configuration section. 
@@ -77,4 +78,11 @@ $cmd $dir/log/init.log \ "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ $dir/final.ubm || exit 1; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi + exit 0; diff --git a/egs/lre07/v1/lid/nnet2/get_egs2.sh b/egs/lre07/v1/lid/nnet2/get_egs2.sh index 27cf82bd1a1..7806dce4894 100755 --- a/egs/lre07/v1/lid/nnet2/get_egs2.sh +++ b/egs/lre07/v1/lid/nnet2/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! 
-z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh index 4809f42e633..533001934ab 100755 --- a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh +++ b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen # 2014 Vimal Manohar @@ -9,7 +9,7 @@ # This is a modified version of train_multisplice_accel2.sh in # steps/nnet2/ for language recognition. The main difference is -# that it uses different get_lda.sh and get_egs2.sh scripts. +# that it uses different get_lda.sh and get_egs2.sh scripts. # # The original train_multisplice_accel2.sh was a modified version of # train_pnorm_multisplice2.sh (still using pnorm). The "accel" refers to the @@ -25,11 +25,11 @@ num_epochs=15 # Number of epochs of training; initial_effective_lrate=0.01 final_effective_lrate=0.001 bias_stddev=0.5 -pnorm_input_dim=3000 +pnorm_input_dim=3000 pnorm_output_dim=300 minibatch_size=128 # by default use a smallish minibatch size for neural net # training; this controls instability which would otherwise - # be a problem with multi-threaded update. + # be a problem with multi-threaded update. samples_per_iter=400000 # each iteration of training, see this many samples # per job. This option is passed to get_egs.sh @@ -66,7 +66,7 @@ splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" # so hidden layer indexing is different from component count -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. update_period=4 # relates to online preconditioning: says how often we update the subspace. @@ -78,11 +78,11 @@ precondition_rank_out=80 # relates to online preconditioning mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. combine_num_threads=8 -combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage. +combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage. cleanup=true egs_dir= lda_opts= @@ -92,7 +92,7 @@ transform_dir= # If supplied, overrides alidir feat_type= # Can be used to force "raw" features. 
align_cmd= # The cmd that is passed to steps/nnet2/align.sh align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no] -realign_times= # List of times on which we realign. Each time is +realign_times= # List of times on which we realign. Each time is # floating point number strictly between 0 and 1, which # will be multiplied by the num-iters to get an iteration # number. @@ -127,10 +127,10 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... note, you might have to reduce --mem" + echo " # versus your defaults, because it gets multiplied by the --num-threads argument." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" @@ -148,7 +148,7 @@ if [ $# != 4 ]; then echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -372,7 +372,7 @@ while [ $x -lt $num_iters ]; do ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process; this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);"); - echo "On iteration $x, learning rate is $this_learning_rate." + echo "On iteration $x, learning rate is $this_learning_rate." if [ ! -z "${realign_this_iter[$x]}" ]; then prev_egs_dir=$cur_egs_dir @@ -417,7 +417,7 @@ while [ $x -lt $num_iters ]; do steps/nnet2/remove_egs.sh $prev_egs_dir fi fi - + # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd $dir/log/compute_prob_valid.$x.log \ @@ -461,7 +461,7 @@ while [ $x -lt $num_iters ]; do ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. - + # We can't easily use a single parallel SGE job to do the main training, # because the computation of which archive and which --frame option # to use for each job is a little complex, so we spawn each one separately. 
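To make the usage-message warning above concrete: on SGE-style queues the memory request is per slot, so the effective reservation is roughly --mem multiplied by --num-threads. A hedged example (the figures and log name are hypothetical):

# parallel_opts="--num-threads 16 --mem 1G" reserves about 16 * 1G = 16G in
# total for the job; leaving a default of --mem 4G in place would ask for
# ~64G, and the job might never get scheduled.
$cmd --num-threads 16 --mem 1G $dir/log/train.0.log some-multithreaded-program ...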
@@ -500,7 +500,7 @@ while [ $x -lt $num_iters ]; do n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) { $fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn"; undef $logprob; while (<F>) { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } } - close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; + close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; $best_n=$n; } } print "$best_n\n"; ' $num_jobs_nnet $dir/log/train.$x.%d.log) || exit 1; [ -z "$n" ] && echo "Error getting best model" && exit 1; cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1; @@ -537,7 +537,7 @@ if [ $stage -le $num_iters ]; then cur_offset=0 # current offset from first_model_combine. for n in $(seq $max_models_combine); do next_offset=$[($n*$num_models_combine)/$max_models_combine] - sub_list="" + sub_list="" for o in $(seq $cur_offset $[$next_offset-1]); do iter=$[$first_model_combine+$o] mdl=$dir/$iter.mdl diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh index 60f2452f3b7..a5e256818ce 100755 --- a/egs/lre07/v1/lid/train_diag_ubm.sh +++ b/egs/lre07/v1/lid/train_diag_ubm.sh @@ -29,7 +29,7 @@ cleanup=true min_gaussian_weight=0.0001 remove_low_count_gaussians=true # set this to false if you need #gauss to stay fixed. num_threads=32 -parallel_opts="-pe smp 32" +parallel_opts="--num-threads 32" # End configuration section. echo "$0 $@" # Print the command line for logging @@ -49,7 +49,7 @@ if [ $# != 3 ]; then echo " --stage # stage to do partial re-run from." echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" @@ -129,10 +129,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh index 8e238985f99..55bd54bb275 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads.
E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extraction @@ -135,27 +135,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm $dir/post.*.gz rm $dir/final.ie 2>/dev/null ln -s $x.ie $dir/final.ie diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh index 7464ce5faea..573258e7b88 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh @@ -9,16 +9,16 @@ # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, created by the script lid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive.
It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" +# is how many threads a program uses; the "num_processes" is the number of separate +# processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -29,8 +29,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -95,9 +95,9 @@ utils/split_data.sh $data $nj_full || exit 1; sdata_dnn=$data_dnn/split$nj_full; utils/split_data.sh $data_dnn $nj_full || exit 1; - -parallel_opts="-pe smp $[$num_threads*$num_processes]" + +parallel_opts="--num-threads $[$num_threads*$num_processes]" # Set up features. @@ -114,7 +114,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi # Do Gaussian selection and posterior extraction @@ -153,24 +153,21 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; - echo "Updating model (pass $x)" - nt=$[$num_threads*$num_processes] # use the same number of threads that - # each accumulation process uses, since we - # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + echo "Updating model (pass $x)" + nt=$[$num_threads*$num_processes] # use the same number of threads that + # each accumulation process uses, since we + # can be sure the queue will support this many.
+ $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done diff --git a/egs/lre07/v1/run.sh b/egs/lre07/v1/run.sh index a4ff4d909ba..8664494e558 100755 --- a/egs/lre07/v1/run.sh +++ b/egs/lre07/v1/run.sh @@ -127,12 +127,12 @@ utils/subset_data_dir.sh data/train 5000 data/train_5k utils/subset_data_dir.sh data/train 10000 data/train_10k -lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_5k 2048 exp/diag_ubm_2048 -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_10k exp/diag_ubm_2048 exp/full_ubm_2048_10k -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 35G" \ data/train exp/full_ubm_2048_10k exp/full_ubm_2048 # Alternatively, a diagonal UBM can replace the full UBM used above. @@ -148,7 +148,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --use-weights true \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 @@ -162,10 +162,10 @@ utils/fix_data_dir.sh data/train_lr echo "**Language count for logistic regression training (after splitting long utterances):**" awk '{print $2}' data/train_lr/utt2lang | sort | uniq -c | sort -nr -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train_lr exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 lid/run_logistic_regression.sh --prior-scale 0.70 \ diff --git a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh index a223e12333f..51fcf401cb2 100755 --- a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh +++ b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh @@ -19,13 +19,13 @@ set -e # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat < [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/mini_librispeech/s5/local/chain/run_tdnn.sh b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..642c20ec191 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# This is a basic TDNN experiment. 
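+# (Concretely: it builds a tree on the low-resolution speed-perturbed data,
+# trains a chain TDNN with steps/nnet3/chain/train.py on train_clean_5, and
+# decodes dev_clean_2; the figures quoted below come from one such run.)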
+ +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp: num-iters=6 nj=2..5 num-params=7.0M dim=40+100->2309 combine=-0.072->-0.069 xent:train/valid[3,5,final]=(-2.10,-1.62,-1.48/-2.26,-1.85,-1.77) logprob:train/valid[3,5,final]=(-0.096,-0.069,-0.060/-0.124,-0.107,-0.104) + +# local/chain/compare_wer.sh --online exp/chain/tdnn1a_sp +# System tdnn1a_sp +#WER dev_clean_2 (tgsmall) 18.58 +# [online:] 18.49 +#WER dev_clean_2 (tglarge) 13.35 +# [online:] 13.47 +# Final train prob -0.0596 +# Final valid prob -0.1036 +# Final train prob (xent) -1.4843 +# Final valid prob (xent) -1.7723 + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l " + echo "e.g.: $0 http://www.openslr.org/resources/11 data/local/lm" + exit 1 +fi + +base_url=$1 +dst_dir=$2 + +# given a filename returns the corresponding file size in bytes +# The switch cases below can be autogenerated by entering the data directory and running: +# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done +function filesize() { + case $1 in + "3-gram.arpa.gz") echo "759636181";; + "3-gram.pruned.1e-7.arpa.gz") echo "34094057";; + "3-gram.pruned.3e-7.arpa.gz") echo "13654242";; + "librispeech-lexicon.txt") echo "5627653";; + "librispeech-vocab.txt") echo "1737588";; + *) echo "";; + esac +} + +function check_and_download () { + [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; } + fname=$1 + echo "Downloading file '$fname' into '$dst_dir'..." + expect_size="$(filesize $fname)" + [[ ! -z "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; } + if [[ -s $dst_dir/$fname ]]; then + # In the following statement, the first version works on Linux, and the part + # after '||' works on macOS. + f=$dst_dir/$fname + fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + if [[ "$fsize" -eq "$expect_size" ]]; then + echo "'$fname' already exists and appears to be complete" + return 0 + else + echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..."
+ fi + fi + wget --no-check-certificate -O $dst_dir/$fname $base_url/$fname || { + echo "Error while trying to download $fname!" + return 1 + } + f=$dst_dir/$fname + # In the following statement, the first version works on Linux, and the part after '||' + # works on macOS. + fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; } + return 0 +} + +mkdir -p $dst_dir + +for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz \ + librispeech-vocab.txt librispeech-lexicon.txt; do + check_and_download $f || exit 1 +done + +cd $dst_dir +ln -sf 3-gram.pruned.1e-7.arpa.gz lm_tgmed.arpa.gz +ln -sf 3-gram.pruned.3e-7.arpa.gz lm_tgsmall.arpa.gz +ln -sf 3-gram.arpa.gz lm_tglarge.arpa.gz + +exit 0 diff --git a/egs/mini_librispeech/s5/local/format_lms.sh b/egs/mini_librispeech/s5/local/format_lms.sh new file mode 120000 index 00000000000..cd7ba62c0f3 --- /dev/null +++ b/egs/mini_librispeech/s5/local/format_lms.sh @@ -0,0 +1 @@ +../../../librispeech/s5/local/format_lms.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..095e85cc338 --- /dev/null +++ b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..82bb46d64a9 --- /dev/null +++ b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train_clean_5 +test_sets="dev_clean_2" +gmm=tri3b + +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. 
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 2041 combine=-0.71->-0.58 loglike:train/valid[20,31,combined]=(-2.78,-0.95,-0.57/-2.94,-1.31,-0.98) accuracy:train/valid[20,31,combined]=(0.48,0.75,0.81/0.45,0.67,0.71) + +# local/nnet3/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +#WER dev_clean_2 (tgsmall) 17.67 17.01 +# [online:] 18.06 17.26 +#WER dev_clean_2 (tglarge) 13.43 12.63 +# [online:] 13.73 12.94 
+# Final train prob -0.3660 -0.5680 +# Final valid prob -1.0236 -0.9771 +# Final train acc 0.8737 0.8067 +# Final valid acc 0.7222 0.7144 + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1b # affix for the TDNN+LSTM directory name +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # minimum Bayes risk decoding (confusion network)." + echo " --min_lmwt <int> # minimum LM-weight for lattice rescoring " + echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/egs/mini_librispeech/s5/local/subset_dataset.sh b/egs/mini_librispeech/s5/local/subset_dataset.sh new file mode 100755 index 00000000000..050128247a4 --- /dev/null +++ b/egs/mini_librispeech/s5/local/subset_dataset.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Luminar Technologies, Inc.
(author: Daniel Galvez) +# Apache 2.0 + +# The following commands were used to generate the mini_librispeech dataset: +# +# Note that data generation is random. This could be fixed by +# providing a seed argument to the shuf program. + +if [ "$#" -ne 3 ]; then + echo "Usage: $0 <src-dir> <dest-dir> <dest-num-hours>" + echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\ + /export/a05/dgalvez/LibriSpeech/train-clean-5 5" + exit 1 +fi + +src_dir=$1 +dest_dir=$2 +dest_num_hours=$3 + +src=$(basename $src_dir) +dest=$(basename $dest_dir) +librispeech_dir=$(dirname $src_dir) + +# TODO: Possibly improve this to ensure gender balance and speaker +# balance. +# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data +src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \ +python -c ' +from __future__ import print_function +from sys import stdin +minutes_str = stdin.read().split() +print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))') +src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | sort -u | wc -l) +mkdir -p data/subset_tmp +grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | \ + shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \ + data/subset_tmp/${dest}_chapter_id_list.txt + +while read -r chapter_id || [[ -n "$chapter_id" ]]; do + chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d) + speaker_id=$(basename $(dirname $chapter_dir)) + mkdir -p $dest_dir/$speaker_id/ + cp -r $chapter_dir $dest_dir/$speaker_id/ +done < data/subset_tmp/${dest}_chapter_id_list.txt diff --git a/egs/mini_librispeech/s5/path.sh b/egs/mini_librispeech/s5/path.sh new file mode 100644 index 00000000000..705600ad47a --- /dev/null +++ b/egs/mini_librispeech/s5/path.sh @@ -0,0 +1,8 @@ +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + +# For now, don't include any of the optional dependencies of the main +# librispeech recipe diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh new file mode 100755 index 00000000000..964485d4531 --- /dev/null +++ b/egs/mini_librispeech/s5/run.sh @@ -0,0 +1,204 @@ +#!/bin/bash + +# Note: this works only on pre-downloaded data on the CLSP servers +data=/export/a05/dgalvez/ + +data_url=www.openslr.org/resources/TODO # TODO +lm_url=www.openslr.org/resources/11 + +. ./cmd.sh +. ./path.sh + +stage=0 +. utils/parse_options.sh + +# TODO(galv): Reconsider this +set -euxo pipefail + +# TODO(galv): Modify openslr.org to contain the minified training dataset. +# for part in dev-clean-2 train-clean-5; do +# local/download_and_untar.sh $data $data_url $part +# done + +if [ $stage -le 0 ]; then + local/download_lm.sh $lm_url data/local/lm +fi + +if [ $stage -le 1 ]; then + # format the data as Kaldi data directories + for part in dev-clean-2 train-clean-5; do + # use underscore-separated names in data directories.
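+ # (e.g. "dev-clean-2" becomes data/dev_clean_2)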
+ local/data_prep.sh $data/LibriSpeech/$part data/$(echo $part | sed s/-/_/g) + done + + local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \ + data/local/lm data/local/lm data/local/dict_nosp + + utils/prepare_lang.sh data/local/dict_nosp \ + "<UNK>" data/local/lang_tmp_nosp data/lang_nosp + + local/format_lms.sh --src-dir data/lang_nosp data/local/lm + # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs + utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \ + data/lang_nosp data/lang_nosp_test_tglarge +fi + +if [ $stage -le 2 ]; then + mfccdir=mfcc + # spread the mfccs over various machines, as this data-set is quite large. + if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename. + utils/create_split_dir.pl /export/b{07,14,16,17}/$USER/kaldi-data/egs/librispeech/s5/$mfcc/storage \ + $mfccdir/storage + fi + + for part in dev_clean_2 train_clean_5; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$part exp/make_mfcc/$part $mfccdir + steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir + done + + # Get the shortest 500 utterances first because those are more likely + # to have accurate alignments. + utils/subset_data_dir.sh --shortest data/train_clean_5 500 data/train_500short +fi + +# train a monophone system +if [ $stage -le 3 ]; then + # TODO(galv): Is this too many jobs for a smaller dataset? + steps/train_mono.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_500short data/lang_nosp exp/mono + # TODO: Understand why we use lang_nosp here... + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/mono exp/mono/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \ + data/$test exp/mono/decode_nosp_tgsmall_$test + done + )& + + steps/align_si.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/mono exp/mono_ali_train_clean_5 +fi + +# train a first delta + delta-delta triphone system on all utterances +if [ $stage -le 4 ]; then + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ + 2000 10000 data/train_clean_5 data/lang_nosp exp/mono_ali_train_clean_5 exp/tri1 + + # decode using the tri1 model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri1 exp/tri1/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 5 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ + data/$test exp/tri1/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test + done + )& + + steps/align_si.sh --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/tri1 exp/tri1_ali_train_clean_5 +fi + +# train an LDA+MLLT system.
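+# (i.e. frame-spliced MFCC features reduced with Linear Discriminant Analysis,
+# plus a Maximum Likelihood Linear Transform on top; steps/train_lda_mllt.sh
+# below estimates both.)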
+if [ $stage -le 5 ]; then
+  steps/train_lda_mllt.sh --cmd "$train_cmd" \
+    --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
+    data/train_clean_5 data/lang_nosp exp/tri1_ali_train_clean_5 exp/tri2b
+
+  # decode using the LDA+MLLT model
+  (
+    utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+      exp/tri2b exp/tri2b/graph_nosp_tgsmall
+    for test in dev_clean_2; do
+      steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \
+        data/$test exp/tri2b/decode_nosp_tgsmall_$test
+      steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+        data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test
+      steps/lmrescore_const_arpa.sh \
+        --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+        data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test
+    done
+  )&

+  # Align utts using the tri2b model
+  steps/align_si.sh --nj 5 --cmd "$train_cmd" --use-graphs true \
+    data/train_clean_5 data/lang_nosp exp/tri2b exp/tri2b_ali_train_clean_5
+fi
+
+# Train tri3b, which is LDA+MLLT+SAT
+if [ $stage -le 6 ]; then
+  steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
+    data/train_clean_5 data/lang_nosp exp/tri2b_ali_train_clean_5 exp/tri3b
+
+  # decode using the tri3b model
+  (
+    utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+      exp/tri3b exp/tri3b/graph_nosp_tgsmall
+    for test in dev_clean_2; do
+      steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
+        exp/tri3b/graph_nosp_tgsmall data/$test \
+        exp/tri3b/decode_nosp_tgsmall_$test
+      steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+        data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test
+      steps/lmrescore_const_arpa.sh \
+        --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+        data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test
+    done
+  )&
+fi
+
+# Now we compute the pronunciation and silence probabilities from training data,
+# and re-create the lang directory.
+if [ $stage -le 7 ]; then
+  steps/get_prons.sh --cmd "$train_cmd" \
+    data/train_clean_5 data/lang_nosp exp/tri3b
+  utils/dict_dir_add_pronprobs.sh --max-normalize true \
+    data/local/dict_nosp \
+    exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \
+    exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict
+
+  utils/prepare_lang.sh data/local/dict \
+    "<UNK>" data/local/lang_tmp data/lang
+
+  local/format_lms.sh --src-dir data/lang data/local/lm
+
+  utils/build_const_arpa_lm.sh \
+    data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge
+
+  steps/align_fmllr.sh --nj 5 --cmd "$train_cmd" \
+    data/train_clean_5 data/lang exp/tri3b exp/tri3b_ali_train_clean_5
+fi
+
+
+if [ $stage -le 8 ]; then
+  # Test the tri3b system with the silprobs and pron-probs.
+
+  # decode using the tri3b model
+  utils/mkgraph.sh data/lang_test_tgsmall \
+    exp/tri3b exp/tri3b/graph_tgsmall
+  for test in dev_clean_2; do
+    steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
+      exp/tri3b/graph_tgsmall data/$test \
+      exp/tri3b/decode_tgsmall_$test
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test
+    steps/lmrescore_const_arpa.sh \
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test
+  done
+fi
+
+
+# Train a chain model
+if [ $stage -le 9 ]; then
+  local/chain/run_tdnn.sh --stage 0
+fi
+
+# Don't finish until all background decoding jobs are finished.
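+# (Aside: the decoding blocks above run as backgrounded subshells, "( ... )&",
+# so each model is decoded while its successor trains; the wait below joins
+# them all before the script exits. The pattern, in miniature:
+#   ( steps/decode.sh ... )&     # decode the previous model in the background
+#   steps/train_deltas.sh ...    # keep training meanwhile
+#   wait                         # block until the background decodes finish
+# )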
+wait diff --git a/egs/mini_librispeech/s5/steps b/egs/mini_librispeech/s5/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/mini_librispeech/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/mini_librispeech/s5/utils b/egs/mini_librispeech/s5/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/mini_librispeech/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/multi_en/s5/local/rt03_data_prep.sh b/egs/multi_en/s5/local/rt03_data_prep.sh index 84955f0ed50..aa1e2ba4cc2 100755 --- a/egs/multi_en/s5/local/rt03_data_prep.sh +++ b/egs/multi_en/s5/local/rt03_data_prep.sh @@ -8,7 +8,7 @@ # - Modified paths to match multi_en naming conventions ########################################################################################### -# RT-03 data preparation (conversational telephone speech part only) +# RT-03 data preparation (conversational telephone speech part only) # Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi # To be run from one directory above this script. @@ -16,7 +16,8 @@ # Expects the standard directory layout for RT-03 if [ $# -ne 1 ]; then - echo "Usage: "`basename $0`" " + echo "Usage: $0 " + echo "e.g.: $0 /export/corpora/LDC/LDC2007S10" echo "See comments in the script for more details" exit 1 fi @@ -45,7 +46,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -55,7 +56,7 @@ awk -v sph2pipe=$sph2pipe '{ # sw02001-A_000098-001156 sw02001-A 0.98 11.56 #pem=$sdir/english/hub5e_00.pem #[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 #grep -v ';;' $pem \ @@ -67,7 +68,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \ | sort -u > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -85,7 +86,7 @@ cat $tdir/*.stm | \ grep -v inter_segment_gap | \ awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\ - > $dir/stm + > $dir/stm #$tdir/reference/hub5e00.english.000405.stm > $dir/stm cp $rtroot/data/trans_rules/en20030506.glm $dir/glm @@ -95,10 +96,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. 
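# (Aside: the one-speaker-per-conversation-side convention below means utt2spk
# maps e.g. "sw02001-A_000098-001156" to "sw02001-A", matching the segment ids
# illustrated earlier in this script; utils/utt2spk_to_spk2utt.pl then simply
# inverts that mapping to produce spk2utt.)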
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -118,4 +119,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/rm/s5/local/chain/run_tdnn_5g.sh b/egs/rm/s5/local/chain/run_tdnn_5g.sh index f6fbe070763..088cb3ec778 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5g.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5g.sh @@ -120,7 +120,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/rm/s5/local/chain/run_tdnn_5n.sh b/egs/rm/s5/local/chain/run_tdnn_5n.sh index 7fd7b82aa1d..7a08becd57f 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5n.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5n.sh @@ -25,7 +25,8 @@ num_jobs_final=4 minibatch_size=128 frames_per_eg=150 remove_egs=false - +#common_egs_dir=exp/chain/tdnn_5g/egs/ +common_egs_dir= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -121,7 +122,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/rm/s5/local/nnet2/run_4b_gpu.sh b/egs/rm/s5/local/nnet2/run_4b_gpu.sh index 34a5cd34f7e..9cde9f1694e 100755 --- a/egs/rm/s5/local/nnet2/run_4b_gpu.sh +++ b/egs/rm/s5/local/nnet2/run_4b_gpu.sh @@ -16,7 +16,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. EOF -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. . utils/parse_options.sh # to parse the --stage option, if given diff --git a/egs/rm/s5/local/nnet2/run_4c.sh b/egs/rm/s5/local/nnet2/run_4c.sh index 2b580fe29d6..b3060c46ca0 100755 --- a/egs/rm/s5/local/nnet2/run_4c.sh +++ b/egs/rm/s5/local/nnet2/run_4c.sh @@ -14,20 +14,20 @@ use_gpu=true if $use_gpu; then if ! cuda-compiled; then - cat < \n" . + " e.g.: $0 /export/corpora5/LDC/LDC2007S11 data\n"; + exit(1); +} + +my ($db_base, $out_dir) = @ARGV; +$out_dir = "$out_dir/rt04_dev"; + +if (system("mkdir -p $out_dir")) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") + or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") + or die "Could not open the output file $out_dir/wav.scp"; +open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel") + or die "Could not open the output file $out_dir/reco2file_and_channel"; + +open(LIST, 'find ' . $db_base . '/data/audio/dev04s -name "*.sph" |'); + + +my $sox =`which sox` || die "Could not find sox in PATH"; +chomp($sox); + +while (my $line = ) { + chomp($line); + my ($file_id, $path, $suffix) = fileparse($line, qr/\.[^.]*/); + if ($suffix =~ /.sph/) { + #print WAV $file_id . " $sox $line -c 1 -b 16 -t wav - |\n"; + print WAV $file_id . 
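+# (Aside: a trailing "|" in a wav.scp entry tells Kaldi to treat the entry as
+# a command and read the waveform from its stdout, so the .sph files get
+# converted on the fly instead of being materialised on disk. The two entry
+# forms, schematically:
+#   utt1 /path/to/utt1.wav
+#   utt2 sph2pipe -f wav /path/to/utt2.sph |
+# )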
" sph2pipe -f wav $line |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/rt/s5/local/make_rt_2004_eval.pl b/egs/rt/s5/local/make_rt_2004_eval.pl new file mode 100755 index 00000000000..4c1286ea1cc --- /dev/null +++ b/egs/rt/s5/local/make_rt_2004_eval.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +use strict; +use File::Basename; + +if (@ARGV != 2) { + print STDERR "Usage: $0 \n" . + " e.g.: $0 /export/corpora5/LDC/LDC2007S12/package/rt04_eval data\n"; + exit(1); +} + +my ($db_base, $out_dir) = @ARGV; +$out_dir = "$out_dir/rt04_eval"; + +if (system("mkdir -p $out_dir")) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") + or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") + or die "Could not open the output file $out_dir/wav.scp"; +open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel") + or die "Could not open the output file $out_dir/reco2file_and_channel"; + +open(LIST, 'find ' . $db_base . '/data/audio/eval04s -name "*.sph" |'); + +my $sox =`which sox` || die "Could not find sox in PATH"; +chomp($sox); + +while (my $line = ) { + chomp($line); + my ($file_id, $path, $suffix) = fileparse($line, qr/\.[^.]*/); + if ($suffix =~ /.sph/) { + #print WAV $file_id . " $sox $line -c 1 -b 16 -t wav - |\n"; + print WAV $file_id . " sph2pipe -f wav $line |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} + diff --git a/egs/rt/s5/local/make_rt_2005_eval.pl b/egs/rt/s5/local/make_rt_2005_eval.pl new file mode 100755 index 00000000000..d48dcaae926 --- /dev/null +++ b/egs/rt/s5/local/make_rt_2005_eval.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +use strict; +use File::Basename; + +if (@ARGV != 2) { + print STDERR "Usage: $0 \n" . 
+ " e.g.: $0 /export/corpora5/LDC/LDC2011S06 data\n"; + exit(1); +} + +my ($db_base, $out_dir) = @ARGV; +$out_dir = "$out_dir/rt05_eval"; + +if (system("mkdir -p $out_dir")) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") + or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") + or die "Could not open the output file $out_dir/wav.scp"; +open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel") + or die "Could not open the output file $out_dir/reco2file_and_channel"; + +open(LIST, 'find ' . $db_base . '/data/audio/eval05s -name "*.sph" |'); + +my $sox =`which sox` || die "Could not find sox in PATH"; +chomp($sox); + +while (my $line = ) { + chomp($line); + my ($file_id, $path, $suffix) = fileparse($line, qr/\.[^.]*/); + if ($suffix =~ /.sph/) { + print WAV $file_id . " $sox $line -c 1 -b 16 -t wav - |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} + + diff --git a/egs/rt/s5/local/run_prepare_rt.sh b/egs/rt/s5/local/run_prepare_rt.sh new file mode 100755 index 00000000000..c431f760dab --- /dev/null +++ b/egs/rt/s5/local/run_prepare_rt.sh @@ -0,0 +1,87 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -o pipefail +set -u + +. path.sh +. cmd.sh + +mic=sdm +task=sad + +. parse_options.sh + +RT04_DEV_ROOT=/export/corpora5/LDC/LDC2007S11 +RT04_EVAL_ROOT=/export/corpora5/LDC/LDC2007S12/package/rt04_eval +RT05_EVAL_ROOT=/export/corpora5/LDC/LDC2011S06 + +if [ ! -f data/rt04_dev/.done ]; then + local/make_rt_2004_dev.pl $RT04_DEV_ROOT data + touch data/rt04_dev/.done +fi + +if [ ! -f data/rt04_eval/.done ]; then + local/make_rt_2004_eval.pl $RT04_EVAL_ROOT data + touch data/rt04_eval/.done +fi + +if [ ! 
-f data/rt05_eval/.done ]; then + local/make_rt_2005_eval.pl $RT05_EVAL_ROOT data + touch data/rt05_eval/.done +fi + +mkdir -p data/local + +dir=data/local/rt05_eval/$mic/$task +mkdir -p $dir + +if [ $task == "stt" ]; then + cp $RT05_EVAL_ROOT/data/reference/concatenated/rt05s.confmtg.050614.${task}.${mic}.stm $dir/stm +else + cp $RT05_EVAL_ROOT/data/reference/concatenated/rt05s.confmtg.050614.${task}.${mic}.rttm $dir/rttm +fi + +cp $RT05_EVAL_ROOT/data/indicies/expt_05s_${task}ul_eval05s_eng_confmtg_${mic}_1.uem $dir/uem +cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list +utils/subset_data_dir.sh --utt-list $dir/list data/rt05_eval data/rt05_eval_${mic}_${task} +[ -f $dir/stm ] && cp $dir/stm data/rt05_eval_${mic}_${task} +[ -f $dir/uem ] && cp $dir/uem data/rt05_eval_${mic}_${task} +[ -f $dir/rttm ] && cp $dir/rttm data/rt05_eval_${mic}_${task} + +dir=data/local/rt04_dev/$mic/$task +mkdir -p $dir + +if [ $task == "stt" ]; then + cp $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.stm $dir/stm +elif [ $task == "spkr" ]; then + cp $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.rttm $dir/rttm +else + cat $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.rttm | spkr2sad.pl | rttmSmooth.pl -s 0 > $dir/rttm +fi +cp $RT04_DEV_ROOT/data/indices/dev04s/dev04s.${mic}.uem $dir/uem +cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list +utils/subset_data_dir.sh --utt-list $dir/list data/rt04_dev data/rt04_dev_${mic}_${task} +[ -f $dir/stm ] && cp $dir/stm data/rt04_dev_${mic}_${task} +[ -f $dir/uem ] && cp $dir/uem data/rt04_dev_${mic}_${task} +[ -f $dir/rttm ] && cp $dir/rttm data/rt04_dev_${mic}_${task} + +dir=data/local/rt04_eval/$mic/$task +mkdir -p $dir + +if [ $task == "stt" ]; then + cp $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.stm $dir/stm +elif [ $task == "spkr" ]; then + cp $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.rttm $dir/rttm +else + cat $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.rttm | spkr2sad.pl | rttmSmooth.pl -s 0 > $dir/rttm +fi +cp $RT04_EVAL_ROOT/data/indices/eval04s/eval04s.${mic}.uem $dir/uem +cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list +utils/subset_data_dir.sh --utt-list $dir/list data/rt04_eval data/rt04_eval_${mic}_${task} +[ -f $dir/stm ] && cp $dir/stm data/rt04_eval_${mic}_${task} +[ -f $dir/uem ] && cp $dir/uem data/rt04_eval_${mic}_${task} +[ -f $dir/rttm ] && cp $dir/rttm data/rt04_eval_${mic}_${task} diff --git a/egs/rt/s5/local/score.sh b/egs/rt/s5/local/score.sh new file mode 100755 index 00000000000..1c3e2cbe8c4 --- /dev/null +++ b/egs/rt/s5/local/score.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Copyright Johns Hopkins University (Author: Daniel Povey) 2012 +# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014 +# Apache 2.0 + +orig_args= +for x in "$@"; do orig_args="$orig_args '$x'"; done + +# begin configuration section. we include all the options that score_sclite.sh or +# score_basic.sh might need, or parse_options.sh will die. +cmd=run.pl +stage=0 +min_lmwt=9 # unused, +max_lmwt=15 # unused, +asclite=true +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [options] " && exit; + echo " Options:" + echo " --cmd (run.pl|queue.pl...) 
# specify how to run the sub-processes."
+  echo "  --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "  --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "  --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "  --asclite (true/false)          # score with asclite instead of sclite (overlapped speech)"
+  exit 1;
+fi
+
+data=$1
+
+mic=$(echo $data | awk -F '/' '{print $2}')
+case $mic in
+  ihm*)
+    echo "Using sclite for IHM (close talk),"
+    eval local/score_asclite.sh --asclite false $orig_args
+    ;;
+  sdm*)
+    echo "Using asclite for overlapped speech SDM (single distant mic),"
+    eval local/score_asclite.sh --asclite $asclite $orig_args
+    ;;
+  mdm*)
+    echo "Using asclite for overlapped speech MDM (multiple distant mics),"
+    eval local/score_asclite.sh --asclite $asclite $orig_args
+    ;;
+  *)
+    echo "local/score.sh: no ihm/sdm/mdm directories found. AMI recipe assumes data/{ihm,sdm,mdm}/..."
+    exit 1;
+    ;;
+esac
diff --git a/egs/rt/s5/local/score_asclite.sh b/egs/rt/s5/local/score_asclite.sh
new file mode 100755
index 00000000000..86b801b975d
--- /dev/null
+++ b/egs/rt/s5/local/score_asclite.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright Johns Hopkins University (Author: Daniel Povey) 2012.  Apache 2.0.
+#           2014, University of Edinburgh, (Author: Pawel Swietojanski)
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+min_lmwt=9
+max_lmwt=15
+reverse=false
+asclite=true
+overlap_spk=4
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score_asclite.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "  --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "  --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "  --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "  --reverse (true/false)          # score with time reversed features "
+  exit 1;
+fi
+
+data=$1
+lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
+dir=$3
+
+model=$dir/../final.mdl # assume model one level up from decoding dir.
+
+hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
+[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
+hubdir=`dirname $hubscr`
+
+for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \
+     $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+name=`basename $data`; # e.g. eval2000
+
+mkdir -p $dir/ascoring/log
+
+if [ $stage -le 0 ]; then
+  if $reverse; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/get_ctm.LMWT.log \
+      mkdir -p $dir/ascore_LMWT/ '&&' \
+      lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-reverse ark:- ark:- \| \
+      lattice-align-words --reorder=false $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      nbest-to-ctm ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/ascore_LMWT/$name.ctm || exit 1;
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/get_ctm.LMWT.log \
+      mkdir -p $dir/ascore_LMWT/ '&&' \
+      lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      nbest-to-ctm ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/ascore_LMWT/$name.ctm || exit 1;
+  fi
+fi
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score from the ctm.
+  for x in $dir/ascore_*/$name.ctm; do
+    cp $x $dir/tmpf;
+    cat $dir/tmpf | grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \
+      grep -i -v -E '<unk>' > $x;
+#      grep -i -v -E '<unk>|%HESITATION' > $x;
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  if [ "$asclite" == "true" ]; then
+    oname=$name
+    [ ! -z $overlap_spk ] && oname=${name}_o$overlap_spk
+    echo "asclite is starting"
+    # Run scoring, meaning of hubscr.pl options:
+    # -G .. produce alignment graphs,
+    # -v .. verbose,
+    # -m .. max-memory in GBs,
+    # -o .. max N of overlapping speakers,
+    # -a .. use asclite,
+    # -C .. compression for asclite,
+    # -B .. blocksize for asclite (kBs?),
+    # -p .. path for other components,
+    # -V .. skip validation of input transcripts,
+    # -h rt-stt .. removes non-lexical items from CTM,
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
+      cp $data/stm $dir/ascore_LMWT/ '&&' \
+      cp $dir/ascore_LMWT/${name}.ctm $dir/ascore_LMWT/${oname}.ctm '&&' \
+      $hubscr -G -v -m 1:2 -o$overlap_spk -a -C -B 8192 -p $hubdir -V -l english \
+        -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${oname}.ctm || exit 1
+    # Compress some scoring outputs: alignment info and graphs,
+    echo -n "compressing asclite outputs "
+    for LMWT in $(seq $min_lmwt $max_lmwt); do
+      ascore=$dir/ascore_${LMWT}
+      gzip -f $ascore/${oname}.ctm.filt.aligninfo.csv
+      cp $ascore/${oname}.ctm.filt.alignments/index.html $ascore/${oname}.ctm.filt.overlap.html
+      tar -C $ascore -czf $ascore/${oname}.ctm.filt.alignments.tar.gz ${oname}.ctm.filt.alignments
+      rm -r $ascore/${oname}.ctm.filt.alignments
+      echo -n "LMWT:$LMWT "
+    done
+    echo done
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
+      cp $data/stm $dir/ascore_LMWT/ '&&' \
+      $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${name}.ctm || exit 1
+  fi
+fi
+
+exit 0
diff --git a/egs/rt/s5/local/snr b/egs/rt/s5/local/snr
new file mode 120000
index 00000000000..6d422e11960
--- /dev/null
+++ b/egs/rt/s5/local/snr
@@ -0,0 +1 @@
+../../../wsj_noisy/s5/local/snr
\ No newline at end of file
diff --git a/egs/rt/s5/path.sh b/egs/rt/s5/path.sh
new file mode 100755
index 00000000000..8461d980758
--- /dev/null
+++ b/egs/rt/s5/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . 
$KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/nnet3bin/:$KALDI_ROOT/src/segmenterbin/:$PWD:$PATH:$KALDI_ROOT/tools/sctk/bin +export PATH=$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH +export LC_ALL=C diff --git a/egs/rt/s5/sid b/egs/rt/s5/sid new file mode 120000 index 00000000000..5cb0274b7d6 --- /dev/null +++ b/egs/rt/s5/sid @@ -0,0 +1 @@ +../../sre08/v1/sid/ \ No newline at end of file diff --git a/egs/rt/s5/steps b/egs/rt/s5/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/rt/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/rt/s5/utils b/egs/rt/s5/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/rt/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/sprakbanken/s5/RESULTS b/egs/sprakbanken/s5/RESULTS index 628507ac85d..d64f006f806 100644 --- a/egs/sprakbanken/s5/RESULTS +++ b/egs/sprakbanken/s5/RESULTS @@ -1,25 +1,28 @@ -%WER 49.19 [ 5318 / 10811, 481 ins, 1511 del, 3326 sub ] exp/mono0a/decode_3g_test1k/wer_9 -%WER 47.28 [ 5111 / 10811, 443 ins, 1489 del, 3179 sub ] exp/mono0a/decode_b3g_test1k/wer_10 -%WER 16.19 [ 1750 / 10811, 397 ins, 323 del, 1030 sub ] exp/sgmm2_5a/decode_3g_test1k/wer_9 -%WER 15.10 [ 1632 / 10811, 404 ins, 305 del, 923 sub ] exp/sgmm2_5b/decode_3g_test1k/wer_9 -%WER 14.94 [ 1615 / 10811, 390 ins, 310 del, 915 sub ] exp/sgmm2_5b/decode_4g_test1k/wer_9 -%WER 14.36 [ 1553 / 10811, 376 ins, 264 del, 913 sub ] exp/sgmm2_5c/decode_3g_test1k/wer_9 -%WER 14.18 [ 1533 / 10811, 367 ins, 266 del, 900 sub ] exp/sgmm2_5c/decode_4g_test1k/wer_9 -%WER 25.61 [ 2769 / 10811, 511 ins, 539 del, 1719 sub ] exp/tri1/decode_3g_test1k/wer_10 -%WER 25.12 [ 2716 / 10811, 444 ins, 571 del, 1701 sub ] exp/tri1/decode_b3g_test1k/wer_11 -%WER 23.81 [ 2574 / 10811, 426 ins, 564 del, 1584 sub ] exp/tri2a/decode_3g_test1k/wer_12 -%WER 23.22 [ 2510 / 10811, 457 ins, 517 del, 1536 sub ] exp/tri2a/decode_3g_test1k_fromlats/wer_11 -%WER 22.18 [ 2398 / 10811, 436 ins, 495 del, 1467 sub ] exp/tri2b/decode_3g_test1k/wer_11 -%WER 21.87 [ 2364 / 10811, 380 ins, 553 del, 1431 sub ] exp/tri2b/decode_3g_test1k_mbr/wer_13 -%WER 18.98 [ 2052 / 10811, 451 ins, 372 del, 1229 sub ] exp/tri3b_20k/decode_3g_test1k/wer_11 -%WER 22.62 [ 2445 / 10811, 468 ins, 460 del, 1517 sub ] exp/tri3b_20k/decode_3g_test1k.si/wer_10 -%WER 19.31 [ 2088 / 10811, 440 ins, 388 del, 1260 sub ] exp/tri3b/decode_3g_test1k/wer_11 -%WER 23.19 [ 2507 / 10811, 435 ins, 520 del, 1552 sub ] exp/tri3b/decode_3g_test1k.si/wer_12 -%WER 19.06 [ 2061 / 10811, 427 ins, 384 del, 1250 sub ] exp/tri3b/decode_4g_test1k/wer_11 -%WER 23.20 [ 2508 / 10811, 447 ins, 520 del, 1541 sub ] exp/tri3b/decode_4g_test1k.si/wer_11 -%WER 17.42 [ 1883 / 10811, 416 ins, 359 del, 1108 sub ] exp/tri4a/decode_3g_test1k/wer_13 -%WER 20.86 [ 2255 / 10811, 403 ins, 473 del, 1379 sub ] exp/tri4a/decode_3g_test1k.si/wer_13 -%WER 17.52 [ 1894 / 10811, 396 ins, 372 del, 1126 sub ] exp/tri4b/decode_3g_test1k/wer_13 -%WER 20.82 [ 2251 / 10811, 399 ins, 471 del, 1381 sub ] exp/tri4b/decode_3g_test1k.si/wer_13 -%WER 17.53 [ 1895 / 10811, 
403 ins, 375 del, 1117 sub ] exp/tri4b/decode_4g_test1k/wer_13 -%WER 20.99 [ 2269 / 10811, 438 ins, 436 del, 1395 sub ] exp/tri4b/decode_4g_test1k.si/wer_11 +GMM-based systems +%WER 22.87 [ 24286 / 106172, 3577 ins, 5321 del, 15388 sub ] exp/tri1/decode_fg_dev/wer_12_0.5 +%WER 23.13 [ 24561 / 106172, 3602 ins, 5411 del, 15548 sub ] exp/tri1/decode_tg_dev/wer_12_0.5 +%WER 21.24 [ 22548 / 106172, 4028 ins, 4246 del, 14274 sub ] exp/tri2a/decode_tg_dev/wer_13_0.0 +%WER 19.46 [ 20664 / 106172, 3276 ins, 4332 del, 13056 sub ] exp/tri2b/decode_tg_dev/wer_15_0.5 +%WER 16.80 [ 17839 / 106172, 3238 ins, 3403 del, 11198 sub ] exp/tri3b/decode_fg_dev/wer_17_0.0 +%WER 19.45 [ 20651 / 106172, 3880 ins, 3671 del, 13100 sub ] exp/tri3b/decode_fg_dev.si/wer_15_0.0 +%WER 14.24 [ 9849 / 69165, 2046 ins, 1365 del, 6438 sub ] exp/tri3b/decode_fg_test/wer_16_0.5 +%WER 17.31 [ 11972 / 69165, 2330 ins, 1695 del, 7947 sub ] exp/tri3b/decode_fg_test.si/wer_15_0.5 +%WER 16.94 [ 17984 / 106172, 3361 ins, 3377 del, 11246 sub ] exp/tri3b/decode_tg_dev/wer_16_0.0 +%WER 19.52 [ 20720 / 106172, 3654 ins, 3846 del, 13220 sub ] exp/tri3b/decode_tg_dev.si/wer_17_0.0 +%WER 14.40 [ 9957 / 69165, 2291 ins, 1184 del, 6482 sub ] exp/tri3b/decode_tg_test/wer_16_0.0 +%WER 17.41 [ 12044 / 69165, 2291 ins, 1736 del, 8017 sub ] exp/tri3b/decode_tg_test.si/wer_15_0.5 +nnet3 xent systems +%WER 11.57 [ 12279 / 106172, 2640 ins, 2442 del, 7197 sub ] exp/nnet3/tdnn0_sp/decode_dev/wer_10_0.0 +%WER 9.89 [ 6841 / 69165, 1542 ins, 917 del, 4382 sub ] exp/nnet3/tdnn0_sp/decode_test/wer_11_0.5 +%WER 10.45 [ 11098 / 106172, 2199 ins, 2272 del, 6627 sub ] exp/nnet3/lstm_0_ld5_sp/decode_dev/wer_9_0.0 +%WER 12.34 [ 8533 / 69165, 1740 ins, 1393 del, 5400 sub ] exp/nnet3/lstm_0_ld5_sp/decode_test/wer_11_1.0 +%WER 10.59 [ 11241 / 106172, 2208 ins, 2304 del, 6729 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_dev/wer_9_0.0 +%WER 12.43 [ 8596 / 69165, 1742 ins, 1426 del, 5428 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_test/wer_11_1.0 +%WER 9.18 [ 9747 / 106172, 1987 ins, 1913 del, 5847 sub ] exp/nnet3/lstm_bidirectional_sp/decode_dev/wer_8_0.0 +Nnet3 chain systems +%WER 8.48 [ 9001 / 106172, 1559 ins, 1624 del, 5818 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_dev/wer_9_0.0 +%WER 7.20 [ 4981 / 69165, 915 ins, 402 del, 3664 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_test/wer_8_1.0 +%WER 10.00 [ 10619 / 106172, 1980 ins, 1896 del, 6743 sub ] exp/chain/tdnn_sp_bi/decode_dev/wer_9_0.0 +%WER 8.58 [ 5936 / 69165, 1059 ins, 667 del, 4210 sub ] exp/chain/tdnn_sp_bi/decode_test/wer_9_1.0 +%WER 9.39 [ 9969 / 106172, 1624 ins, 1912 del, 6433 sub ] exp/chain/lstm1e_sp_bi/decode_dev/wer_8_0.5 +%WER 7.72 [ 5341 / 69165, 1002 ins, 497 del, 3842 sub ] exp/chain/lstm1e_sp_bi/decode_test/wer_8_0.5 diff --git a/egs/sprakbanken/s5/conf/mfcc_hires.conf b/egs/sprakbanken/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..b5aeaafe704 --- /dev/null +++ b/egs/sprakbanken/s5/conf/mfcc_hires.conf @@ -0,0 +1,11 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... 
this is high-bandwidth data, so
+                 # there might be some information at the low end.
+                 # Needs to be this low to be sensitive to creaky voice.
+--high-freq=-400 # high cutoff frequency, relative to the Nyquist of 8000 (=7600)
diff --git a/egs/sprakbanken/s5/conf/online_cmvn.conf b/egs/sprakbanken/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/sprakbanken/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/sprakbanken/s5/local/chain/compare_wer_general.sh b/egs/sprakbanken/s5/local/chain/compare_wer_general.sh
new file mode 100755
index 00000000000..4074b0c12c3
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/compare_wer_general.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Prints a table that makes it easy to compare WER and objective values across
+# nnet3 and chain training runs.
+
+echo -n "System                  "
+for x in "$@"; do   printf "% 10s" $x;   done
+echo
+
+echo -n "WER on dev(tg)          "
+for x in "$@"; do
+  wer=$(grep WER ${x}/decode_dev/wer_* | utils/best_wer.sh | awk '{print $2}')
+  printf "% 10s" $wer
+done
+echo
+
+echo -n "WER on test(tg)         "
+for x in "$@"; do
+  wer=$(grep WER ${x}/decode_test/wer_* | utils/best_wer.sh | awk '{print $2}')
+  printf "% 10s" $wer
+done
+echo
+
+echo -n "Final train prob        "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final valid prob        "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final train prob (xent) "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final valid prob (xent) "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
diff --git a/egs/sprakbanken/s5/local/chain/run_lstm.sh b/egs/sprakbanken/s5/local/chain/run_lstm.sh
new file mode 120000
index 00000000000..afba2a1ce94
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_lstm.sh
@@ -0,0 +1 @@
+tuning/run_lstm_1e.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/run_tdnn.sh b/egs/sprakbanken/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..61f8f499182
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh b/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh
new file mode 120000
index 00000000000..8e647598556
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_lstm_1a.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh
new file mode 100755
index 00000000000..3ea61800869
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+
+# run_lstm_1a.sh is a first attempt at an LSTM system, based on xconfigs -- it's
+# probably not very well configured, e.g. the num-params might be too small.
+# recurrent-projection-dim is less than non-recurrent-projection-dim due to an
+# oversight.
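+# (Aside: in an LSTMP layer the 512-dim cell output is projected down before
+# being fed back, so the recurrent weights scale with recurrent-projection-dim
+# rather than cell-dim. Other Kaldi chain recipes typically make the two
+# projection dims equal, e.g. something like:
+#   lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 \
+#     non-recurrent-projection-dim=256 delay=-3
+# whereas this script uses 128/256, hence the "oversight" note above.)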
+ +# comparison with TDNN system (WER is worse): +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/lstm1a_sp_bi +# System tdnn1b_sp_bi lstm1a_sp_bi +# WER on dev(orig) 10.2 10.8 +# WER on dev(rescored) 9.6 10.2 +# WER on test(orig) 9.7 10.0 +# WER on test(rescored) 9.2 9.6 +# Final train prob -0.0928 -0.0848 +# Final valid prob -0.1178 -0.1098 +# Final train prob (xent) -1.4666 -1.1692 +# Final valid prob (xent) -1.5473 -1.2520 + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script (run_lstm_1a) is like run_tdnn_1b.sh except modified to use an LSTM +# configuration (some aspects borrowed from egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh). + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1a #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
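+    # (Aside: this stage refuses to clobber an existing tree because any
+    # already-trained models depend on it; to rebuild deliberately, remove
+    # the tree directory by hand first, e.g.:
+    #   rm -r "$tree_dir"   # then re-run this stage
+    # )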
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh new file mode 100755 index 00000000000..a22d4eb53d7 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +# run_lstm_1b.sh is as run_lstm_1a.sh but replacing the projected LSTM +# with a regular LSTM. This is done in order to have an LSTM-only baseline +# for the 'fast lstm', where we need to test the regular as well as projected +# LSTM layers. + +# It's worse than the LSTMP, as expected, due to more overtraining. 
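+# (Aside: the size gap is mostly in the recurrent weights. With cell-dim 512,
+# the four gates of a plain LSTM carry roughly
+#   4 * 512 * 512 ~= 1.05M
+# recurrent parameters per layer, versus roughly
+#   4 * 512 * 128 ~= 0.26M
+# when recurring on a 128-dim projection as in 1a, which is consistent with
+# the num-params figures in the chain_dir_info lines below.)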
+ +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1b_sp_bi +# exp/chain_cleaned/lstm1b_sp_bi: num-iters=253 nj=2..12 num-params=9.6M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.24,-1.14,-1.14/-1.35,-1.28,-1.28) logprob:train/valid[167,252,final]=(-0.092,-0.079,-0.079/-0.119,-0.110,-0.110) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1a_sp_bi exp/chain_cleaned/lstm1b_sp_bi +# System lstm1a_sp_bi lstm1b_sp_bi +# WER on dev(orig) 10.8 11.3 +# WER on dev(rescored) 10.2 10.7 +# WER on test(orig) 10.0 10.6 +# WER on test(rescored) 9.6 10.0 +# Final train prob -0.0848 -0.0787 +# Final valid prob -0.1098 -0.1104 +# Final train prob (xent) -1.1692 -1.1442 +# Final valid prob (xent) -1.2520 -1.2782 + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1b #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstm-layer name=lstm1 cell-dim=512 delay=-3 + lstm-layer name=lstm2 cell-dim=512 delay=-3 + lstm-layer name=lstm3 cell-dim=512 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh new file mode 100755 index 00000000000..718992fc909 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh @@ -0,0 +1,259 @@ +#!/bin/bash + + +# run_lstm_1c.sh is like run_lstm_1b.sh but changing from the old LSTM +# implementation to our new 'fast' LSTM layer. The xconfig changes from +# 'lstm-layer' to 'fast-lstm-layer'. It's as good as or maybe slightly better +# than the old setup. 
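+# (Aside: the whole change from 1b is one token per layer in the xconfig:
+#   -  lstm-layer name=lstm1 cell-dim=512 delay=-3
+#   +  fast-lstm-layer name=lstm1 cell-dim=512 delay=-3
+# The fast layer fuses the four gate matrix multiplies into one, so the gain
+# is mainly training speed at essentially unchanged accuracy.)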
+ +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1c_sp_bi +# exp/chain_cleaned/lstm1c_sp_bi: num-iters=253 nj=2..12 num-params=9.6M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.26,-1.14,-1.14/-1.34,-1.27,-1.27) logprob:train/valid[167,252,final]=(-0.092,-0.078,-0.078/-0.116,-0.111,-0.111) + + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1b_sp_bi exp/chain_cleaned/lstm1c_sp_bi +# System lstm1b_sp_bi lstm1c_sp_bi +# WER on dev(orig) 11.3 11.2 +# WER on dev(rescored) 10.7 10.5 +# WER on test(orig) 10.6 10.6 +# WER on test(rescored) 10.0 10.1 +# Final train prob -0.0787 -0.0777 +# Final valid prob -0.1104 -0.1108 +# Final train prob (xent) -1.1442 -1.1445 +# Final valid prob (xent) -1.2782 -1.2692 + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1c #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstm-layer name=lstm1 cell-dim=512 delay=-3 + fast-lstm-layer name=lstm2 cell-dim=512 delay=-3 + fast-lstm-layer name=lstm3 cell-dim=512 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh new file mode 100755 index 00000000000..8cf543f5096 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh @@ -0,0 +1,272 @@ +#!/bin/bash + + +# run_lstm_1d.sh is like run_lstm_1c.sh, but switching back to projected +# LSTM (LSTMP)... the configuration is the same 1a (but unlike 1a it uses +# the fast lstm layer). Note: 1a and 1d are a little broken +# in that their non-recurrent-projection-dim are twice the recurrent-projection-dim, +# but it's better for comparison purposes to have this the same as 1a. + +# As you can see, compared to 1a, 1d is 0.3% to 0.5% better absolute; +# this comes with the upgrade to 'fast' LSTM. There were differences to how +# the gradient truncation is done, maybe that's it; also there are +# other differences, like how the update of the diagonal matrices +# are done, and the integration of 4 matrix multiplies into one which +# will affect the natural gradient. 
Anyway, we're not complaining. + + +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1d_sp_bi +# exp/chain_cleaned/lstm1d_sp_bi: num-iters=253 nj=2..12 num-params=6.4M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.21,-1.13,-1.13/-1.29,-1.22,-1.23) logprob:train/valid[167,252,final]=(-0.092,-0.083,-0.081/-0.114,-0.105,-0.105) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1a_sp_bi exp/chain_cleaned/lstm1c_sp_bi exp/chain_cleaned/lstm1d_sp_bi +# System lstm1a_sp_bi lstm1c_sp_bi lstm1d_sp_bi +# WER on dev(orig) 10.8 11.2 10.3 +# WER on dev(rescored) 10.2 10.5 9.8 +# WER on test(orig) 10.0 10.6 9.7 +# WER on test(rescored) 9.6 10.1 9.2 +# Final train prob -0.0848 -0.0777 -0.0812 +# Final valid prob -0.1098 -0.1108 -0.1049 +# Final train prob (xent) -1.1692 -1.1445 -1.1334 +# Final valid prob (xent) -1.2520 -1.2692 -1.2263 + + + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1d #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh new file mode 100755 index 00000000000..11af644e765 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh @@ -0,0 +1,259 @@ +#!/bin/bash + +# (From the original script: +# run_lstm_1e.sh is like run_lstm_1d.sh, but reducing non-recurrent-projection-dim +# from 256 to 128 (fixes an earlier mistake). +# However, this doesn't improve WER results-- see below. Probably the system +# has too few parameters. Anyway we probably won't tune this further +# as LSTMs by themselves aren't expected to perform that well: +# see run_tdnn_lstm_1a.sh and others in that sequence.) 
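+# (For scale: 1d above reports num-params=6.4M where 1e below reports 4.7M, so
+# halving the non-recurrent projection strips roughly a quarter of the
+# parameters; note that 1d was trained on the cleaned training set, so the two
+# headers are not an exact like-for-like comparison.)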
+
+# steps/info/chain_dir_info.pl exp/chain/lstm1e_sp_bi
+# exp/chain/lstm1e_sp_bi: num-iters=384 nj=2..12 num-params=4.7M dim=40+100->3557 combine=-0.07->-0.07 xent:train/valid[255,383,final]=(-0.755,-0.703,-0.712/-0.793,-0.755,-0.761) logprob:train/valid[255,383,final]=(-0.060,-0.053,-0.053/-0.071,-0.066,-0.065)
+
+# local/chain/compare_wer_general.sh exp/chain/tdnn_sp_bi/ exp/chain/lstm1e_sp_bi/
+# System                  exp/chain/tdnn_sp_bi/  exp/chain/lstm1e_sp_bi/
+# WER on dev(tg)                10.00                  9.39
+# WER on test(tg)                8.58                  7.72
+# Final train prob             -0.0642                -0.0528
+# Final valid prob             -0.0788                -0.0651
+# Final train prob (xent)      -0.9113                -0.7117
+# Final valid prob (xent)      -0.9525                -0.7607
+
+## how you run this (note: this assumes that the run_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default:
+# local/chain/run_lstm.sh
+
+# note that you should probably adjust the parallelisation to your setup.
+# if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# note: if you have already run one of the chain nnet3 systems,
+# you may want to run with --stage 17.
+
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=7
+min_seg_len=1.55
+chunk_left_context=40
+chunk_right_context=0
+label_delay=5
+xent_regularize=0.1
+train_set=train
+gmm=tri3b # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# decode options
+extra_left_context=50
+extra_right_context=0
+frames_per_chunk=150
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+lstm_affix=1e # affix for LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology. We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
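+    # (note: as the header says, if this tree was built by another chain
+    # system you have already run, rerunning with --stage 17, i.e.
+    #   local/chain/run_lstm.sh --stage 17
+    # reuses it and skips straight to the network configuration.)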
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..21e3edac5f3 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is the original TDNN script before we introduced xconfigs. +# See run_tdnn_1b.sh for comparative results. + + +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
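+# (All of the variables below, like those above, can be overridden from the
+# command line via utils/parse_options.sh, which maps dashes in flag names to
+# underscores in variable names; a hypothetical invocation:
+#   local/chain/tuning/run_tdnn_1a.sh --stage 17 --tdnn-affix a
+# would skip to the config-generation stage and write to a tdnn directory
+# carrying the "a" affix.)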
+train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
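+  # (Background: chain graphs are built with --self-loop-scale 1.0 and decoded
+  # with --acwt 1.0 --post-decode-acwt 10.0 (see stage 20 below), rather than
+  # the conventional 0.1-scale setup used for non-chain models.)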
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        --scoring-opts "--min-lmwt 5 " \
+        $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh
new file mode 100755
index 00000000000..14973a5d029
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -0,0 +1,239 @@
+#!/bin/bash
+
+# steps/info/chain_dir_info.pl exp/chain/tdnn_sp_bi/
+# exp/chain/tdnn_sp_bi/: num-iters=384 nj=2..12 num-params=7.0M dim=40+100->3557 combine=-0.08->-0.08 xent:train/valid[255,383,final]=(-0.954,-0.911,-0.911/-0.979,-0.953,-0.952) logprob:train/valid[255,383,final]=(-0.071,-0.064,-0.064/-0.084,-0.079,-0.079)
+
+# local/chain/compare_wer_general.sh exp/nnet3/tdnn0_sp exp/chain/tdnn_sp_bi
+# System                  exp/nnet3/tdnn0_sp   exp/chain/tdnn_sp_bi
+# WER on dev(tg)               11.57                10.00
+# WER on test(tg)               9.89                 8.58
+# Final train prob            -0.7989 0.7538       -0.0642
+# Final valid prob            -0.7728 0.7590       -0.0788
+# Final train prob (xent)                          -0.9113
+# Final valid prob (xent)                          -0.9525
+
+## how you run this (note: this assumes that the run_tdnn.sh soft link points here;
+## otherwise call it directly in its location).
+# by default:
+# local/chain/run_tdnn.sh
+
+# note that you should probably adjust the parallelisation to your setup.
+# if you have already run the corresponding non-chain nnet3 system
+# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# This script is like run_tdnn_1a.sh except that it uses an xconfig-based
+# mechanism to get the configuration.
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=7
+min_seg_len=1.55
+xent_regularize=0.1
+train_set=train
+gmm=tri3b # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix= # affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+  fi
+
+  steps/nnet3/chain/train.py --stage $train_stage \
+    --cmd "$decode_cmd" \
+    --feat.online-ivector-dir $train_ivector_dir \
+    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize 0.1 \
+    --chain.leaky-hmm-coefficient 0.1 \
+    --chain.l2-regularize 0.00005 \
+    --chain.apply-deriv-weights false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
+    --egs.opts "--frames-overlap-per-eg 0" \
+    --egs.chunk-width 150 \
+    --trainer.num-chunk-per-minibatch 128 \
+    --trainer.frames-per-iter 1500000 \
+    --trainer.num-epochs 4 \
+    --trainer.optimization.num-jobs-initial 2 \
+    --trainer.optimization.num-jobs-final 12 \
+    --trainer.optimization.initial-effective-lrate 0.001 \
+    --trainer.optimization.final-effective-lrate 0.0001 \
+    --trainer.max-param-change 2.0 \
+    --cleanup.remove-egs true \
+    --feat-dir $train_data_dir \
+    --tree-dir $tree_dir \
+    --lat-dir $lat_dir \
+    --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+  # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        --scoring-opts "--min-lmwt 5 " \
+        $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
+    ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
new file mode 100755
index 00000000000..7f7f263a741
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+
+# steps/info/chain_dir_info.pl exp/chain/tdnn_lstm1a_sp_bi/
+# exp/chain/tdnn_lstm1a_sp_bi/: num-iters=384 nj=2..12 num-params=9.5M dim=40+100->3557 combine=-0.05->-0.05 xent:train/valid[255,383,final]=(-0.579,-0.518,-0.523/-0.651,-0.616,-0.619) logprob:train/valid[255,383,final]=(-0.046,-0.038,-0.038/-0.063,-0.060,-0.059)
+
+# local/chain/compare_wer_general.sh exp/chain/tdnn_sp_bi/ exp/chain/lstm1e_sp_bi/ exp/chain/tdnn_lstm1a_sp_bi/
+# System                  exp/chain/tdnn_sp_bi/  exp/chain/lstm1e_sp_bi/  exp/chain/tdnn_lstm1a_sp_bi/
+# WER on dev(tg)                10.00                  9.39                     8.48
+# WER on test(tg)                8.58                  7.72                     7.20
+# Final train prob             -0.0642                -0.0528                  -0.0378
+# Final valid prob             -0.0788                -0.0651                  -0.0595
+# Final train prob (xent)      -0.9113                -0.7117                  -0.5228
+# Final valid prob (xent)      -0.9525                -0.7607                  -0.6185
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default:
+# local/chain/run_tdnn_lstm.sh
+
+# note that you may want to adjust the parallelisation to your setup.
+# if you have already run one of the non-chain nnet3 systems
+# (e.g.
local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri3b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
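+    # (note: the tree_affix option above exists for exactly this case; building
+    # a differently-configured tree under e.g. --tree-affix b avoids clobbering
+    # the one shared with the other chain systems.)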
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/copy_dict.sh b/egs/sprakbanken/s5/local/copy_dict.sh index c5cd1fc77b4..5ae5e9697b1 100755 --- a/egs/sprakbanken/s5/local/copy_dict.sh +++ b/egs/sprakbanken/s5/local/copy_dict.sh @@ -1,7 +1,8 @@ #!/bin/bash # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Copyright 2014 Mirsk Digital ApS (Author: Andreas Kirkedal) +# Copyright 2014-15 Mirsk Digital ApS (Author: Andreas Kirkedal) +# Copyright 2016 Andreas Kirkedal # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +17,7 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. -KALDI_ROOT=$(pwd)/../../.. 
-
-exproot=$(pwd)
+lex=lexicon-da-nonorm.tgz
 
 dir=data/local/dict
 mkdir -p $dir
@@ -31,22 +30,13 @@ cp local/dictsrc/complexphones.txt $dir/nonsilence_phones.txt
 cp local/dictsrc/extra_questions.txt $dir/extra_questions.txt
 
 # Copy pre-made lexicon
-wget http://www.openslr.org/resources/8/lexicon-da.tgz --directory-prefix=data/local/data/download
-tar -xzf data/local/data/download/lexicon-da.tgz -C $dir
+wget http://www.openslr.org/resources/8/$lex --directory-prefix=data/local/data/download
+tar -xzf data/local/data/download/$lex -C $dir
 
 # silence phones, one per line.
-echo SIL > $dir/silence_phones.txt
+echo -e "SIL\nSPN" > $dir/silence_phones.txt
 echo SIL > $dir/optional_silence.txt
-
-
-
-
-wait
-
-
-## TODO: add cleanup commands
-
 echo "Dictionary preparation succeeded"
diff --git a/egs/sprakbanken/s5/local/create_datasets.sh b/egs/sprakbanken/s5/local/create_datasets.sh
index b0d87a730e8..891771dbce1 100755
--- a/egs/sprakbanken/s5/local/create_datasets.sh
+++ b/egs/sprakbanken/s5/local/create_datasets.sh
@@ -24,7 +24,7 @@ fi
 src=$1
 dest=$2
 mkdir $dest
-python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $src/text.unnormalised $src/onlyids $src/transcripts.am
+python local/normalize_transcript_prefixed.py local/norm_dk/numbersLow.tbl $src/text.unnormalised $src/onlyids $src/transcripts.am
 local/norm_dk/format_text.sh am $src/transcripts.am > $src/onlytext
 paste -d ' ' $src/onlyids $src/onlytext > $dest/text
 for f in wav.scp utt2spk; do
diff --git a/egs/sprakbanken/s5/local/dict_prep.sh b/egs/sprakbanken/s5/local/dict_prep.sh
index 8ecfa028408..1e37460dbe5 100755
--- a/egs/sprakbanken/s5/local/dict_prep.sh
+++ b/egs/sprakbanken/s5/local/dict_prep.sh
@@ -2,6 +2,7 @@
 # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
 # Copyright 2014 Mirsk Digital ApS (Author: Andreas Kirkedal)
+# Copyright 2014-2016 Andreas Kirkedal
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,57 +20,24 @@ KALDI_ROOT=$(pwd)/../../..
 exproot=$(pwd)
-dir=data/local/dict
+lmdir=data/local/transcript_lm
+dictsrc=data/local/dictsrc
+dictdir=data/local/dict
 espeakdir='espeak-1.48.04-source'
-mkdir -p $dir
+mkdir -p $dictsrc $dictdir
 
 # Dictionary preparation:
-
-# Normalise transcripts and create a transcript file
-# Removes '.,:;?' and removes '\' before '\Komma' (dictated ',')
-# outputs a normalised transcript without utterance ids and a list of utterance ids
-echo "Normalising"
-
-# Create dir to hold lm files and other non-standard files, useful for debugging
-trainsrc=data/local/trainsrc
-rm -rf $trainsrc
-mkdir $trainsrc
-mv data/train/text1 $trainsrc/text1
-python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $trainsrc/text1 $trainsrc/onlyids $dir/transcripts.tmp
-
-# Additional normalisation, uppercasing, writing numbers etc.
-# and recombine with -local/norm_dk/format_text.sh am $dir/transcripts.tmp > $dir/transcripts.am -cp $dir/transcripts.am $trainsrc/onlytext -paste $trainsrc/onlyids $trainsrc/onlytext > data/train/text -utils/validate_data_dir.sh --no-feat data/train || exit 1; - - - -# lmsents is output by sprak_data_prep.sh and contains -# sentences that are disjoint from the test and dev set -python3 local/normalize_transcript.py local/norm_dk/numbersUp.tbl data/local/data/lmsents $dir/lmsents.norm -wait - # Create wordlist from the AM transcripts -cat $dir/transcripts.am | tr [:blank:] '\n' | sort -u > $dir/wlist.txt & - -# Because training data is read aloud, there are many occurences of the same -# sentence and bias towards the domain. Make a version where -# the sentences are unique to reduce bias. -local/norm_dk/format_text.sh lm $dir/lmsents.norm > $dir/transcripts.txt -sort -u $dir/transcripts.txt > $dir/transcripts.uniq - +cat $lmdir/transcripts.uniq | tr [:blank:] '\n' | sort -u > $dictsrc/wlist.txt & # Install eSpeak if it is not installed already - if hash espeak 2>/dev/null; - then +then echo 'eSpeak installed' - else - cd $KALDI_ROOT/tools || exit 1; +else + cd $KALDI_ROOT/tools || exit 1; wget http://sourceforge.net/projects/espeak/files/espeak/espeak-1.48/${espeakdir}.zip wait unzip -q $espeakdir.zip @@ -81,87 +49,60 @@ if hash espeak 2>/dev/null; cd $exproot || exit 1; fi - - # Wait for the wordlist to be fully created -wait - +wait # Run wordlist through espeak to get phonetics # improvised parallelisation - simple call because 'split' often has different versions -split -l 10000 $dir/wlist.txt $dir/Wtemp_ -for w in $dir/Wtemp_*; do - (cat $w | espeak -q -vda -x > $w.pho) & +split -l 10000 $dictsrc/wlist.txt $dictsrc/Wtemp_ +for w in $dictsrc/Wtemp_*; do + (cat $w | espeak -q -vda -x > $w.pho) & done wait -cat $dir/Wtemp_*.pho > $dir/plist.txt -rm -f $dir/Wtemp_* +cat $dictsrc/Wtemp_*.pho > $dictsrc/plist.txt +rm -f $dictsrc/Wtemp_* # Filter transcription -# Remove diacritics, language annotation ((da), (en), (fr) etc.), insert space between symbols, remove +# Remove diacritics, language annotation ((da), (en), (fr) etc.), insert space between symbols, remove # initial and trailing spaces and collapse 2 or more spaces to one space -cat $dir/plist.txt | perl -pe 's/\([[a-z]{2}\)//g' | perl -pe 's// /g' | perl -pe 's/ a I / aI /g' | perl -pe 's/ d Z / dZ /g' | perl -pe 's/ \? / /g' | perl -pe 's/ ([\#]) /\+ /g' | perl -pe 's/([\@n3]) \- /\1\- /g' | perl -pe "s/[\_\:\!\'\,\|2]//g" | perl -pe 's/ \- / /g' | tr -s ' ' | perl -pe 's/^ +| +$//g' > $dir/plist2.txt +cat $dictsrc/plist.txt | perl -pe 's/\([[a-z]{2}\)//g' | perl -pe 's// /g' | perl -pe 's/ a I / aI /g' | perl -pe 's/ d Z / dZ /g' | perl -pe 's/ \? / /g' | perl -pe 's/ ([\#]) /\+ /g' | perl -pe 's/([\@n3]) \- /\1\- /g' | perl -pe "s/[\_\:\!\'\,\|2]//g" | perl -pe 's/ \- / /g' | tr -s ' ' | perl -pe 's/^ +| +$//g' > $dictsrc/plist2.txt #Some question marks are not caught above -perl -pe 's/ \? / /g' $dir/plist2.txt > $dir/plist3.txt +perl -pe 's/ \? 
/ /g' $dictsrc/plist2.txt > $dictsrc/plist3.txt # Create lexicon.txt and put it in data/local/dict -paste $dir/wlist.txt $dir/plist3.txt > $dir/lexicon1.txt +paste $dictsrc/wlist.txt $dictsrc/plist3.txt > $dictsrc/lexicon1.txt # Remove entries without transcription -grep -P "^.+\t.+$" $dir/lexicon1.txt > $dir/lexicon2.txt +grep -P "^.+\t.+$" $dictsrc/lexicon1.txt > $dictsrc/lexicon2.txt # Copy pre-made phone table with -cp local/dictsrc/complexphones.txt $dir/nonsilence_phones.txt +cp local/dictsrc/complexphones.txt $dictdir/nonsilence_phones.txt # Add "!SIL SIL" to lexicon.txt -echo -e '!SIL\tSIL' > $dir/lex_first -echo -e '\tSPN' >> $dir/lex_first -cat $dir/lexicon2.txt >> $dir/lex_first -mv $dir/lex_first $dir/lexicon.txt +echo -e '!SIL\tSIL' > $dictsrc/lex_first +echo -e '\tSPN' >> $dictsrc/lex_first +cat $dictsrc/lexicon2.txt >> $dictsrc/lex_first +mv $dictsrc/lex_first $dictdir/lexicon.txt # silence phones, one per line. -echo SIL > $dir/silence_phones.txt -echo SIL > $dir/optional_silence.txt - -touch $dir/extra_questions.txt - -# Repeat text preparation on test set, but do not add to dictionary -# Create dir to hold lm files and other non-standard files -testsrc=data/local/testsrc -rm -rf $testsrc -mkdir $testsrc -mv data/test/text1 $testsrc/text1 -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $testsrc/text1 $testsrc/onlyids $testsrc/transcripts.am -local/norm_dk/format_text.sh am $testsrc/transcripts.am > $testsrc/onlytext -paste $testsrc/onlyids $testsrc/onlytext > data/test/text -utils/validate_data_dir.sh --no-feat data/test || exit 1; - -# Repeat text preparation on dev set, but do not add to dictionary -# Create dir to hold lm files and other non-standard files -devsrc=data/local/devsrc -rm -rf $devsrc -mkdir $devsrc -mv data/dev/text1 $devsrc/text1 -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $devsrc/text1 $devsrc/onlyids $devsrc/transcripts.tmp -local/norm_dk/format_text.sh am $devsrc/transcripts.tmp > $devsrc/onlytext -paste $devsrc/onlyids $devsrc/onlytext > data/dev/text & - -# Also create a file that can be used for reranking using text features -local/norm_dk/format_text.sh lm $devsrc/transcripts.tmp > data/dev/transcripts.txt -sort -u data/dev/transcripts.txt > data/dev/transcripts.uniq - - -utils/validate_data_dir.sh --no-feat data/dev || exit 1; +if [ ! -f $dictdir/silence_phones.txt ]; then + echo SIL > $dictdir/silence_phones.txt +fi +if [ ! -f $dictdir/optional_silence.txt ]; then + echo SIL > $dictdir/optional_silence.txt +fi -## TODO: add cleanup commands +if [ ! -f $dictdir/extra_questions.txt ]; then + touch $dictdir/extra_questions.txt +fi -echo "Normalisation and dictionary preparation succeeded" +echo "Dictionary preparation succeeded" diff --git a/egs/sprakbanken/s5/local/find_transcripts.pl b/egs/sprakbanken/s5/local/find_transcripts.pl deleted file mode 100755 index 6429411b864..00000000000 --- a/egs/sprakbanken/s5/local/find_transcripts.pl +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - - -# This program takes on its standard input a list of utterance -# id's, one for each line. (e.g. 4k0c030a is a an utterance id). -# It takes as -# Extracts from the dot files the transcripts for a given -# dataset (represented by a file list). -# - -@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; -$dot_flist = shift @ARGV; - -open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; -while(){ - chop; - m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; - $spk = $1; - $spk2dot{$spk} = $_; -} - - - -while(){ - chop; - $uttid = $_; - $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; - $spk = $1; - if($spk ne $curspk) { - %utt2trans = { }; # Don't keep all the transcripts in memory... - $curspk = $spk; - $dotfile = $spk2dot{$spk}; - defined $dotfile || die "No dot file for speaker $spk\n"; - open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; - while() { - $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; - $trans = $1; - $utt = $2; - $utt2trans{$utt} = $trans; - } - } - if(!defined $utt2trans{$uttid}) { - print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; - } else { - print "$uttid $utt2trans{$uttid}\n"; - } -} - - diff --git a/egs/sprakbanken/s5/local/flist2scp.pl b/egs/sprakbanken/s5/local/flist2scp.pl deleted file mode 100755 index 234e4add1ed..00000000000 --- a/egs/sprakbanken/s5/local/flist2scp.pl +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# takes in a file list with lines like -# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# and outputs an scp in kaldi format with lines like -# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# (the first thing is the utterance-id, which is the same as the basename of the file. 
- - -while(<>){ - m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_"; - $id = $1; - $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames) - print "$id $_"; -} - diff --git a/egs/sprakbanken/s5/local/generate_results_file.sh b/egs/sprakbanken/s5/local/generate_results_file.sh new file mode 100755 index 00000000000..4659c36fc5a --- /dev/null +++ b/egs/sprakbanken/s5/local/generate_results_file.sh @@ -0,0 +1,16 @@ + +echo "GMM-based systems" +for x in exp/*/decode*;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + +echo "nnet3 xent systems" +for x in exp/nnet3/tdnn*/decode* exp/nnet3/lstm*/decode* ;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + +echo "Nnet3 chain systems" +for x in exp/chain/tdnn*/decode* exp/chain/lstm*/decode*;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + diff --git a/egs/sprakbanken/s5/local/kws_data_prep.sh b/egs/sprakbanken/s5/local/kws_data_prep.sh deleted file mode 100755 index 5222a88c9ef..00000000000 --- a/egs/sprakbanken/s5/local/kws_data_prep.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - - -if [ $# -ne 3 ]; then - echo "Usage: local/kws_data_prep.sh " - echo " e.g.: local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/" - exit 1; -fi - -langdir=$1; -datadir=$2; -kwsdatadir=$3; - -mkdir -p $kwsdatadir; - -# Create keyword id for each keyword -cat $kwsdatadir/raw_keywords.txt | perl -e ' - $idx=1; - while(<>) { - chomp; - printf "WSJ-%04d $_\n", $idx; - $idx++; - }' > $kwsdatadir/keywords.txt - -# Map the keywords to integers; note that we remove the keywords that -# are not in our $langdir/words.txt, as we won't find them anyway... -cat $kwsdatadir/keywords.txt | \ - sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \ - grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int - -# Compile keywords into FSTs -transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:$kwsdatadir/keywords.fsts - -# Create utterance id for each utterance; Note that by "utterance" here I mean -# the keys that will appear in the lattice archive. You may have to modify here -cat $datadir/wav.scp | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - $idx=1; - while(<>) { - chomp; - print "$_ $idx\n"; - $idx++; - }' > $kwsdatadir/utter_id - -# Map utterance to the names that will appear in the rttm file. You have -# to modify the commands below accoring to your rttm file. In the WSJ case -# since each file is an utterance, we assume that the actual file names will -# be the "names" in the rttm, so the utterance names map to themselves. 
-cat $datadir/wav.scp | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - while(<>) { - chomp; - print "$_ $_\n"; - }' > $kwsdatadir/utter_map; -echo "Kws data preparation succeeded" diff --git a/egs/sprakbanken/s5/local/nnet2/run_5b_gpu.sh b/egs/sprakbanken/s5/local/nnet2/run_5b_gpu.sh index 8f858da739d..4f88b2334f4 100755 --- a/egs/sprakbanken/s5/local/nnet2/run_5b_gpu.sh +++ b/egs/sprakbanken/s5/local/nnet2/run_5b_gpu.sh @@ -59,7 +59,7 @@ if [ $stage -le 2 ]; then steps/nnet2/train_block.sh --stage "$train_stage" \ --num-threads 1 --max-change 40.0 --minibatch-size 512 --num-jobs-nnet 8 \ - --parallel-opts "-l gpu=1" \ + --parallel-opts "--gpu 1" \ --initial-learning-rate 0.0075 --final-learning-rate 0.00075 \ --num-epochs 10 --num-epochs-extra 5 \ --cmd "$decode_cmd" \ diff --git a/egs/sprakbanken/s5/local/nnet2/run_5c2_gpu.sh b/egs/sprakbanken/s5/local/nnet2/run_5c2_gpu.sh index 55017386f08..00bd16bf00f 100755 --- a/egs/sprakbanken/s5/local/nnet2/run_5c2_gpu.sh +++ b/egs/sprakbanken/s5/local/nnet2/run_5c2_gpu.sh @@ -9,7 +9,7 @@ train_stage=-100 temp_dir= # e.g. --temp-dir /export/m1-02/dpovey/kaldi-dan2/egs/wsj/s5/ -parallel_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. dir=exp/nnet5c2_gpu # Note: since we multiplied the num-jobs by 1/4, we halved the @@ -18,7 +18,7 @@ dir=exp/nnet5c2_gpu . ./cmd.sh . utils/parse_options.sh -( +( if [ ! -z "$temp_dir" ] && [ ! -e $dir/egs ]; then mkdir -p $dir @@ -33,7 +33,7 @@ dir=exp/nnet5c2_gpu --num-hidden-layers 4 --hidden-layer-dim 1024 \ --cmd "$decode_cmd" \ data/train_si284 data/lang exp/tri4b_ali_si284 $dir || exit 1 - + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \ --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ exp/tri4b/graph_bd_tgpr data/test_dev93 $dir/decode_bd_tgpr_dev93 diff --git a/egs/sprakbanken/s5/local/nnet2/run_5c_gpu.sh b/egs/sprakbanken/s5/local/nnet2/run_5c_gpu.sh index 4aaafde4eb5..2bf13a0a399 100755 --- a/egs/sprakbanken/s5/local/nnet2/run_5c_gpu.sh +++ b/egs/sprakbanken/s5/local/nnet2/run_5c_gpu.sh @@ -1,12 +1,12 @@ #!/bin/bash # This is neural net training on top of adapted 40-dimensional features. -# +# train_stage=-100 temp_dir= # e.g. --temp-dir /export/m1-02/dpovey/kaldi-dan2/egs/wsj/s5/ -parallel_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. dir=exp/nnet5c_gpu # Note: since we multiplied the num-jobs by 1/4, we halved the @@ -15,7 +15,7 @@ dir=exp/nnet5c_gpu . ./cmd.sh . utils/parse_options.sh -( +( if [ ! -z "$temp_dir" ] && [ ! -e $dir/egs ]; then mkdir -p $dir @@ -30,7 +30,7 @@ dir=exp/nnet5c_gpu --num-hidden-layers 4 --hidden-layer-dim 1024 \ --cmd "$decode_cmd" \ data/train_si284 data/lang exp/tri4b_ali_si284 $dir || exit 1 - + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \ --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ exp/tri4b/graph_bd_tgpr data/test_dev93 $dir/decode_bd_tgpr_dev93 diff --git a/egs/sprakbanken/s5/local/nnet2/run_5d_gpu.sh b/egs/sprakbanken/s5/local/nnet2/run_5d_gpu.sh index f52a0028074..1b87fec6419 100755 --- a/egs/sprakbanken/s5/local/nnet2/run_5d_gpu.sh +++ b/egs/sprakbanken/s5/local/nnet2/run_5d_gpu.sh @@ -5,7 +5,7 @@ train_stage=-100 temp_dir= # e.g. 
--temp-dir /export/m1-02/dpovey/kaldi-dan2/egs/wsj/s5/ -parallel_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. dir=exp/nnet5d_gpu # Note: since we multiplied the num-jobs by 1/4, we halved the diff --git a/egs/sprakbanken/s5/local/nnet2/run_6c_gpu.sh b/egs/sprakbanken/s5/local/nnet2/run_6c_gpu.sh index eee51fd9c9b..e61843ef4b4 100755 --- a/egs/sprakbanken/s5/local/nnet2/run_6c_gpu.sh +++ b/egs/sprakbanken/s5/local/nnet2/run_6c_gpu.sh @@ -7,7 +7,7 @@ # directory name. -gpu_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, +gpu_opts="--gpu 1" # This is suitable for the CLSP network, # you'll likely have to change it. we'll # use it later on, in the training (it's # not used in denlat creation) @@ -15,7 +15,7 @@ gpu_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, # The denominator lattice creation currently doesn't use GPUs. -# Note: we specify 1G each for the mem_free and ram_free which, is per +# Note: we specify 1G for --mem, which is per # thread... it will likely be less than the default. Increase the beam relative # to the defaults; this is just for this RM setup, where the default beams will # likely generate very thin lattices. Note: the transform-dir is important to @@ -25,8 +25,8 @@ set -e # exit on error. nj=$(cat exp/tri4b_ali_si284/num_jobs) -steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G" \ - --nj $nj --sub-split 20 --num-threads 6 --parallel-opts "-pe smp 6" \ +steps/nnet2/make_denlats.sh --cmd "$decode_cmd --mem 1G" \ + --nj $nj --sub-split 20 --num-threads 6 --parallel-opts "--num-threads 6" \ --transform-dir exp/tri4b_ali_si284 \ data/train_si284 data/lang exp/nnet5c_gpu exp/nnet5c_gpu_denlats diff --git a/egs/sprakbanken/s5/local/nnet3/run_blstm.sh b/egs/sprakbanken/s5/local/nnet3/run_blstm.sh new file mode 100755 index 00000000000..f29731397fe --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_blstm.sh @@ -0,0 +1,48 @@ +stage=0 +train_stage=-10 +affix=bidirectional +nnet3_affix= +common_egs_dir= +remove_egs=true +train_set=train +gmm=tri3b + + +# BLSTM params +cell_dim=1024 +rp_dim=128 +nrp_dim=128 +chunk_left_context=40 +chunk_right_context=40 + +# training options +srand=0 +num_jobs_initial=3 +num_jobs_final=15 +samples_per_iter=20000 +num_epochs=6 +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. 
parse_options.sh || exit 1; + +local/nnet3/run_lstm.sh --affix $affix \ + --srand $srand \ + --stage $stage \ + --train-stage $train_stage \ + --train-set $train_set \ + --gmm $gmm \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --chunk-left-context $chunk_left_context \ + --chunk-right-context $chunk_right_context \ + --num-jobs-initial $num_jobs_initial \ + --num-jobs-final $num_jobs_final \ + --samples-per-iter $samples_per_iter \ + --num-epochs $num_epochs \ + --remove-egs $remove_egs + diff --git a/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh b/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..9a730348dfa --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,238 @@ +#!/bin/bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=30 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train # you might set this to e.g. train. +gmm=tri3b # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix=_n3 # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." + exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/sprakbanken-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp dev test; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
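+  # (A sketch of what that step does, assuming the stock behaviour of
+  # perturb_data_dir_volume.sh: it rewrites each wav.scp entry to pipe the
+  # audio through sox with a random gain factor, roughly in [0.125, 2], e.g.
+  #   utt1 sox --vol 1.4 -t wav /path/utt1.wav -t wav - |
+  # so no additional audio files are written to disk.)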
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp dev test; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null || true + ( + steps/nnet3/decode.sh --nj 12 --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \ + ${graph_dir} data/dev_hires ${dir}/decode_dev || exit 1 + steps/nnet3/decode.sh --nj 7 --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode_test || exit 1 + ) || touch $dir/.error & + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh b/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh new file mode 100755 index 00000000000..45794ac9ee4 --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3 + +# by default: +# local/nnet3/run_tdnn.sh + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +train_set=train +gmm=tri3b # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=750 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat </dev/null + ( + steps/nnet3/decode.sh --nj 7 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode_test || exit 1 + steps/nnet3/decode.sh --nj 12 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \ + ${graph_dir} data/dev_hires ${dir}/decode_dev || exit 1 + ) || touch $dir/.error & + + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/sprakbanken/s5/local/norm_dk/format_text.sh b/egs/sprakbanken/s5/local/norm_dk/format_text.sh index ff85c8cc0ef..abbf975dbdf 100755 --- a/egs/sprakbanken/s5/local/norm_dk/format_text.sh +++ b/egs/sprakbanken/s5/local/norm_dk/format_text.sh @@ -34,8 +34,8 @@ nonum=$tmp/nonum.tmp cat $2 | tr -d '\r' > $src -$dir/expand_abbr_medical.sh $src > $abbr; -$dir/remove_annotation.sh $abbr > $rem; +#$dir/expand_abbr_medical.sh $src > $abbr; +$dir/remove_annotation.sh $src > $rem; if [ $mode != "am" ]; then $dir/sent_split.sh $rem > $line; else @@ -45,10 +45,11 @@ fi $dir/expand_dates.sh $line |\ $dir/format_punct.sh > $num; #python3 $dir/writenumbers.py $dir/numbersUp.tbl $num $nonum; -cat $num | $dir/write_punct.sh | \ +# $dir/write_punct.sh | \ +cat $num | \ perl -pi -e "s/^\n//" | \ -perl -pe 's/ (.{4}.*?)\./ \1/g' | \ -PERLIO=:utf8 perl -pe '$_=uc' +perl -pe 's/ (.{4}.*?)\./ \1/g' +# | PERLIO=:utf8 perl -pe '$_=lc' # Comment this line for debugging wait diff --git a/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl b/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl new file mode 100644 index 00000000000..824c0afa3b2 --- /dev/null +++ b/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl @@ -0,0 +1,265 @@ +¼ en fjerdedel +½ en halv +0 nul +² i anden +enogfirs en og firs +enogfyrre en og fyrre +enoghalvfems en og halvfems +enoghalvfjerds en og halvfjerds +enoghalvtreds en og halvtreds +enogtredive en og tredive +enogtredivte en og tredivte +enogtres en og tres +enogtyvende en og tyvende +femogfirs fem og firs +femogfyrre fem og fyrre +femoghalvfems fem og halvfems +femoghalvfjerds fem og halvfjerds +femoghalvtreds fem og halvtreds +femogtredive fem og tredive +femogtres fem og tres +femogtyve fem og tyve +femogtyvende fem og tyvende +fireogfirs fire og firs +fireogfyrre fire og fyrre +fireoghalvfems fire og halvfems +fireoghalvfjerds fire og halvfjerds +fireoghalvtreds fire og halvtreds +fireogtredive fire og tredive +fireogtres fire og tres +fireogtyve fire og tyve +fireogtyvende fire og tyvende +fyrreogtyvende fyrre og tyvende +niogfirs ni og firs +niogfyrre ni og fyrre +nioghalvfems ni og halvfems +nioghalvfjerds ni og halvfjerds +nioghalvtreds ni og halvtreds +niogtredive ni og tredive +niogtres ni og tres +niogtyvende ni og tyvende +niogtyve ni og tyve +otteogfirs otte og firs +otteogfyrre otte og fyrre +otteoghalvfems otte og halvfems +otteoghalvfjerds otte og halvfjerds +otteoghalvtreds otte og halvtreds +otteogtredive otte og tredive +otteogtres otte og tres +otteogtyvende otte og tyvende +otteogtyve otte og tyve +seksogfirs seks og firs +seksogfyrre seks og fyrre +seksoghalvfems seks og halvfems +seksoghalvfjerds seks og halvfjerds +seksoghalvtreds seks og halvtreds +seksogtredive seks og tredive +seksogtres seks og tres +seksogtyvende seks og tyvende +seksogtyve seks og tyve +syvogfirs syv og firs +syvogfyrre syv og fyrre +syvoghalvfems syv og halvfems +syvoghalvfjerds syv og halvfjerds +syvoghalvtreds syv og halvtreds 
+syvogtredive syv og tredive +syvogtres syv og tres +syvogtyvende syv og tyvende +syvogtyve syv og tyve +toogfirs to og firs +toogfyrre to og fyrre +tooghalvfems to og halvfems +tooghalvfjerds to og halvfjerds +tooghalvtreds to og halvtreds +toogtredive to og tredive +toogtres to og tres +toogtyvende to og tyvende +toogtyve to og tyve +totusindogatten to tusind og atten +totusindogelleve to tusind og elleve +totusindoget to tusind og et +totusindogfemten to tusind og femten +totusindogfem to tusind og fem +totusindogfire to tusind og fire +totusindogfjorten to tusind og fjorten +totusindogni to tusind og ni +totusindognitten to tusind og nitten +totusindogotte to tusind og otte +totusindogseksten to tusind og seksten +totusindogseks to tusind og seks +totusindogsytten to tusind og sytten +totusindogsyv to tusind og syv +totusindogti to tusind og ti +totusindogtolv to tusind og tolv +totusindogto to tusind og to +totusindogtre to tusind og tre +totusindogtretten to tusind og tretten +totusindogtyve to tusind og tyve +treogfirs tre og firs +treogfyrre tre og fyrre +treoghalvfems tre og halvfems +treoghalvfjerds tre og halvfjerds +treoghalvtreds tre og halvtreds +treogtredive tre og tredive +treogtres tre og tres +treogtyvende tre og tyvende +treogtyve tre og tyve +1 en +1. første +2. anden +2 to +3 tre +3. tredje +4 fire +4. fjerde +5 fem +5. femte +6 seks +6. sjette +7 syv +7. syvende +8 otte +8. ottende +9 ni +9. niende +10 ti +10. tiende +11 elleve +11. ellevte +12 tolv +12. tolvte +13 tretten +13. trettende +14 fjorten +14. fjortende +15 femten +15. femtende +16 seksten +16. sekstende +17 sytten +17. syttende +18 atten +18. attende +19 nitten +19. nittende +20 tyve +20. tyvende +21 en og tyve +21. en og tyvende +22 to og tyve +22. to og tyvende +23 tre og tyve +23. tre og tyvende +24 fire og tyve +24. fire og tyvende +25 fem og tyve +25. fem og tyvende +26 seks og tyve +26. seks og tyvende +27 syv og tyve +27. syv og tyvende +28 otte og tyve +28. otte og tyvende +29 ni og tyve +29. ni og tyvende +30 tredive +30. tredivte +31 en og tredive +31. en og tredivte +32 to og tredive +33 tre og tredive +34 fire og tredive +35 fem og tredive +36 seks og tredive +37 syv og tredive +38 otte og tredive +39 ni og tredive +40 fyrre +40. fyrre og tyvende +41 en og fyrre +42 to og fyrre +43 tre og fyrre +44 fire og fyrre +45 fem og fyrre +46 seks og fyrre +47 syv og fyrre +48 otte og fyrre +49 ni og fyrre +50 halvtreds +50. halvtredsinds tyvende +51 en og halvtreds +52 to og halvtreds +53 tre og halvtreds +54 fire og halvtreds +55 fem og halvtreds +56 seks og halvtreds +57 syv og halvtreds +58 otte og halvtreds +59 ni og halvtreds +60 tres +60. tresinds tyvende +61 en og tres +62 to og tres +63 tre og tres +64 fire og tres +65 fem og tres +66 seks og tres +67 syv og tres +68 otte og tres +69 ni og tres +70 halvfjerds +70. halvfjerdsinds tyvende +71 en og halvfjerds +72 to og halvfjerds +73 tre og halvfjerds +74 fire og halvfjerds +75 fem og halvfjerds +76 seks og halvfjerds +77 syv og halvfjerds +78 otte og halvfjerds +79 ni og halvfjerds +80 firs +80. firsindstyvende +81 en og firs +82 to og firs +83 tre og firs +84 fire og firs +85 fem og firs +86 seks og firs +87 syv og firs +88 otte og firs +89 ni og firs +90 halvfems +90. 
halvfemsinds tyvende
+enogtyve en og tyve
+91 en og halvfems
+92 to og halvfems
+93 tre og halvfems
+94 fire og halvfems
+95 fem og halvfems
+96 seks og halvfems
+97 syv og halvfems
+98 otte og halvfems
+99 ni og halvfems
+100 hundrede
+1000 tusind
+2000 to tusind
+2001 to tusind og et
+2002 to tusind og to
+2003 to tusind og tre
+2004 to tusind og fire
+2005 to tusind og fem
+2006 to tusind og seks
+2007 to tusind og syv
+2008 to tusind og otte
+2009 to tusind og ni
+2010 to tusind og ti
+2011 to tusind og elleve
+2012 to tusind og tolv
+2013 to tusind og tretten
+2014 to tusind og fjorten
+2015 to tusind og femten
+2016 to tusind og seksten
+2017 to tusind og sytten
+2018 to tusind og atten
+2019 to tusind og nitten
+2020 to tusind og tyve
diff --git a/egs/sprakbanken/s5/local/normalize_transcript.py b/egs/sprakbanken/s5/local/normalize_transcript.py
index f759a39731d..2374418bee7 100755
--- a/egs/sprakbanken/s5/local/normalize_transcript.py
+++ b/egs/sprakbanken/s5/local/normalize_transcript.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 import codecs
 import sys
 import re
 import writenumbers
-
+from string import maketrans
 
 ## Global vars
@@ -16,7 +17,10 @@
     "\t": " "
     }
 
-t_table = str.maketrans(normdict)
+from_chars = ''.join(normdict.keys())
+to_chars = ''.join(normdict.values())
+
+#t_table = maketrans(from_chars, to_chars)
 
 ## Main
@@ -27,10 +31,11 @@
 
 for line in transcript:
-    normtext1 = line.translate(t_table)
-    normtext2 = re.sub(r' +', ' ', normtext1.strip())
-    normtext3 = writenumbers.normNumber(normtext2, numtable)
-    outtext.write(normtext3.upper() + "\n")
+    normtext1 = re.sub(r'[\.,:;\?]', '', line)
+    normtext2 = re.sub(r'[\t\\]', ' ', normtext1)
+    normtext3 = re.sub(r' +', ' ', normtext2.strip())
+    normtext4 = writenumbers.normNumber(normtext3, numtable)
+    outtext.write(normtext4)
 
 transcript.close()
 outtext.close()
diff --git a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
index e934533a393..557606ae205 100755
--- a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
+++ b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital Aps (Author: Andreas Kirkedal)
+# Copyright 2014-2016 Andreas Kirkedal
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
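Back on the numbersLow.tbl file added further above: it is a two-column lookup table, with the numeral or written compound to expand in the first field and the spaced-out expansion in the remainder. Assuming the normalization scripts do a token-by-token lookup of this kind (an assumption; the real writenumbers.normNumber likely handles more cases), the core of it can be sketched in awk:

  # first pass loads the table, second pass expands tokens found in it
  awk 'NR==FNR { key=$1; sub(/^[^ \t]+[ \t]+/, ""); tbl[key]=$0; next }
       { out=""; for (i=1; i<=NF; i++) { w=$i; if (w in tbl) w=tbl[w]; out=out w " " }; print out }' \
    local/norm_dk/numbersLow.tbl lmsents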
@@ -24,16 +25,16 @@ ## Global vars -normdict = {",": " ", - ":": " ", - ";": " ", - "?": " ", - "\\": " ", - "\t": " ", - #".": "" - } +# normdict = {",": " ", +# ":": " ", +# ";": " ", +# "?": " ", +# "\\": " ", +# "\t": " ", +# #".": "" +# } -t_table = str.maketrans(normdict) +# t_table = str.maketrans(normdict) ## Utility function @@ -51,12 +52,13 @@ def getuttid_text(line): for line in textin: utt_id, text = getuttid_text(line) - normtext1 = text.translate(t_table) - normtext2 = re.sub(r' +', ' ', normtext1.strip()) - normtext3 = writenumbers.normNumber(normtext2, numtable) - + normtext1 = re.sub(r'[\.,:;\?]', '', text) + normtext2 = re.sub(r'[\t\\]', ' ', normtext1) + normtext3 = re.sub(r' +', ' ', normtext2.strip()) + normtext4 = writenumbers.normNumber(normtext3, numtable) + outtext.write(normtext4) fid.write(utt_id + "\n") - outtext.write(normtext3) + textin.close() outtext.close() diff --git a/egs/sprakbanken/s5/local/run_basis_fmllr.sh b/egs/sprakbanken/s5/local/run_basis_fmllr.sh deleted file mode 100755 index 3c04e480a0a..00000000000 --- a/egs/sprakbanken/s5/local/run_basis_fmllr.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -. cmd.sh - -mfccdir=mfcc - -# Make "per-utterance" versions of the test sets where the speaker -# information corresponds to utterances-- to demonstrate adaptation on -# short utterances, particularly for basis fMLLR -for x in test_eval92 test_eval93 test_dev93 ; do - y=${x}_utt - rm -r data/$y - cp -r data/$x data/$y - cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; - cp data/$y/utt2spk data/$y/spk2utt; - steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; -done - - - # basis fMLLR experiments. - # First a baseline: decode per-utterance with normal fMLLR. -steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_utt || exit 1; -steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_utt || exit 1; - - # get the fMLLR basis. -steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train_si84 data/lang exp/tri3b - - # decoding tri3b with basis fMLLR -steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_basis || exit 1; -steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92_basis || exit 1; - - # The same, per-utterance. -steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_basis_utt || exit 1; -steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_basis_utt || exit 1; - - diff --git a/egs/sprakbanken/s5/local/run_kl_hmm.sh b/egs/sprakbanken/s5/local/run_kl_hmm.sh deleted file mode 100644 index 9e7679a7675..00000000000 --- a/egs/sprakbanken/s5/local/run_kl_hmm.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Idiap Research Institute (Author: David Imseng) -# Apache 2.0 - -. 
cmd.sh - -states=20000 -dir=exp/tri4b_pretrain-dbn_dnn/ - -steps/kl_hmm/build_tree.sh --cmd "$big_memory_cmd" --thresh -1 --nnet_dir exp/tri4b_pretrain-dbn_dnn/ \ - ${states} data-fmllr-tri4b/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b-${states} || exit 1; - -utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri4b-${states} exp/tri4b-${states}/graph_bd_tgpr || exit 1; - -steps/kl_hmm/train_kl_hmm.sh --nj 30 --cmd "$big_memory_cmd" --model exp/tri4b-${states}/final.mdl data-fmllr-tri4b/train_si284 exp/tri4b-${states} $dir/kl-hmm-${states} - -steps/kl_hmm/decode_kl_hmm.sh --nj 10 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \ - --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_dev93 $dir/decode_dev93_kl-hmm-bd-${states}_tst - -steps/kl_hmm/decode_kl_hmm.sh --nj 8 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \ - --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_eval92 $dir/decode_eval92_kl-hmm-bd-${states}_tst - - diff --git a/egs/sprakbanken/s5/local/run_raw_fmllr.sh b/egs/sprakbanken/s5/local/run_raw_fmllr.sh deleted file mode 100644 index c4847a93f27..00000000000 --- a/egs/sprakbanken/s5/local/run_raw_fmllr.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - - -steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \ - data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84_raw - -steps/train_raw_sat.sh --cmd "$train_cmd" \ - 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84_raw exp/tri3c || exit 1; - - -mfccdir=mfcc -for x in test_eval92 test_eval93 test_dev93 ; do - y=${x}_utt - mkdir -p data/$y - cp data/$x/* data/$y || true - cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; - cp data/$y/utt2spk data/$y/spk2utt; - steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; -done - -( -utils/mkgraph.sh data/lang_test_tgpr exp/tri3c exp/tri3c/graph_tgpr || exit 1; -steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93 || exit 1; -steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92 || exit 1; - -steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93_utt exp/tri3c/decode_tgpr_dev93_utt || exit 1; -steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92_utt exp/tri3c/decode_tgpr_eval92_utt || exit 1; - -steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 10 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93_2fmllr || exit 1; -steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 8 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92_2fmllr || exit 1; -)& - -( -utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri3c exp/tri3c/graph_bd_tgpr || exit 1; - -steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 8 exp/tri3c/graph_bd_tgpr \ - data/test_eval92 exp/tri3c/decode_bd_tgpr_eval92 - steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 10 exp/tri3c/graph_bd_tgpr \ - data/test_dev93 exp/tri3c/decode_bd_tgpr_dev93 -)& - -steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ - data/train_si284 data/lang exp/tri3c exp/tri3c_ali_si284 || exit 1; - - -steps/train_raw_sat.sh --cmd "$train_cmd" \ - 4200 40000 data/train_si284 data/lang exp/tri3c_ali_si284 
exp/tri4d || exit 1; -( - utils/mkgraph.sh data/lang_test_tgpr exp/tri4d exp/tri4d/graph_tgpr || exit 1; - steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri4d/graph_tgpr data/test_dev93 exp/tri4d/decode_tgpr_dev93 || exit 1; - steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri4d/graph_tgpr data/test_eval92 exp/tri4d/decode_tgpr_eval92 || exit 1; -) & - - -wait - - -#for x in exp/tri3{b,c}/decode_tgpr*; do grep WER $x/wer_* | utils/best_wer.sh ; done - diff --git a/egs/sprakbanken/s5/local/run_sgmm.sh b/egs/sprakbanken/s5/local/run_sgmm.sh deleted file mode 100755 index 27d8449896f..00000000000 --- a/egs/sprakbanken/s5/local/run_sgmm.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -# This script is invoked from ../run.sh -# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. - -. cmd.sh - -# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for -# training, but this shouldn't have much effect. - -( - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; - - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \ - exp/ubm5a/final.ubm exp/sgmm5a || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ - --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1; - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter & - done - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & - done - -) & - - -( -# The next commands are the same thing on all the si284 data. 
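A note on the per-utterance test sets built in the deleted run_basis_fmllr.sh and run_raw_fmllr.sh above: the trick is to relabel every utterance as its own "speaker", so fMLLR and basis-fMLLR statistics are estimated per utterance. A minimal sketch (directory names as in those scripts; the remaining data files must be copied over as they do):

  mkdir -p data/test_dev93_utt
  awk '{print $1, $1}' data/test_dev93/utt2spk > data/test_dev93_utt/utt2spk
  cp data/test_dev93_utt/utt2spk data/test_dev93_utt/spk2utt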
-
-# SGMM system on the si284 data [sgmm5b]
-  steps/train_ubm.sh --cmd "$train_cmd" \
-    600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1;
-
-  steps/train_sgmm.sh --cmd "$train_cmd" \
-    5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
-    exp/ubm5b/final.ubm exp/sgmm5b || exit 1;
-
-  (
-    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr
-    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
-      exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93
-    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
-      exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92
-
-    utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1;
-    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
-      exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93
-    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
-      exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92
-  ) &
-
-  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
-    --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284
-
-  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
-    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284
-
-  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
-    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1
-
-  for iter in 1 2 3 4; do
-    for test in dev93 eval92; do
-      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
-        --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \
-        exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter &
-
-      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
-        --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \
-        exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter &
-    done
-  done
-) &
-
-
-
-# Train quinphone SGMM system.
-
-steps/train_sgmm.sh --cmd "$train_cmd" \
-  --context-opts "--context-width=5 --central-position=2" \
-  5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
-  exp/ubm5b/final.ubm exp/sgmm5c || exit 1;
-
-# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93.
-steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
-  data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93
diff --git a/egs/sprakbanken/s5/local/score.sh b/egs/sprakbanken/s5/local/score.sh
index abd8149a672..9fcafdc0b5c 100755
--- a/egs/sprakbanken/s5/local/score.sh
+++ b/egs/sprakbanken/s5/local/score.sh
@@ -1,18 +1,24 @@
 #!/bin/bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
 # Apache 2.0
 
+# See the script steps/scoring/score_kaldi_cer.sh in case you need to evaluate CER
+
 [ -f ./path.sh ] && . ./path.sh
 
 # begin configuration section.
 cmd=run.pl
 stage=0
-decode_mbr=true
-word_ins_penalty=0.0
+decode_mbr=false
+stats=true
+beam=6
+word_ins_penalty=0.0,0.5,1.0
 min_lmwt=7
 max_lmwt=17
+iter=final
 #end configuration section.
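+# (A note on the grid set up above, assuming the defaults: every word insertion
+# penalty in $word_ins_penalty is scored at every LM weight from $min_lmwt to
+# $max_lmwt, i.e. 3 x 11 = 33 combinations, each written to wer_<lmwt>_<wip>;
+# the best one can later be read off with, e.g.:
+#   grep WER exp/tri3b/decode_tg_dev/wer_* | utils/best_wer.sh)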
+echo "$0 $@" # Print the command line for logging [ -f ./path.sh ] && . ./path.sh . parse_options.sh || exit 1; @@ -37,21 +43,107 @@ for f in $symtab $dir/lat.1.gz $data/text; do [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; done -mkdir -p $dir/scoring/log -cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt +ref_filtering_cmd="cat" +[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" +[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" +hyp_filtering_cmd="cat" +[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" +[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" + + +if $decode_mbr ; then + echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" +else + echo "$0: scoring with word insertion penalty=$word_ins_penalty" +fi + + +mkdir -p $dir/scoring_kaldi +cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; +if [ $stage -le 0 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + mkdir -p $dir/scoring_kaldi/penalty_$wip/log + + if $decode_mbr ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-mbr-decode --word-symbol-table=$symtab \ + ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + fi + + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ + cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; + + done +fi -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab \ - ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; -# Note: the double level of quoting for the sed command -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cat $dir/scoring/LMWT.tra \| \ - utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ - compute-wer --text --mode=present \ - ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +if [ $stage -le 1 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in $(seq $min_lmwt $max_lmwt); do + # adding /dev/null to the command list below forces grep to output the filename + grep WER $dir/wer_${lmwt}_${wip} /dev/null + done + done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 + + best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) + best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') + best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') + + if [ -z 
"$best_lmwt" ]; then + echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." + exit 1; + fi + + if $stats; then + mkdir -p $dir/scoring_kaldi/wer_details + echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight + echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty + + $cmd $dir/scoring_kaldi/log/stats1.log \ + cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ + align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ + utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ + utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; + + $cmd $dir/scoring_kaldi/log/stats2.log \ + cat $dir/scoring_kaldi/wer_details/per_utt \| \ + utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ + sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; + + $cmd $dir/scoring_kaldi/log/wer_bootci.log \ + compute-wer-bootci --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ + '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; + + fi +fi + +# If we got here, the scoring was successful. +# As a small aid to prevent confusion, we remove all wer_{?,??} files; +# these originate from the previous version of the scoring files +# i keep both statement here because it could lead to confusion about +# the capabilities of the script (we don't do cer in the script) +rm $dir/wer_{?,??} 2>/dev/null +rm $dir/cer_{?,??} 2>/dev/null exit 0; diff --git a/egs/sprakbanken/s5/local/sprak_data_prep.sh b/egs/sprakbanken/s5/local/sprak_data_prep.sh index c7a1d048a4f..c336b06e8af 100755 --- a/egs/sprakbanken/s5/local/sprak_data_prep.sh +++ b/egs/sprakbanken/s5/local/sprak_data_prep.sh @@ -2,6 +2,7 @@ # Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) # Copyright 2013-2014 Mirsk Digital Aps (Author: Andreas Kirkedal) +# Copyright 2015-2016 Andreas Kirkedal # Apache 2.0. @@ -17,33 +18,22 @@ utils=`pwd`/utils . ./path.sh -# Checks if python3 is available on the system and install python3 in userspace if not -# This recipe currently relies on version 3 because python3 uses utf8 as internal -# string representation - -if ! which python3 >&/dev/null; then - echo "Installing python3 since not on your path." - pushd $KALDI_ROOT/tools || exit 1; - extras/install_python3.sh || exit 1; - popd -fi - if [ ! -d $dir/download ]; then mkdir -p $dir/download/0565-1 $dir/download/0565-2 fi -echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while." +echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while. The connection closes every 50-60 seconds and the repo maintainers do not have othersuggestions than increasing the number of retries." if [ ! -f $dir/download/da.16kHz.0565-1.tar.gz ]; then - ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download ) & + ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download ) fi if [ ! 
-f $dir/download/da.16kHz.0565-2.tar.gz ]; then - ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download ) & + ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download ) fi -if [ ! -f $dir/download/da.16kHz.0565-1.tar.gz ]; then - ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0611.tar.gz --directory-prefix=$dir/download ) & +if [ ! -f $dir/download/da.16kHz.0611.tar.gz ]; then + ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0611.tar.gz --directory-prefix=$dir/download ) fi wait @@ -51,8 +41,8 @@ echo "Corpus files downloaded." if [ ! -d $dir/download/0611 ]; then echo "Unpacking files." - tar -xzf $dir/download/da.16kHz.0565-1.tar.gz -C $dir/download/0565-1 & - tar -xzf $dir/download/da.16kHz.0565-2.tar.gz -C $dir/download/0565-2 & + tar -xzf $dir/download/da.16kHz.0565-1.tar.gz -C $dir/download/0565-1 + tar -xzf $dir/download/da.16kHz.0565-2.tar.gz -C $dir/download/0565-2 tar -xzf $dir/download/da.16kHz.0611.tar.gz -C $dir/download # Note: rename "da 0611 test" to "da_0611_test" for this to work @@ -62,7 +52,7 @@ if [ ! -d $dir/download/0611 ]; then fi -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +sph2pipe=$(which sph2pipe) || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe if [ ! -x $sph2pipe ]; then echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; exit 1; @@ -78,27 +68,25 @@ mkdir -p $dir/corpus_processed/training/0565-1 $dir/corpus_processed/training/05 # Create parallel file lists and text files, but keep sound files in the same location to save disk space # Writes the lists to data/local/data (~ 310h) echo "Creating parallel data for training data." -python3 $local/sprak2kaldi.py $dir/download/0565-1 $dir/corpus_processed/training/0565-1 & # ~130h -python3 $local/sprak2kaldi.py $dir/download/0565-2 $dir/corpus_processed/training/0565-2 & # ~115h -python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon05 $dir/corpus_processed/training/0611_Stasjon05 & # ~51h +python $local/sprak2kaldi.py $dir/download/0565-1 $dir/corpus_processed/training/0565-1 # ~130h +python $local/sprak2kaldi.py $dir/download/0565-2 $dir/corpus_processed/training/0565-2 # ~115h +python $local/sprak2kaldi.py $dir/download/0611/Stasjon05 $dir/corpus_processed/training/0611_Stasjon05 # ~51h ( # Ditto dev set (~ 16h) echo "Creating parallel data for test data." rm -rf $dir/corpus_processed/dev03 mkdir -p $dir/corpus_processed/dev03 - python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon03 $dir/corpus_processed/dev03 & -) & + python $local/sprak2kaldi.py $dir/download/0611/Stasjon03 $dir/corpus_processed/dev03 || exit 1; +) ( # Ditto test set (about 9 hours) echo "Creating parallel data for development data." rm -rf $dir/corpus_processed/test06 mkdir -p $dir/corpus_processed/test06 - python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon06 $dir/corpus_processed/test06 || exit 1; -) & - -wait + python $local/sprak2kaldi.py $dir/download/0611/Stasjon06 $dir/corpus_processed/test06 || exit 1; +) # Create the LM training data # Test and dev data is disjoint from training data, so we use those transcripts) @@ -110,10 +98,10 @@ wait ( echo "Writing the LM text to file and normalising." 
cat $dir/corpus_processed/training/0565-1/txtlist $dir/corpus_processed/training/0565-2/txtlist | while read l; do cat $l; done > $lmdir/lmsents - python3 local/normalize_transcript.py local/norm_dk/numbersUp.tbl $lmdir/lmsents $lmdir/lmsents.norm + python local/normalize_transcript.py local/norm_dk/numbersLow.tbl $lmdir/lmsents $lmdir/lmsents.norm local/norm_dk/format_text.sh lm $lmdir/lmsents.norm > $lmdir/transcripts.txt sort -u $lmdir/transcripts.txt > $lmdir/transcripts.uniq -) & +) # Combine training file lists echo "Combine file lists." @@ -131,18 +119,15 @@ cp $dir/corpus_processed/test06/sndlist $dir/testsndfiles # Write wav.scp, utt2spk and text.unnormalised for train, test and dev sets with # Use sph2pipe because the wav files are actually sph files echo "Creating wav.scp, utt2spk and text.unnormalised for train, test and dev" -python3 $local/data_prep.py $dir/traintxtfiles $traindir $dir/trainsndfiles $sph2pipe & -python3 $local/data_prep.py $dir/testtxtfiles $testdir $dir/testsndfiles $sph2pipe & -python3 $local/data_prep.py $dir/devtxtfiles $devdir $dir/devsndfiles $sph2pipe & +python $local/data_prep.py $dir/traintxtfiles $traindir $dir/trainsndfiles $sph2pipe +python $local/data_prep.py $dir/testtxtfiles $testdir $dir/testsndfiles $sph2pipe +python $local/data_prep.py $dir/devtxtfiles $devdir $dir/devsndfiles $sph2pipe -wait # Create the main data sets -local/create_datasets.sh $testdir data/test & -local/create_datasets.sh $devdir data/dev & -local/create_datasets.sh $traindir data/train & - -wait +local/create_datasets.sh $testdir data/test +local/create_datasets.sh $devdir data/dev +local/create_datasets.sh $traindir data/train ## TODO diff --git a/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh b/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh deleted file mode 100755 index 83999bada53..00000000000 --- a/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -. ./cmd.sh - -# LM suffix -uid=$1 - -# Test set id -test=$2 - -steps/make_denlats.sh --nj 30 --sub-split 24 --cmd "$train_cmd" \ - --transform-dir exp/tri4b_ali \ - data/train data/lang exp/tri4b exp/tri4b_denlats || exit 1; - -steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ - data/train data/lang exp/tri4b_ali exp/tri4b_denlats \ - exp/tri4b_mmi_b0.1 || exit 1; - -steps/decode.sh --nj 7 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_${uid}_$test \ - exp/tri4b_/graph_$uid data/$test exp/tri4b_mmi_b0.1/decode_${uid}_$test - -#first, train UBM for fMMI experiments. -steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \ - 600 data/train data/lang exp/tri4b_ali exp/dubm4b - -# Next, fMMI+MMI. -steps/train_mmi_fmmi.sh \ - --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri4b_ali exp/dubm4b exp/tri4b_denlats \ - exp/tri4b_fmmi_a || exit 1; - -for iter in 3 4 5 6 7 8; do - steps/decode_fmmi.sh --nj 5 --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri3b/decode_${uid}_$test exp/tri4b/graph_$uid data/$test \ - exp/tri4b_fmmi_a/decode_${uid}_${test}_it$iter & -done -# decode the last iter with the bd model. 
-#for iter in 8; do
-#  steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
-#    --transform-dir exp/tri3b/decode_bd_tgpr_dev93 exp/tri4b/graph_bd_tgpr data/test_dev93 \
-#    exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it$iter &
-#  steps/decode_fmmi.sh --nj 8 --cmd "$decode_cmd" --iter $iter \
-#    --transform-dir exp/tri3b/decode_bd_tgpr_eval92 exp/tri4b/graph_bd_tgpr data/test_eval92 \
-#    exp/tri4b_fmmi_a/decode_tgpr_eval92_it$iter &
-#done
-
-
-# fMMI + mmi with indirect differential.
-steps/train_mmi_fmmi_indirect.sh \
-  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri4b_ali exp/dubm4b exp/tri4b_denlats \
-  exp/tri4b_fmmi_indirect || exit 1;
-
-for iter in 3 4 5 6 7 8; do
-  steps/decode_fmmi.sh --nj 7 --cmd "$decode_cmd" --iter $iter \
-    --transform-dir exp/tri3b/decode_${uid}_$test exp/tri4b/graph_$uid data/$test \
-    exp/tri4b_fmmi_indirect/decode_${uid}_${test}_it$iter &
-done
-
diff --git a/egs/sprakbanken/s5/local/sprak_train_cmulm.sh b/egs/sprakbanken/s5/local/sprak_train_cmulm.sh
deleted file mode 100755
index 55d6d60bf9d..00000000000
--- a/egs/sprakbanken/s5/local/sprak_train_cmulm.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# Copyright 2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
-# Apache 2.0
-
-# This script takes data prepared in a corpus-dependent way
-# in data/local/, and converts it into the "canonical" form,
-# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug,
-# data/train_si284, data/train_si84, etc.
-
-# Don't bother doing train_si84 separately (although we have the file lists
-# in data/local/) because it's just the first 7138 utterances in train_si284.
-# We'll create train_si84 after doing the feature extraction.
-
-. ./path.sh || exit 1;
-
-echo "Preparing train and test data"
-srcdir=data/local/data
-lmdir=data/local/arpa_lm
-tmpdir=data/local/lm_tmp
-lang_tmp=data/local/lang_tmp
-lexicon=data/local/dict/transcripts
-ccs=data/local/lang_tmp/cmuclmtk.ccs
-lm_suffix=arpa
-mkdir -p $lmdir
-mkdir -p $tmpdir
-
-# Create context cue symbol file for cmuclmtk
-echo -e '<s>' > $ccs
-echo -e '</s>' >> $ccs
-
-
-# Envelop LM training data in context cues
-python3 local/sprak_prep_lm.py $lexicon $lmdir/lm_input
-
-
-# Next, for each type of language model, create the corresponding FST
-# and the corresponding lang_test_* directory.
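This deleted CMUCLMTK recipe (its LM-building commands follow below) is superseded by local/train_irstlm.sh in the new run.sh further down in this diff. Both paths end the same way, converting the ARPA LM into G.fst; a condensed sketch of that shared final step, with paths simplified for illustration:

  cat sprak.arpa | \
    arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test_tg/words.txt \
    - data/lang_test_tg/G.fst
  utils/validate_lang.pl data/lang_test_tg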
-
-echo Preparing language models for test
-
-text2wfreq < $lmdir/lm_input | wfreq2vocab -top 40000 > $lmdir/sprak.vocab
-
-text2idngram -vocab $lmdir/sprak.vocab -idngram $lmdir/sprak.idngram < $lmdir/lm_input
-
-idngram2lm -linear -idngram $lmdir/sprak.idngram -vocab \
-  $lmdir/sprak.vocab -arpa $lmdir/sprak.arpa -context $ccs
-
-
-test=data/lang_test_${lm_suffix}
-mkdir -p $test
-cp -r data/lang/* $test
-
-cat $lmdir/sprak.arpa | \
-  arpa2fst --disambig-symbol=#0 \
-  --read-symbol-table=$test/words.txt - $test/G.fst
-
-
-utils/validate_lang.pl $test || exit 1;
-
-exit 0;
diff --git a/egs/sprakbanken/s5/local/wer_hyp_filter b/egs/sprakbanken/s5/local/wer_hyp_filter
new file mode 100755
index 00000000000..8ecbdd3ec04
--- /dev/null
+++ b/egs/sprakbanken/s5/local/wer_hyp_filter
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+perl -C -pe 's:<UNK>::g; s:<NOISE>::g; s:<SPOKEN_NOISE>::g' | \
+perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \
+PERLIO=:utf8 perl -pe '$_=lc'
diff --git a/egs/sprakbanken/s5/local/wer_output_filter b/egs/sprakbanken/s5/local/wer_output_filter
new file mode 100755
index 00000000000..8ecbdd3ec04
--- /dev/null
+++ b/egs/sprakbanken/s5/local/wer_output_filter
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+perl -C -pe 's:<UNK>::g; s:<NOISE>::g; s:<SPOKEN_NOISE>::g' | \
+perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \
+PERLIO=:utf8 perl -pe '$_=lc'
diff --git a/egs/sprakbanken/s5/local/wer_ref_filter b/egs/sprakbanken/s5/local/wer_ref_filter
new file mode 100755
index 00000000000..8ecbdd3ec04
--- /dev/null
+++ b/egs/sprakbanken/s5/local/wer_ref_filter
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+perl -C -pe 's:<UNK>::g; s:<NOISE>::g; s:<SPOKEN_NOISE>::g' | \
+perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \
+PERLIO=:utf8 perl -pe '$_=lc'
diff --git a/egs/sprakbanken/s5/local/writenumbers.py b/egs/sprakbanken/s5/local/writenumbers.py
index 452cd3e7e9c..df3235243d4 100755
--- a/egs/sprakbanken/s5/local/writenumbers.py
+++ b/egs/sprakbanken/s5/local/writenumbers.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 '''
 # Copyright 2014 Author: Andreas Kirkedal
diff --git a/egs/sprakbanken/s5/run.sh b/egs/sprakbanken/s5/run.sh
index 34c1f18d964..64a24deeabf 100755
--- a/egs/sprakbanken/s5/run.sh
+++ b/egs/sprakbanken/s5/run.sh
@@ -4,199 +4,163 @@ ## This relates to the queue.
 . ./path.sh # so python3 is on the path if not on the system (we made a link to utils/).a
 
-# This is a shell script, but it's recommended that you run the commands one by
-# one by copying and pasting into the shell.
+nj=12
+stage=0
+. utils/parse_options.sh
+
+if [ $stage -le 0 ]; then
+  # Download the corpus and prepare parallel lists of sound files and text files
+  # Divide the corpus into train, dev and test sets
+  local/sprak_data_prep.sh || exit 1;
+fi
+
+if [ $stage -le 1 ]; then
+  # Perform text normalisation, prepare dict folder and LM data transcriptions
+  # This setup uses previously prepared data. eSpeak must be installed and in PATH to use dict_prep.sh
+  # local/dict_prep.sh || exit 1;
+  local/copy_dict.sh || exit 1;
+fi
+
+if [ $stage -le 2 ]; then
+  utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang_tmp data/lang || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+  # Extract mfccs
+  # p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some
+  # wave files are corrupt
+  # Will return a warning message because of the corrupt audio files, but compute them anyway
+  # If this step fails and prints a partial diff, rerun from sprak_data_prep.sh
+  for dataset in train test dev; do
+    steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" data/$dataset || exit 1;
+
+    # Compute cepstral mean and variance normalisation
+    steps/compute_cmvn_stats.sh data/$dataset || exit 1;
+
+    # Repair data set (remove corrupt data points with corrupt audio)
+    utils/fix_data_dir.sh data/$dataset || exit 1;
+
+  done
+  # Make a subset of the training data with the shortest 120k utterances.
+  utils/subset_data_dir.sh --shortest data/train 120000 data/train_120kshort || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+  # Train LM with irstlm
+  local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "tg" data/lang data/local/train3_lm &> data/local/tg.log || exit 1;
+  local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "fg" data/lang data/local/train4_lm &> data/local/fg.log || exit 1;
+fi
+
+if [ $stage -le 5 ]; then
+  # Train monophone model on short utterances
+  steps/train_mono.sh --nj $nj --cmd "$train_cmd" \
+    data/train_120kshort data/lang exp/mono0a || exit 1;
+  utils/mkgraph.sh --mono data/lang_test_tg exp/mono0a exp/mono0a/graph_tg || exit 1;
+  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
+    exp/mono0a/graph_tg data/dev exp/mono0a/decode_tg_dev || exit 1;
+fi
+
+if [ $stage -le 6 ]; then
+  # Train tri1 (delta+delta-delta)
+  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
+    data/train data/lang exp/mono0a exp/mono0a_ali || exit 1;
+  steps/train_deltas.sh --cmd "$train_cmd" \
+    3000 40000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1;
+
+  # Decode dev set with both LMs
+  utils/mkgraph.sh data/lang_test_tg exp/tri1 exp/tri1/graph_tg || exit 1;
+  utils/mkgraph.sh data/lang_test_fg exp/tri1 exp/tri1/graph_fg || exit 1;
+  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
+    exp/tri1/graph_fg data/dev exp/tri1/decode_fg_dev || exit 1;
+  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
+    exp/tri1/graph_tg data/dev exp/tri1/decode_tg_dev || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+  # Train tri2a (delta + delta-delta)
+  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
+    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
+  steps/train_deltas.sh --cmd "$train_cmd" \
+    5000 60000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
+  utils/mkgraph.sh data/lang_test_tg exp/tri2a exp/tri2a/graph_tg || exit 1;
+  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
+    exp/tri2a/graph_tg data/dev exp/tri2a/decode_tg_dev || exit 1;
+fi
+
+if [ $stage -le 8 ]; then
+  # Train tri2b (LDA+MLLT)
+  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
+    data/train data/lang exp/tri2a exp/tri2a_ali || exit 1;
+  steps/train_lda_mllt.sh --cmd "$train_cmd" \
+    --splice-opts "--left-context=5 --right-context=5" \
+    6500 75000 data/train data/lang exp/tri2a_ali exp/tri2b || exit 1;
+  utils/mkgraph.sh data/lang_test_tg exp/tri2b exp/tri2b/graph_tg || exit 1;
+  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
+    exp/tri2b/graph_tg data/dev exp/tri2b/decode_tg_dev || exit 1;
+fi
+
+if [ $stage -le 9 ]; then
+  # 
+  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
+    --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
+  steps/train_sat.sh --cmd "$train_cmd" \
+    7500 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
+
+  # Decode dev with the 3-gram and 4-gram LMs
+  utils/mkgraph.sh data/lang_test_tg exp/tri3b exp/tri3b/graph_tg || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
+    exp/tri3b/graph_tg data/dev exp/tri3b/decode_tg_dev || exit 1;
+  utils/mkgraph.sh data/lang_test_fg exp/tri3b exp/tri3b/graph_fg || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
+    exp/tri3b/graph_fg data/dev exp/tri3b/decode_fg_dev || exit 1;
+
+  # Decode test with the 3-gram and 4-gram LMs
+  # there are fewer speakers (n=7) and decoding usually ends up waiting
+  # for a single job, so we use --num-threads 2 to speed it up
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
+    exp/tri3b/graph_tg data/test exp/tri3b/decode_tg_test || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
+    exp/tri3b/graph_fg data/test exp/tri3b/decode_fg_test || exit 1;
+fi
+
+if [ $stage -le 10 ]; then
+  # Alignment used to train nnets and sgmms
+  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+    data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+fi

-# Download the corpus and prepare parallel lists of sound files and text files
-# Divide the corpus into train, dev and test sets
-local/sprak_data_prep.sh || exit 1;
-
-# Perform text normalisation, prepare dict folder and LM data transcriptions
-# This setup uses previsously prepared data. eSpeak must be installed and in PATH to use dict_prep.sh
-#local/dict_prep.sh || exit 1;
-local/copy_dict.sh || exit 1;
-
-
-utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang_tmp data/lang || exit 1;
-
-# Now make MFCC features.
-# mfccdir should be some place with a largish disk where you
-# want to store MFCC features.
-mfccdir=mfcc - - -# Extract mfccs -# p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some -# wave files are corrupt -# Will return a warning message because of the corrupt audio files, but compute them anyway -# If this step fails and prints a partial diff, rerun from sprak_data_prep.sh - -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/test exp/make_mfcc/test mfcc & -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/dev exp/make_mfcc/dev mfcc & -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/train exp/make_mfcc/train mfcc || exit 1; -wait - -# Compute cepstral mean and variance normalisation -steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test mfcc & -steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev mfcc & -steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train mfcc - -wait - -# Repair data set (remove corrupt data points with corrupt audio) - -utils/fix_data_dir.sh data/test & -utils/fix_data_dir.sh data/dev & -utils/fix_data_dir.sh data/train -wait - -# Train LM with CMUCLMTK -# This setup uses IRSTLM -#local/sprak_train_lm.sh &> data/local/cmuclmtk/lm.log - -# Train LM with irstlm -local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "3g" data/lang data/local/train3_lm &> data/local/3g.log & -local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "4g" data/lang data/local/train4_lm &> data/local/4g.log - -# Make subset with 1k utterances for rapid testing -# Randomly selects 980 utterances from 7 speakers -utils/subset_data_dir.sh --per-spk data/test 140 data/test1k & - -# Now make subset of the training data with the shortest 120k utterances. -utils/subset_data_dir.sh --shortest data/train 120000 data/train_120kshort || exit 1; - -# Train monophone model on short utterances -steps/train_mono.sh --nj 30 --cmd "$train_cmd" \ - data/train_120kshort data/lang exp/mono0a || exit 1; - -# Ensure that LMs are created -wait - -utils/mkgraph.sh data/lang_test_3g exp/mono0a exp/mono0a/graph_3g & -utils/mkgraph.sh data/lang_test_4g exp/mono0a exp/mono0a/graph_4g & - -# Ensure that all graphs are constructed -wait - -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/mono0a/graph_3g data/test1k exp/mono0a/decode_3g_test1k - -# steps/align_si.sh --boost-silence 1.25 --nj 42 --cmd "$train_cmd" \ -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/mono0a exp/mono0a_ali || exit 1; - -# steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ -steps/train_deltas.sh --cmd "$train_cmd" \ - 2000 10000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1; - -wait - - -utils/mkgraph.sh data/lang_test_3g exp/tri1 exp/tri1/graph_3g & -utils/mkgraph.sh data/lang_test_4g exp/tri1 exp/tri1/graph_4g || exit 1; - -( -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri1/graph_4g data/test1k exp/tri1/decode_4g_test1k || exit 1; -) & - -( -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri1/graph_3g data/test1k exp/tri1/decode_3g_test1k || exit 1; -) & - -wait - -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri1 exp/tri1_ali || exit 1; - - -# Train tri2a, which is deltas + delta-deltas. 
-steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 15000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; - -utils/mkgraph.sh data/lang_test_3g exp/tri2a exp/tri2a/graph_3g || exit 1; - -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri2a/graph_3g data/test1k exp/tri2a/decode_3g_test1k || exit 1; - - -steps/train_lda_mllt.sh --cmd "$train_cmd" \ - --splice-opts "--left-context=5 --right-context=5" \ - 2500 15000 data/train data/lang exp/tri1_ali exp/tri2b || exit 1; - -utils/mkgraph.sh data/lang_test_3g exp/tri2b exp/tri2b/graph_3g || exit 1; -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri2b/graph_3g data/test1k exp/tri2b/decode_3g_test1k || exit 1; - - -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; - -wait - - -# From 2b system, train 3b which is LDA + MLLT + SAT. -steps/train_sat.sh --cmd "$train_cmd" \ - 2500 15000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; -utils/mkgraph.sh data/lang_test_3g exp/tri3b exp/tri3b/graph_3g || exit 1; -steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri3b/graph_3g data/test1k exp/tri3b/decode_3g_test1k || exit 1; - - -# Trying 4-gram language model -utils/mkgraph.sh data/lang_test_4g exp/tri3b exp/tri3b/graph_4g || exit 1; - -steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 \ - exp/tri3b/graph_4g data/test1k exp/tri3b/decode_4g_test1k || exit 1; - -# This is commented out for now as it's not important for the main recipe. -## Train RNN for reranking -#local/sprak_train_rnnlms.sh data/local/dict data/dev/transcripts.uniq data/local/rnnlms/g_c380_d1k_h100_v130k -## Consumes a lot of memory! Do not run in parallel -#local/sprak_run_rnnlms_tri3b.sh data/lang_test_3g data/local/rnnlms/g_c380_d1k_h100_v130k data/test1k exp/tri3b/decode_3g_test1k - - -# From 3b system -steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; - -# From 3b system, train another SAT system (tri4a) with all the si284 data. 
+## Works
+#local/sprak_run_nnet_cpu.sh tg dev

-steps/train_sat.sh --cmd "$train_cmd" \
-  4200 40000 data/train data/lang exp/tri3b_ali exp/tri4a || exit 1;
+## Works
+#local/sprak_run_sgmm2.sh dev

-utils/mkgraph.sh data/lang_test_3g exp/tri4a exp/tri4a/graph_3g || exit 1;
-steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-  exp/tri4a/graph_3g data/test1k exp/tri4a/decode_3g_test1k || exit 1;
+# Run neural network setups based on the TEDLIUM recipe

-steps/train_quick.sh --cmd "$train_cmd" \
-  4200 40000 data/train data/lang exp/tri3b_ali exp/tri4b || exit 1;
+# Running the nnet3-tdnn setup will train an ivector extractor that
+# is used by the subsequent nnet3 and chain systems (which is why
+# --stage is specified for them below)
+#local/nnet3/run_tdnn.sh --tdnn-affix "0" --nnet3-affix ""

-(
-  utils/mkgraph.sh data/lang_test_3g exp/tri4b exp/tri4b/graph_3g || exit 1;
-  steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-    exp/tri4b/graph_3g data/test1k exp/tri4b/decode_3g_test1k || exit 1;
-) &
+# nnet3 LSTM
+#local/nnet3/run_lstm.sh --stage 13 --affix "0"

-  utils/mkgraph.sh data/lang_test_4g exp/tri4b exp/tri4b/graph_4g || exit 1;
-  steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-    exp/tri4b/graph_4g data/test1k exp/tri4b/decode_4g_test1k || exit 1;
+# nnet3 bLSTM
+#local/nnet3/run_blstm.sh --stage 12

-wait
-# alignment used to train nnets and sgmms
-steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
-  data/train data/lang exp/tri4b exp/tri4b_ali || exit 1;
-## Works
-local/sprak_run_nnet_cpu.sh 3g test1k
+# chain TDNN
+# This setup creates a new lang directory that is also used by the
+# TDNN-LSTM system
+#local/chain/run_tdnn.sh --stage 14

-## Works
-local/sprak_run_sgmm2.sh test1k
+# chain TDNN-LSTM
+local/chain/run_tdnn_lstm.sh --stage 17

# Getting results [see RESULTS file]
-for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
+local/generate_results_file.sh 2> /dev/null > RESULTS
+

diff --git a/egs/sprakbanken_swe/s5/local/data_prep.py b/egs/sprakbanken_swe/s5/local/data_prep.py
index f3b644a26b6..58a0898dc26 100755
--- a/egs/sprakbanken_swe/s5/local/data_prep.py
+++ b/egs/sprakbanken_swe/s5/local/data_prep.py
@@ -123,7 +123,7 @@ def create_parallel_kaldi(filelist, sphpipe, snd=False):

 if __name__ == '__main__':

-    flist = codecs.open(sys.argv[1], "r", "utf8").readlines()
+    flist = codecs.open(sys.argv[1], "r").readlines()
     outpath = sys.argv[2]
     if len(sys.argv) == 5:
         sndlist = codecs.open(sys.argv[3], "r").readlines()
@@ -133,8 +133,8 @@ def create_parallel_kaldi(filelist, sphpipe, snd=False):
         traindata = create_parallel_kaldi(flist, "")

     textout = codecs.open(os.path.join(outpath, "text.unnormalised"), "w", "utf8")
-    wavout = codecs.open(os.path.join(outpath, "wav.scp"), "w","utf8")
-    utt2spkout = codecs.open(os.path.join(outpath, "utt2spk"), "w","utf8")
+    wavout = codecs.open(os.path.join(outpath, "wav.scp"), "w")
+    utt2spkout = codecs.open(os.path.join(outpath, "utt2spk"), "w")
     textout.writelines(traindata[0])
     wavout.writelines(traindata[1])
     utt2spkout.writelines(traindata[2])
diff --git a/egs/sprakbanken_swe/s5/local/normalize_transcript.py b/egs/sprakbanken_swe/s5/local/normalize_transcript.py
index 68e534df40c..90e45744e2a 100755
--- a/egs/sprakbanken_swe/s5/local/normalize_transcript.py
+++ b/egs/sprakbanken_swe/s5/local/normalize_transcript.py
@@ -18,6 +18,9 @@ }

 #removes all the above signs
+from_chars = ''.join(normdict.keys())
+to_chars = ''.join(normdict.values())
+
 t_table = str.maketrans(normdict)

 ## Main
@@ -25,13 +28,15 @@ transcript = codecs.open(sys.argv[1], "r", "utf8")
 outtext = codecs.open(sys.argv[2], "w", "utf8")

-for line in transcript:
-    line = line.replace(".\Punkt", ".")
-    line = line.replace(",\Komma", ",")
-    normtext1 = line.translate(t_table)
-    normtext2 = re.sub(r' +', ' ', normtext1.strip())
-    outtext.write(normtext2.upper() + "\n")
+#TODO: Add number normalisation and remove uppercasing

+for line in transcript:
+    line = line.replace(".\Punkt", ".")
+    line = line.replace(",\Komma", ",")
+    normtext1 = re.sub(r'[\.,:;\?]', '', line)
+    normtext2 = re.sub(r'[\t\\]', ' ', normtext1)
+    normtext3 = re.sub(r' +', ' ', normtext2.strip())
+    outtext.write(normtext3.upper() + "\n")

 transcript.close()
 outtext.close()
diff --git a/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh b/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
index ad6c6e2472f..19751815208 100755
--- a/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
+++ b/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
@@ -22,10 +22,10 @@ utils=`pwd`/utils

 # This recipe currently relies on version 3 because python3 uses utf8 as internal
 # string representation
-if ! which python3 >&/dev/null; then
-  echo "Python3 is not installed, to install it you should probably do:"
-  echo "sudo apt-get install python3" || exit 1;
-fi
+#if ! which python3 >&/dev/null; then
+#  echo "Python3 is not installed, to install it you should probably do:"
+#  echo "sudo apt-get install python3" || exit 1;
+#fi

 if [ ! -d $dir/download ]; then
   mkdir -p $dir/download/0467-1 $dir/download/0467-2 $dir/download/0467-3
@@ -34,19 +34,19 @@ fi
 echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while."

 if [ ! -f $dir/download/sve.16khz.0467-1.tar.gz ]; then
-  ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-1.tar.gz --directory-prefix=$dir/download )
+  ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-1.tar.gz --directory-prefix=$dir/download )
 fi

 if [ ! -f $dir/download/sve.16khz.0467-2.tar.gz ]; then
-  ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-2.tar.gz --directory-prefix=$dir/download )
+  ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-2.tar.gz --directory-prefix=$dir/download )
 fi

 if [ ! -f $dir/download/sve.16khz.0467-3.tar.gz ]; then
-  ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-3.tar.gz --directory-prefix=$dir/download )
+  ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-3.tar.gz --directory-prefix=$dir/download )
 fi

 if [ ! -f $dir/download/sve.16khz.0467-1.tar.gz ]; then
-  ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0468.tar.gz --directory-prefix=$dir/download )
+  ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0468.tar.gz --directory-prefix=$dir/download )
 fi

 echo "Corpus files downloaded."
@@ -78,31 +78,31 @@ mkdir -p $dir/corpus_processed/training/0467-1 $dir/corpus_processed/training/04

 # Create parallel file lists and text files, but keep sound files in the same location to save disk space
 # Writes the lists to data/local/data (~ 310h)
 echo "Creating parallel data for training data."
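# (Note on the rename loop a few lines below: the bash expansion "${f// /_}"
# replaces every space in $f with an underscore, so a hypothetical file name
# like "r4670118.791213 8232.txt" becomes "r4670118.791213_8232.txt".)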
-python3 $local/sprak2kaldi.py $dir/download/0467-1 $dir/corpus_processed/training/0467-1 # ~140h -python3 $local/sprak2kaldi.py $dir/download/0467-2 $dir/corpus_processed/training/0467-2 # ~125h -python3 $local/sprak2kaldi.py $dir/download/0467-3 $dir/corpus_processed/training/0467-3 # ~128h +python $local/sprak2kaldi.py $dir/download/0467-1 $dir/corpus_processed/training/0467-1 # ~140h +python $local/sprak2kaldi.py $dir/download/0467-2 $dir/corpus_processed/training/0467-2 # ~125h +python $local/sprak2kaldi.py $dir/download/0467-3 $dir/corpus_processed/training/0467-3 # ~128h mv $dir/corpus_processed/training/0467-1/'r4670118.791213 8232' $dir/corpus_processed/training/0467-1/'r4670118.791213_8232' -for f in $dir/corpus_processed/training/0467-1/r4670118.791213_8232/*.txt; do mv "$f" "${f// /_}"; done +for f in $dir/corpus_processed/training/0467-1/r4670118.791213_8232/*.txt; do + mv "$f" "${f// /_}"; +done ( # Ditto test set (~ 93h) echo "Creating parallel data for test data." rm -rf $dir/corpus_processed/test/0468 mkdir -p $dir/corpus_processed/test/0468 - python3 $local/sprak2kaldi.py $dir/download/0468 $dir/corpus_processed/test/0468 + python $local/sprak2kaldi.py $dir/download/0468 $dir/corpus_processed/test/0468 ) - - # Create the LM training data ( echo "Writing the LM text to file and normalising." cat $dir/corpus_processed/training/0467-1/txtlist $dir/corpus_processed/training/0467-2/txtlist $dir/corpus_processed/training/0467-3/txtlist | while read l; do cat $l; done > $lmdir/lmsents - python3 local/normalize_transcript.py $lmdir/lmsents $lmdir/lmsents.norm + python local/normalize_transcript.py $lmdir/lmsents $lmdir/lmsents.norm sort -u $lmdir/lmsents.norm > $lmdir/transcripts.uniq -) & +) # Combine training file lists echo "Combine file lists." diff --git a/egs/sre08/v1/local/run_more_data.sh b/egs/sre08/v1/local/run_more_data.sh index db7f14615a8..001e7ff4d23 100755 --- a/egs/sre08/v1/local/run_more_data.sh +++ b/egs/sre08/v1/local/run_more_data.sh @@ -49,7 +49,7 @@ sid/compute_vad_decision.sh --nj 4 --cmd "$train_cmd" data/sre08_test_short3_mal # Note: to see the proportion of voiced frames you can do, -# grep Prop exp/make_vad/vad_*.1.log +# grep Prop exp/make_vad/vad_*.1.log # Get male and female subsets of training data. grep -w m data/train/spk2gender | awk '{print $1}' > foo; @@ -78,20 +78,20 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false --num-iters 1 - data/train_female_4k exp/full_ubm_2048 exp/full_ubm_2048_female & wait -# note, the mem_free,ram_free is counted per thread... in this setup each +# note, the --mem is counted per thread... in this setup each # job has 4 processes running each with 4 threads; each job takes about 5G # of memory so we need about 20G, plus add memory for sum-accs to make it 25G. -# but we'll submit using -pe smp 16, and this multiplies the memory requirement +# but we'll submit using --num-threads 16, and this multiplies the memory requirement # by 16, so submitting with 2G as the requirement, to make the total requirement # 32, is reasonable. # Train the iVector extractor for male speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. 
-sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -105,22 +105,22 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 2.58% # Extract the iVectors for the Fisher data. -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male exp/ivectors_sre08_test_short3_male @@ -131,7 +131,7 @@ cat $trials | awk '{print $1, $2}' | \ ivector-compute-dot-products - \ scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp \ scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp \ - foo + foo local/score_sre08.sh $trials foo diff --git a/egs/sre08/v1/run.sh b/egs/sre08/v1/run.sh index 4e31542bf4d..c4afe447e8d 100755 --- a/egs/sre08/v1/run.sh +++ b/egs/sre08/v1/run.sh @@ -110,12 +110,12 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false \ wait # Train the iVector extractor for male speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -129,25 +129,25 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 3.41% # Extract the iVectors for the training data. 
-sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female \ exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male \ exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female \ exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male \ exp/ivectors_sre08_test_short3_male diff --git a/egs/sre08/v1/sid/extract_ivectors_dnn.sh b/egs/sre08/v1/sid/extract_ivectors_dnn.sh index 8692e6ee8a5..2687d1fc6c8 100755 --- a/egs/sre08/v1/sid/extract_ivectors_dnn.sh +++ b/egs/sre08/v1/sid/extract_ivectors_dnn.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -16,6 +16,9 @@ stage=0 min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out) posterior_scale=1.0 # This scale helps to control for successive features being highly # correlated. E.g. try 0.1 or 0.3. +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -37,6 +40,8 @@ if [ $# != 5 ]; then echo " --num-gselect # Number of Gaussians to select using" echo " # diagonal model." echo " --min-post # Pruning threshold for posteriors" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi @@ -46,6 +51,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $srcdir/final.ie $srcdir/final.ubm $data/feats.scp ; do [ ! -f $f ] && echo "No such file $f" && exit 1; done @@ -60,8 +80,6 @@ utils/split_data.sh $data_dnn $nj || exit 1; delta_opts=`cat $srcdir/delta_opts 2>/dev/null` -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - ## Set up features. 
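# The pipeline below chains deltas, sliding-window CMVN and a voiced-frames
# filter. As a rough sanity check (paths illustrative, default delta options),
# the same chain can be run by hand on a data directory:
#   add-deltas scp:data/train/feats.scp ark:- | \
#     apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | \
#     select-voiced-frames ark:- scp:data/train/vad.scp ark,t:- | head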
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -69,13 +87,18 @@ nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.s if [ $stage -le 0 ]; then echo "$0: extracting iVectors" - $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ - scale-post ark:- $posterior_scale ark:- \| \ - ivector-extract --verbose=2 $srcdir/final.ie "$feats" ark,s,cs:- \ - ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp || exit 1; + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/extract_ivectors.$g.log \ + nnet-am-compute $gpu_opt --apply-log=true --chunk-size=${chunk_size} \ + $nnet "`echo $nnet_feats | sed s/JOB/$g/g`" ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ + scale-post ark:- $posterior_scale ark:- \| \ + ivector-extract --verbose=2 $srcdir/final.ie \ + "`echo $feats | sed s/JOB/$g/g`" ark,s,cs:- \ + ark,scp,t:$dir/ivector.$g.ark,$dir/ivector.$g.scp || exit 1 & + done + wait fi if [ $stage -le 1 ]; then diff --git a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh index f6710028ae5..c6b508a7206 100755 --- a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh +++ b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh @@ -1,18 +1,23 @@ #!/bin/bash -# Copyright 2015 David Snyder -# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) -# 2015 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2015-2017 David Snyder +# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # This script derives a full-covariance UBM from DNN posteriors and # speaker recognition features. # Begin configuration section. -nj=40 +nj=8 cmd="run.pl" stage=-2 delta_window=3 delta_order=2 +use_gpu=true +nnet_job_opt= +cleanup=true +chunk_size=256 +stage=0 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -30,15 +35,34 @@ if [ $# != 4 ]; then echo " --nj # number of parallel training jobs" echo " --delta-window # delta window size" echo " --delta-order # delta order" - echo " # to be equal to the size of the DNN output layer." + echo " --use-gpu # Use GPU to extract DNN posteriors" + echo " --chunk-size # Number of frames processed at a time by the DNN" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi -data=$1 -data_dnn=$2 +data=$1 # Features for the GMM +data_dnn=$2 # Features for the DNN nnet=$3 dir=$4 +gpu_opt="" +nnet_job_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $data/feats.scp $data/vad.scp ${data_dnn}/feats.scp \ ${data_dnn}/vad.scp $nnet; do @@ -69,16 +93,34 @@ select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" # in the ancillary GMM. 
num_components=`grep -oP 'output-dim\ \K[0-9]+' <(nnet-am-info $nnet 2> /dev/null)` -$cmd JOB=1:$nj $logdir/make_stats.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post ark:- ark:- \| \ - fgmm-global-acc-stats-post ark:- $num_components "$feats" \ - $dir/stats.JOB.acc || exit 1; +if [ $stage -le 0 ]; then + echo "$0: accumulating stats from DNN posteriors and speaker ID features" + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/make_stats.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \| \ + fgmm-global-acc-stats-post ark:- $num_components \ + "`echo $feats | sed s/JOB/$g/g`" \ + $dir/stats.$g.acc || exit 1 & + done + wait +fi -$cmd $dir/log/init.log \ - fgmm-global-init-from-accs --verbose=2 \ - "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ - $dir/final.ubm || exit 1; +if [ $stage -le 1 ]; then + echo "$0: initializing GMM from stats" + $cmd $dir/log/init.log \ + fgmm-global-init-from-accs --verbose=2 \ + "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ + $dir/final.ubm || exit 1; +fi -exit 0; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi diff --git a/egs/sre10/v1/local/dnn/get_egs2.sh b/egs/sre08/v1/sid/nnet2/get_egs2.sh similarity index 98% rename from egs/sre10/v1/local/dnn/get_egs2.sh rename to egs/sre08/v1/sid/nnet2/get_egs2.sh index 9f1644178e2..05ea1d1a0cd 100755 --- a/egs/sre10/v1/local/dnn/get_egs2.sh +++ b/egs/sre08/v1/sid/nnet2/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! 
-f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! -z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/sre10/v1/local/dnn/get_lda.sh b/egs/sre08/v1/sid/nnet2/get_lda.sh similarity index 99% rename from egs/sre10/v1/local/dnn/get_lda.sh rename to egs/sre08/v1/sid/nnet2/get_lda.sh index 253222ff271..89594a20f84 100755 --- a/egs/sre10/v1/local/dnn/get_lda.sh +++ b/egs/sre08/v1/sid/nnet2/get_lda.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -108,7 +108,7 @@ N=$[$num_feats/$nj] case $feat_type in raw) feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/{splice_opts,final.mat} $dir || exit 1; feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -144,7 +144,7 @@ fi echo $ivector_dim >$dir/ivector_dim if [ -z "$lda_dim" ]; then - spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" + spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" lda_dim=$(feat-to-dim "$spliced_feats_one" -) || exit 1; fi diff --git a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh similarity index 96% rename from egs/sre10/v1/local/dnn/train_multisplice_accel2.sh rename to egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh index f5441d6e967..c56e89b5d94 100755 --- a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh +++ b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen # 2014 Vimal Manohar @@ -8,7 +8,7 @@ # Apache 2.0. 
# This is a modified version of train_multisplice_accel2.sh in -# steps/nnet2/ for speaker recognition. The main difference is +# ../../steps/nnet2/ for speaker recognition. The main difference is # that it uses different get_lda.sh and get_egs2.sh scripts. # # The original train_multisplice_accel2.sh was a modified version of @@ -25,11 +25,11 @@ num_epochs=15 # Number of epochs of training; initial_effective_lrate=0.01 final_effective_lrate=0.001 bias_stddev=0.5 -pnorm_input_dim=3000 +pnorm_input_dim=3000 pnorm_output_dim=300 minibatch_size=128 # by default use a smallish minibatch size for neural net # training; this controls instability which would otherwise - # be a problem with multi-threaded update. + # be a problem with multi-threaded update. samples_per_iter=400000 # each iteration of training, see this many samples # per job. This option is passed to get_egs.sh @@ -66,7 +66,7 @@ splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" # so hidden layer indexing is different from component count -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. update_period=4 # relates to online preconditioning: says how often we update the subspace. @@ -78,11 +78,11 @@ precondition_rank_out=80 # relates to online preconditioning mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. combine_num_threads=8 -combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage. +combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage. cleanup=true egs_dir= lda_opts= @@ -92,7 +92,7 @@ transform_dir= # If supplied, overrides alidir feat_type= # Can be used to force "raw" features. align_cmd= # The cmd that is passed to steps/nnet2/align.sh align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no] -realign_times= # List of times on which we realign. Each time is +realign_times= # List of times on which we realign. Each time is # floating point number strictly between 0 and 1, which # will be multiplied by the num-iters to get an iteration # number. @@ -127,10 +127,10 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... note, you might have to reduce --mem" + echo " # versus your defaults, because it gets multiplied by the --num-threads argument." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." 
echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" @@ -148,7 +148,7 @@ if [ $# != 4 ]; then echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -201,7 +201,7 @@ extra_opts+=(--transform-dir $transform_dir) if [ $stage -le -4 ]; then echo "$0: calling get_lda.sh" - local/dnn/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; + sid/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; fi # these files will have been written by get_lda.sh feat_dim=$(cat $dir/feat_dim) || exit 1; @@ -213,7 +213,7 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then extra_opts+=(--left-context $nnet_left_context ) extra_opts+=(--right-context $nnet_right_context ) echo "$0: calling get_egs2.sh" - local/dnn/get_egs2.sh $egs_opts "${extra_opts[@]}" \ + sid/nnet2/get_egs2.sh $egs_opts "${extra_opts[@]}" \ --samples-per-iter $samples_per_iter --stage $get_egs_stage \ --io-opts "$io_opts" \ --cmd "$cmd" $egs_opts \ @@ -372,7 +372,7 @@ while [ $x -lt $num_iters ]; do ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process; this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);"); - echo "On iteration $x, learning rate is $this_learning_rate." + echo "On iteration $x, learning rate is $this_learning_rate." if [ ! -z "${realign_this_iter[$x]}" ]; then prev_egs_dir=$cur_egs_dir @@ -417,7 +417,7 @@ while [ $x -lt $num_iters ]; do steps/nnet2/remove_egs.sh $prev_egs_dir fi fi - + # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd $dir/log/compute_prob_valid.$x.log \ @@ -461,7 +461,7 @@ while [ $x -lt $num_iters ]; do ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. - + # We can't easily use a single parallel SGE job to do the main training, # because the computation of which archive and which --frame option # to use for each job is a little complex, so we spawn each one separately. @@ -500,7 +500,7 @@ while [ $x -lt $num_iters ]; do n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) { $fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn"; undef $logprob; while () { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } } - close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; + close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; $best_n=$n; } } print "$best_n\n"; ' $num_jobs_nnet $dir/log/train.$x.%d.log) || exit 1; [ -z "$n" ] && echo "Error getting best model" && exit 1; cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1; @@ -537,7 +537,7 @@ if [ $stage -le $num_iters ]; then cur_offset=0 # current offset from first_model_combine. 
for n in $(seq $max_models_combine); do next_offset=$[($n*$num_models_combine)/$max_models_combine] - sub_list="" + sub_list="" for o in $(seq $cur_offset $[$next_offset-1]); do iter=$[$first_model_combine+$o] mdl=$dir/$iter.mdl diff --git a/egs/sre08/v1/sid/train_diag_ubm.sh b/egs/sre08/v1/sid/train_diag_ubm.sh index 6ff1a9099d9..1e79fc10c99 100755 --- a/egs/sre08/v1/sid/train_diag_ubm.sh +++ b/egs/sre08/v1/sid/train_diag_ubm.sh @@ -60,7 +60,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" @@ -85,7 +85,7 @@ for f in $data/feats.scp $data/vad.scp; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1 done -parallel_opts="-pe smp $num_threads" +parallel_opts="--num-threads $num_threads" delta_opts="--delta-window=$delta_window --delta-order=$delta_order" echo $delta_opts > $dir/delta_opts @@ -135,10 +135,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/sre08/v1/sid/train_ivector_extractor.sh b/egs/sre08/v1/sid/train_ivector_extractor.sh index 5d7eb984485..68ba0ca65fd 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -90,7 +90,7 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
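# A brief reminder on the table-I/O flags in the rspecifiers below: "s"
# asserts the archive is sorted by key, "cs" that it will be accessed in
# sorted order (so it can be streamed), and "p" (used elsewhere in these
# scripts, e.g. scp,p:) skips unreadable entries with a warning rather
# than crashing.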
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -102,7 +102,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -140,26 +140,24 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done - +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh index 5e5881e358b..c64b83c5a4b 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh @@ -1,23 +1,23 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, usually created by the script sid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive. It has to do with minimizing -# memory usage and disk I/O, subject to various constraints. The "num_threads" +# Note: there are 3 separate levels of parallelization: num_threads, num_processes, +# and num_jobs. This may seem a bit excessive. It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" # is how many threads a program uses; the "num_processes" is the number of separate # processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. 
If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -28,12 +28,12 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=5 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 -num_processes=4 # each job runs this many processes, each with --num-threads threads +num_processes=2 # each job runs this many processes, each with --num-threads threads cmd="run.pl" stage=-4 num_gselect=20 # Gaussian-selection using diagonal model: number of Gaussians to select @@ -46,6 +46,9 @@ cleanup=true posterior_scale=1.0 # This scale helps to control for successve features being highly # correlated. E.g. try 0.1 or 0.3 sum_accs_opt= +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -71,6 +74,9 @@ if [ $# != 5 ]; then echo " # diagonal model." echo " --sum-accs-opt # Option e.g. '-l hostname=a15' to localize" echo " # sum-accs process to nfs server." + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" + echo " --chunk-size # Number of frames processed at a time by the DNN" exit 1; fi @@ -80,6 +86,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + srcdir=$(dirname $fgmm_model) for f in $fgmm_model $data/feats.scp ; do @@ -100,9 +121,7 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
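# Note on the JOB placeholder in the strings below: when a command is run
# through $cmd with a JOB=1:N range, run.pl/queue.pl substitute JOB
# themselves; the DNN jobs here are spawned in a plain shell loop instead,
# so the script expands the placeholder manually, e.g.
#   `echo $nnet_feats | sed s/JOB/$g/g`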
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -117,19 +136,24 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi -# Do Gaussian selection and posterior extracion +# Do Gaussian selection and posterior extraction if [ $stage -le -1 ]; then echo $nj_full > $dir/num_jobs echo "$0: doing DNN posterior computation" - $cmd JOB=1:$nj_full $dir/log/post.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark,s,cs:- ark:- \| \ - scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.JOB.gz" || exit 1; - + for g in $(seq $nj_full); do + $cmd $nnet_job_opt $dir/log/post.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \ + \| scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.$g.gz" || exit 1 & + done + wait else if ! [ $nj_full -eq $(cat $dir/num_jobs) ]; then echo "Num-jobs mismatch $nj_full versus $(cat $dir/num_jobs)" @@ -156,26 +180,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh index 684cc8ddfc0..97b9789af0c 100755 --- a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh +++ b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh @@ -4,56 +4,52 @@ # egs/fisher_english/s5/local/online. It has been modified # for speaker recognition. -. cmd.sh - - stage=1 train_stage=-10 use_gpu=true set -e -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat < local/scores_gmm_2048_dep_pooled/plda_scores +# Pool the gender dependent results. 
+mkdir -p exp/scores_gmm_2048_dep_pooled
+cat exp/scores_gmm_2048_dep_male/plda_scores exp/scores_gmm_2048_dep_female/plda_scores \
+  > exp/scores_gmm_2048_dep_pooled/plda_scores

# GMM-2048 PLDA EER
# ind pooled: 2.26
@@ -140,7 +141,7 @@ cat local/scores_gmm_2048_dep_male/plda_scores local/scores_gmm_2048_dep_female/

 echo "GMM-$num_components EER"
 for x in ind dep; do
   for y in female male pooled; do
-    eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null`
+    eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null`
     echo "${x} ${y}: $eer"
   done
 done
diff --git a/egs/sre10/v2/cmd.sh b/egs/sre10/v2/cmd.sh
index 5c38b3a5d77..fe4cd0bcb3f 100755
--- a/egs/sre10/v2/cmd.sh
+++ b/egs/sre10/v2/cmd.sh
@@ -6,10 +6,10 @@
 # the number of cpus on your machine.

 #a) JHU cluster options
-export train_cmd="queue.pl -l arch=*64*"
-export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
-#export cuda_cmd="..."
-export mkgraph_cmd="queue.pl -l arch=*64* ram_free=4G,mem_free=4G"
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+#export cuda_cmd="queue.pl --gpu 1"
+export mkgraph_cmd="queue.pl --mem 4G"

 #b) BUT cluster options
 #export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
diff --git a/egs/sre10/v2/run.sh b/egs/sre10/v2/run.sh
index 4f5ab2756bb..b6c24fc1371 100755
--- a/egs/sre10/v2/run.sh
+++ b/egs/sre10/v2/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2015-2016 David Snyder
+# Copyright 2015-2017 David Snyder
 #           2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
 #           2015 Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
@@ -105,62 +105,61 @@ utils/fix_data_dir.sh data/train_32k

 # Initialize a full GMM from the DNN posteriors and speaker recognition
 # features. This can be used both alone, as a UBM, or to initialize the
 # i-vector extractor in a DNN-based system.
-sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" \
+sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd --mem 15G" \
   data/train_32k \
   data/train_dnn_32k $nnet exp/full_ubm

 # Train an i-vector extractor based on just the supervised-GMM.
 sid/train_ivector_extractor.sh \
-  --cmd "$train_cmd -l mem_free=70G,ram_free=70G" \
+  --cmd "$train_cmd --mem 120G" \
   --ivector-dim 600 \
   --num-iters 5 exp/full_ubm/final.ubm data/train \
   exp/extractor_sup_gmm

 # Train an i-vector extractor based on the DNN-UBM.
 sid/train_ivector_extractor_dnn.sh \
-  --cmd "$train_cmd -l mem_free=80G,ram_free=80G" \
-  --min-post 0.015 \
-  --ivector-dim 600 \
-  --num-iters 5 exp/full_ubm/final.ubm $nnet \
+  --cmd "$train_cmd --mem 100G" --nnet-job-opt "--mem 4G" \
+  --min-post 0.015 --ivector-dim 600 --num-iters 5 \
+  exp/full_ubm/final.ubm $nnet \
   data/train \
   data/train_dnn \
   exp/extractor_dnn

# Extract i-vectors from the extractor with the sup-GMM UBM.
sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_train \ exp/ivectors_sre10_train_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_test \ exp/ivectors_sre10_test_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre \ exp/ivectors_sre_sup_gmm # Extract i-vectors using the extractor with the DNN-UBM. sid/extract_ivectors_dnn.sh \ - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_test \ data/sre10_test_dnn \ exp/ivectors10_test_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_train \ data/sre10_train_dnn \ exp/ivectors10_train_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre \ @@ -183,87 +182,90 @@ local/scoring_common.sh data/sre data/sre10_train data/sre10_test \ # # local/cosine_scoring.sh data/sre10_train data/sre10_test \ # exp/ivectors_sre10_train exp/ivectors_sre10_test $trials \ -# local/scores_gmm_2048_ind_pooled +# exp/scores_gmm_2048_ind_pooled # local/lda_scoring.sh data/sre data/sre10_train data/sre10_test \ # exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test \ -# $trials local/scores_gmm_2048_ind_pooled +# $trials exp/scores_gmm_2048_ind_pooled # Create a gender independent PLDA model and do scoring with the sup-GMM system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm \ - exp/ivectors_sre10_test_sup_gmm $trials local/scores_sup_gmm_ind_pooled + exp/ivectors_sre10_test_sup_gmm $trials exp/scores_sup_gmm_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_ind_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_ind_male + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_ind_male # Create gender dependent PLDA models and do scoring with the sup-GMM system. 
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_dep_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_dep_male -mkdir -p local/scores_sup_gmm_dep_pooled -cat local/scores_sup_gmm_dep_male/plda_scores local/scores_sup_gmm_dep_female/plda_scores \ - > local/scores_sup_gmm_dep_pooled/plda_scores + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_dep_male + +# Pool the gender dependent results +mkdir -p exp/scores_sup_gmm_dep_pooled +cat exp/scores_sup_gmm_dep_male/plda_scores exp/scores_sup_gmm_dep_female/plda_scores \ + > exp/scores_sup_gmm_dep_pooled/plda_scores # Create a gender independent PLDA model and do scoring with the DNN system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn \ - exp/ivectors_sre10_test_dnn $trials local/scores_dnn_ind_pooled + exp/ivectors_sre10_test_dnn $trials exp/scores_dnn_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_ind_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_ind_male + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_ind_male # Create gender dependent PLDA models and do scoring with the DNN system. 
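# The EER figures quoted below come from the compute-eer binary. As a rough
# standalone sketch of the quantity it reports (not the actual
# implementation), the sweep below finds the operating point where the miss
# rate and false-alarm rate cross, given "score target|nontarget" lines such
# as local/prepare_for_eer.py produces; the input file name is hypothetical
# and both classes are assumed non-empty:
sort -k1,1g scores_and_labels.txt | awk '
  { lab[NR] = $2; if ($2 == "target") T++; else NT++ }
  END {
    fa = NT; miss = 0; best = 1e9   # threshold below all scores: no misses, all false alarms
    for (i = 1; i <= NR; i++) {     # raise the threshold just past item i
      if (lab[i] == "target") miss++; else fa--
      m = miss / T; f = fa / NT
      d = (m > f) ? m - f : f - m
      if (d < best) { best = d; eer = (m + f) / 2 }
    }
    printf "approximate EER: %.2f%%\n", 100 * eer
  }'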
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_dep_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_dep_male -mkdir -p local/scores_dnn_dep_pooled -cat local/scores_dnn_dep_male/plda_scores local/scores_dnn_dep_female/plda_scores \ - > local/scores_dnn_dep_pooled/plda_scores + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_dep_male + +mkdir -p exp/scores_dnn_dep_pooled +cat exp/scores_dnn_dep_male/plda_scores exp/scores_dnn_dep_female/plda_scores \ + > exp/scores_dnn_dep_pooled/plda_scores # Sup-GMM PLDA EER # ind pooled: 1.72 # ind female: 1.81 -# ind male: 1.56 -# dep female: 1.89 -# dep male: 1.39 -# dep pooled: 1.65 -echo "Sup-GMM-$num_components EER" +# ind male: 1.70 +# dep female: 2.03 +# dep male: 1.50 +# dep pooled: 1.79 +echo "Sup-GMM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done -# DNN PLDA EER -# ind pooled: 1.05 -# ind female: 1.33 -# ind male: 0.75 -# dep female: 1.41 -# dep male: 0.64 -# dep pooled: 1.02 -echo "DNN-$num_components EER" +# DNN-UBM EER +# ind pooled: 1.01 +# ind female: 1.16 +# ind male: 0.78 +# dep female: 1.27 +# dep male: 0.61 +# dep pooled: 0.96 +echo "DNN-UBM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done # In comparison, here is the EER for an unsupervised GMM-based system -# with 5297 components (the same as the number of senones in the DNN): +# with 5297 components (about the same as the number of senones in the DNN): # GMM-5297 PLDA EER # ind pooled: 2.25 # ind female: 2.33 diff --git a/egs/swahili/s5/cmd.sh b/egs/swahili/s5/cmd.sh index ab1c23f76ef..8c9422b92bc 100755 --- a/egs/swahili/s5/cmd.sh +++ b/egs/swahili/s5/cmd.sh @@ -1,5 +1,5 @@ # JHU cluster options -export train_cmd="queue.pl -l arch=*64*" -export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G" -export cuda_cmd="..." -export mkgraph_cmd="queue.pl -l arch=*64* ram_free=4G,mem_free=4G" +export train_cmd="queue.pl" +export decode_cmd="queue.pl --mem 4G" +export cuda_cmd="queue.pl --gpu 1" +export mkgraph_cmd="queue.pl --mem 4G" diff --git a/egs/swbd/README.txt b/egs/swbd/README.txt index fc61a4c3060..1da570274e4 100644 --- a/egs/swbd/README.txt +++ b/egs/swbd/README.txt @@ -10,11 +10,14 @@ About the Switchboard corpus We are using the eval2000 a.k.a. hub5'00 evaluation data. The acoustics are LDC2002S09 and the text is LDC2002T43. + We are also using the RT'03 test set, available as LDC2007S10. Note: not + all parts of the recipe are tested with this. + About the Fisher corpus for language modeling We use Fisher English training speech transcripts for language modeling, if they are available.
The catalog number for part 1 transcripts is LDC2004T19, - and LDC2005T19 for part 2. + and LDC2005T19 for part 2. Each subdirectory of this directory contains the scripts for a sequence of experiments. @@ -24,4 +27,3 @@ scripts for a sequence of experiments. s5b: This is (somewhat less) out of date, please see s5c s5c: This is the current recipe. - diff --git a/egs/swbd/s5/local/run_sgmm.sh b/egs/swbd/s5/local/run_sgmm.sh deleted file mode 100755 index da9af425fd8..00000000000 --- a/egs/swbd/s5/local/run_sgmm.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -. cmd.sh - - -# Build a SGMM system on just the 100k_nodup data, on top of LDA+MLLT+SAT. -if [ ! -f exp/ubm5a/final.ubm ]; then - steps/train_ubm.sh --cmd "$train_cmd" 700 data/train_100k_nodup data/lang \ - exp/tri4a_ali_100k_nodup exp/ubm5a || exit 1; -fi -steps/train_sgmm.sh --cmd "$train_cmd" \ - 4500 40000 data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup \ - exp/ubm5a/final.ubm exp/sgmm5a || exit 1; - -utils/mkgraph.sh data/lang_test exp/sgmm5a exp/sgmm5a/graph || exit 1; - -steps/decode_sgmm.sh --cmd "$decode_cmd" --config conf/decode.config \ - --nj 30 --transform-dir exp/tri4a/decode_eval2000 \ - exp/sgmm5a/graph data/eval2000 exp/sgmm5a/decode_eval2000 - - # Now discriminatively train the SGMM system on 100k_nodup data. -steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4a_ali_100k_nodup \ - --use-graphs true --use-gselect true data/train_100k_nodup data/lang exp/sgmm5a exp/sgmm5a_ali_100k_nodup - - # Took the beam down to 10 to get acceptable decoding speed. -steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --beam 9.0 --lattice-beam 6 --cmd "$decode_cmd" \ - --transform-dir exp/tri4a_ali_100k_nodup \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup - -steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4a_ali_100k_nodup --boost 0.1 \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup exp/sgmm5a_mmi_b0.1 - -for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4a/decode_eval2000 data/lang_test data/eval2000 exp/sgmm5a/decode_eval2000 \ - exp/sgmm5a_mmi_b0.1/decode_eval2000_it$iter & -done - diff --git a/egs/swbd/s5/run.sh b/egs/swbd/s5/run.sh index 7286938b290..d61b818fe1b 100755 --- a/egs/swbd/s5/run.sh +++ b/egs/swbd/s5/run.sh @@ -161,7 +161,6 @@ steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ -#local/run_sgmm.sh local/run_sgmm2.sh # Building a larger SAT system. diff --git a/egs/swbd/s5/run_edin.sh b/egs/swbd/s5/run_edin.sh index 5778d017529..8aff7e40c66 100755 --- a/egs/swbd/s5/run_edin.sh +++ b/egs/swbd/s5/run_edin.sh @@ -340,7 +340,7 @@ done # TODO(arnab): add SGMM and hybrid -# local/run_sgmm.sh +# local/run_sgmm2.sh # # Recipe with DNN system on top of fMLLR features # local/run_hybrid.sh diff --git a/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh index 940c99538cb..3aae7918964 100755 --- a/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh @@ -18,7 +18,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! 
-f exp/nnet5a_gpu/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh index 50f79208897..74058d9fac4 100755 --- a/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh @@ -66,7 +66,7 @@ if [ $stage -le 2 ]; then steps/nnet2/train_block.sh --stage "$train_stage" \ --num-threads 1 --max-change 40.0 --minibatch-size 512 \ - --parallel-opts "-l gpu=1" \ + --parallel-opts "--gpu 1" \ --initial-learning-rate 0.01 --final-learning-rate 0.001 \ --num-epochs 10 --num-epochs-extra 5 \ --cmd "$decode_cmd" \ diff --git a/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh index 36f72b77083..55becfbe0fc 100755 --- a/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh @@ -20,7 +20,7 @@ EOF ( if [ ! -f exp/nnet5c_gpu/final.mdl ]; then - steps/nnet2/train_tanh.sh --cmd "$decode_cmd" --parallel-opts "-l gpu=1" --io-opts "--max-jobs-run 5" \ + steps/nnet2/train_tanh.sh --cmd "$decode_cmd" --parallel-opts "--gpu 1" --io-opts "--max-jobs-run 5" \ --num-threads 1 --minibatch-size 512 --max-change 40.0 --mix-up 20000 --samples-per-iter 300000 \ --num-epochs 10 --num-epochs-extra 3 --initial-learning-rate 0.0067 --final-learning-rate 0.00067 \ --num-jobs-nnet 10 --num-hidden-layers 5 --hidden-layer-dim 1536 data/train_nodup data/lang \ diff --git a/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh index 5364f14bcb6..e0b523910df 100755 --- a/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh @@ -18,7 +18,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh index 545c80c0e1c..77de59b90ff 100755 --- a/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh @@ -18,7 +18,7 @@ train_stage=-10 . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh index 3cc315a9775..b91599a27e6 100755 --- a/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh @@ -9,7 +9,7 @@ dir=nnet5f_gpu . ./cmd.sh . ./path.sh . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh b/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh index 712c8e79c5b..6327ee85224 100755 --- a/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh @@ -21,7 +21,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. alidir=exp/nnet5a_ali_100k_nodup if [ ! 
-f $alidir/.done ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh index 8324051279b..0296f4cca00 100755 --- a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh @@ -7,7 +7,7 @@ # directory name. -gpu_opts="-l gpu=1" # This is suitable for the CLSP network, +gpu_opts="--gpu 1" # This is suitable for the CLSP network, # you'll likely have to change it. we'll # use it later on, in the training (it's # not used in denlat creation) @@ -18,8 +18,8 @@ set -e # exit on error. . ./cmd.sh . ./path.sh -! cuda-compiled && cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -110,7 +109,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -119,9 +118,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -140,7 +139,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
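# Why the frame-shifted copies above: the chain model subsamples frames, so
# with frame_subsampling_factor=3 the seq command expands to "-1 0 1",
# steps/shift_feats.sh writes one copy of the data per offset, and
# utils/combine_data.sh pools them so that discriminative training sees
# every frame offset. A quick check of the expansion:
frame_subsampling_factor=3
for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
  echo "shifted copy: offset $x"   # prints -1, 0 and 1
done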
num_threads_denlats=6 @@ -154,16 +153,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -176,16 +172,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -198,7 +191,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ --modify-learning-rates false \ ${degs_dir} $dir ; fi @@ -210,7 +203,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ @@ -235,4 +228,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh index 1718b5a4f7e..ae7c97e7d08 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh @@ -119,7 +119,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh new file mode 100755 index 00000000000..1e673f8e01a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh @@ -0,0 
+1,240 @@ +#!/bin/bash + +# 6k is same as 6j, but with the fast lstm layers + +# local/chain/compare_wer_general.sh blstm_6j_sp blstm_6k_sp +# System blstm_6j_sp blstm_6k_sp +# WER on train_dev(tg) 13.80 13.25 +# WER on train_dev(fg) 12.64 12.27 +# WER on eval2000(tg) 15.6 15.7 +# WER on eval2000(fg) 14.2 14.5 +# Final train prob -0.055 -0.052 +# Final valid prob -0.077 -0.080 +# Final train prob (xent) -0.777 -0.743 +# Final valid prob (xent) -0.9126 -0.8816 + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/blstm_6k # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output 
input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh new file mode 100644 index 00000000000..68daf81ab01 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh @@ -0,0 +1,248 @@ +#!/bin/bash + +# 6l is the same as 6k, but with per-frame dropout added at +# location 4 of the paper: http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# local/chain/compare_wer_general.sh blstm_6k_sp blstm_6l_sp +# attention: the blstm_6k_sp result here is far better than the updated +# result (14.5 vs 14.1); this may be due to noise + +# System blstm_6k_sp blstm_6l_sp +# WER on train_dev(tg) 13.30 13.06 +# WER on train_dev(fg) 12.34 12.16 +# WER on eval2000(tg) 15.5 15.2 +# WER on eval2000(fg) 14.1 13.8 +# Final train prob -0.052 -0.065 +# Final valid prob -0.090 -0.093 +# Final train prob (xent) -0.743 -0.831 +# Final valid prob (xent) -0.9579 -0.9821 + +# exp/chain/blstm_6k_sp/: num-iters=327 nj=3..16 num-params=41.2M dim=40+100->6074 combine=-0.069->-0.069 xent:train/valid[217,326,final]=(-0.849,-0.748,-0.743/-1.04,-0.959,-0.958) logprob:train/valid[217,326,final]=(-0.065,-0.053,-0.052/-0.096,-0.090,-0.090) +# exp/chain/blstm_6l_sp/: num-iters=327 nj=3..16 num-params=41.2M dim=40+100->6074 combine=-0.084->-0.082 xent:train/valid[217,326,final]=(-1.45,-0.840,-0.831/-1.58,-0.994,-0.982) logprob:train/valid[217,326,final]=(-0.110,-0.066,-0.065/-0.132,-0.094,-0.093) +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/blstm_6l # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 +dropout_schedule='0,0@0.20,0.1@0.50,0' + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology.
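# The dropout_schedule set above, '0,0@0.20,0.1@0.50,0', is interpreted by
# the nnet3 training scripts as a piecewise-linear function of the fraction
# of training completed: 0 until 20%, rising to 0.1 at 50%, then back to 0
# at the end. A standalone sketch of that interpolation at a hypothetical
# point (f=0.35, which should give 0.05):
awk -v f=0.35 'BEGIN {
  x[1]=0;    y[1]=0     # knots of the schedule
  x[2]=0.20; y[2]=0
  x[3]=0.50; y[3]=0.1
  x[4]=1.00; y[4]=0
  for (i = 1; i < 4; i++)
    if (f >= x[i] && f <= x[i+1])
      printf "dropout proportion at %.2f: %.3f\n", f, y[i]+(y[i+1]-y[i])*(f-x[i])/(x[i+1]-x[i])
}'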
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh index e262430ab06..8d3fcae4297 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh @@ -120,7 +120,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh new file mode 100755 index 00000000000..b9b7152dcbe --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh @@ -0,0 +1,304 @@ +#!/bin/bash + +# Copyright 2015 Johns Hopkins University (Author: Daniel Povey). +# 2015 Vijayaditya Peddinti +# 2015 Xingyu Na +# 2015 Pegah Ghahrmani +# 2017 Google Inc. (vpeddinti@google.com) +# Apache 2.0. + + + +# run_lstm_6k.sh is like run_lstm_6j.sh but making +# various kaldi-5.1-related upgrades to the script. +# For the list of changes compare tuning/run_tdnn_lstm_1{c,d}.sh + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/lstm_6k # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
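# frames_per_chunk above is a comma-separated list of chunk widths: training
# mixes chunks of 140, 100 and 160 frames, while decoding uses only the
# first ("primary") width, extracted with cut as in the script:
frames_per_chunk=140,100,160
echo "primary decode chunk: $(echo $frames_per_chunk | cut -d, -f1)"   # 140
# chunk_right_context=0 is consistent with a forward-recurrent LSTM, which
# needs only left (past) context, unlike the BLSTM recipes above that use
# 40 frames on both sides.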
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + # Note : The delay variable will be used just in the init.config. + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat delay=$label_delay + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
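# The decode directory names in these scripts lean on the bash ${var:+word}
# expansion, which yields "word" only when var is set and non-empty; that is
# how one line serves both the default and the --iter cases:
decode_iter=
echo "decode_eval2000${decode_iter:+_$decode_iter}_sw1_tg"    # decode_eval2000_sw1_tg
decode_iter=epoch2
echo "decode_eval2000${decode_iter:+_$decode_iter}_sw1_tg"    # decode_eval2000_epoch2_sw1_tg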
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh index 85afa7bf9ca..25c6841c0a9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh @@ -78,14 +78,13 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.000000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 ## Decode options @@ -93,8 +92,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! 
cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -137,7 +136,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -146,9 +145,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -167,7 +166,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. num_threads_denlats=6 @@ -181,16 +180,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -203,16 +199,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -225,7 +218,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs 
--one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ --modify-learning-rates false \ ${degs_dir} $dir ; fi @@ -237,7 +230,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ @@ -262,4 +255,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh index 2a0019e59d7..fa6518a9ad9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh @@ -116,7 +116,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh index 946ae796e2f..9dfaa1d4509 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh @@ -9,7 +9,14 @@ #Final valid prob -0.110475 -0.113102 #Final train prob (xent) -1.20065 -1.2533 #Final valid prob (xent) -1.3313 -1.36743 -# + +# Online decoding +# System tdnn_7h_sp tdnn_7h_sp_online +# WER on train_dev(tg) 13.96 13.95 +# WER on train_dev(fg) 12.86 12.82 +# WER on eval2000(tg) 16.5 16.5 +# WER on eval2000(fg) 14.8 14.8 + set -e # configs for 'chain' @@ -20,6 +27,7 @@ get_egs_stage=-10 speed_perturb=true dir=exp/chain/tdnn_7h # Note: _sp will get added to this if $speed_perturb == true. decode_iter= +decode_nj=50 # training options num_epochs=4 @@ -36,6 +44,8 @@ remove_egs=false common_egs_dir= xent_regularize=0.1 +test_online_decoding=false # if true, it will run the last decoding stage. + # End configuration section. echo "$0 $@" # Print the command line for logging @@ -109,7 +119,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs @@ -193,26 +203,65 @@ if [ $stage -le 14 ]; then utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg fi -decode_suff=sw1_tg + graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi + rm $dir/.error 2>/dev/null || true for decode_set in train_dev eval2000; do ( steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 50 --cmd "$decode_cmd" $iter_opts \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1; + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; if $has_fisher; then steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; fi - ) & + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 16 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi fi -wait; + + exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh index c19ca88a843..c5b5633d94c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh @@ -112,7 +112,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index b3bed2f2538..793b40f7fe3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -111,7 +111,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs @@ -126,12 +126,12 @@ if [ $stage -le 12 ]; then # the first splicing is moved before the lda layer, so no splicing here relu-renorm-layer name=tdnn1 dim=768 - tdnn-relu-renorm-layer name=tdnn2 splice-indexes=-1,0,1 dim=768 subset-dim=384 - 
tdnn-relu-renorm-layer name=tdnn3 splice-indexes=-1,0,1 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn4 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn5 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn6 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn7 splice-indexes=-3,0,3 dim=768 subset-dim=384 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=768 subset-dim=384 ## adding the layers for chain branch relu-renorm-layer name=prefinal-chain input=tdnn7 dim=768 target-rms=0.5 diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh index b346862049b..f7681a743e1 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh @@ -111,7 +111,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh new file mode 100755 index 00000000000..12b63b7e96a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh @@ -0,0 +1,245 @@ +#!/bin/bash + +# tdnn_blstm_1a is the same as blstm_6k, but with initial tdnn layers added +# blstm_6k : num-parameters: 41155430 +# tdnn_blstm_1a : num-parameters: 53688166 + +# local/chain/compare_wer_general.sh blstm_6k_sp tdnn_blstm_1a_sp +# System blstm_6k_sp tdnn_blstm_1a_sp +# WER on train_dev(tg) 13.25 12.95 +# WER on train_dev(fg) 12.27 11.98 +# WER on eval2000(tg) 15.7 15.5 +# WER on eval2000(fg) 14.5 14.1 +# Final train prob -0.052 -0.041 +# Final valid prob -0.080 -0.072 +# Final train prob (xent) -0.743 -0.629 +# Final valid prob (xent) -0.8816 -0.8091 + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_blstm_1a # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology.
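# The run_tdnn_7j hunk above rewrites the spliced TDNN layers in plain
# xconfig notation: input=Append(-3,0,3) feeds the layer the previous
# layer's output at frames t-3, t and t+3, concatenated, so the splicing
# lives on the input descriptor instead of in the special
# tdnn-relu-renorm-layer type. A minimal fragment in that notation:
#   relu-renorm-layer name=tdnn1 dim=768
#   relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=768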
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then
+    iter_opts=" --iter $decode_iter "
+  fi
+  for decode_set in train_dev eval2000; do
+      (
+      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+          --nj 50 --cmd "$decode_cmd" $iter_opts \
+          --extra-left-context $extra_left_context \
+          --extra-right-context $extra_right_context \
+          --frames-per-chunk "$frames_per_chunk" \
+          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
+          $graph_dir data/${decode_set}_hires \
+          $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1;
+      if $has_fisher; then
+          steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+            data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
+            $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1;
+      fi
+      ) &
+  done
+fi
+wait;
+exit 0;
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh
new file mode 100644
index 00000000000..3929cdc432e
--- /dev/null
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+
+# tdnn_blstm_1b is the same as tdnn_blstm_1a, but with per-frame dropout
+# added at location 4, see paper:
+# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf
+
+# ./local/chain/compare_wer_general.sh tdnn_blstm_1a_sp tdnn_blstm_1b_sp
+# System tdnn_blstm_1a_sp tdnn_blstm_1b_sp
+# WER on train_dev(tg) 12.86 12.60
+# WER on train_dev(fg) 11.86 11.80
+# WER on eval2000(tg) 15.3 14.9
+# WER on eval2000(fg) 14.0 13.5
+# Final train prob -0.042 -0.054
+# Final valid prob -0.099 -0.091
+# Final train prob (xent) -0.637 -0.719
+# Final valid prob (xent) -0.9418 -0.9190
+
+# exp/chain/tdnn_blstm_1a_sp/: num-iters=327 nj=3..16 num-params=53.7M dim=40+100->6074 combine=-0.058->-0.057 xent:train/valid[217,326,final]=(-0.753,-0.631,-0.637/-0.974,-0.941,-0.942) logprob:train/valid[217,326,final]=(-0.055,-0.041,-0.042/-0.094,-0.099,-0.099)
+# exp/chain/tdnn_blstm_1b_sp/: num-iters=327 nj=3..16 num-params=53.7M dim=40+100->6074 combine=-0.070->-0.068 xent:train/valid[217,326,final]=(-1.27,-0.732,-0.719/-1.42,-0.931,-0.919) logprob:train/valid[217,326,final]=(-0.094,-0.055,-0.054/-0.117,-0.091,-0.091)
+set -e
+
+# configs for 'chain'
+stage=12
+train_stage=-10
+get_egs_stage=-10
+speed_perturb=true
+dir=exp/chain/tdnn_blstm_1b # Note: _sp will get added to this if $speed_perturb == true.
+decode_iter=
+decode_dir_affix=
+
+# training options
+leftmost_questions_truncate=-1
+chunk_width=150
+chunk_left_context=40
+chunk_right_context=40
+xent_regularize=0.025
+self_repair_scale=0.00001
+label_delay=0
+dropout_schedule='0,0@0.20,0.1@0.50,0'
+# decode options
+extra_left_context=50
+extra_right_context=50
+frames_per_chunk=
+
+remove_egs=false
+common_egs_dir=
+
+affix=
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  cat <$lang/topo
+fi
+
+if [ $stage -le 11 ]; then
+  # Build a tree using our new topology.
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index 47d4fcdd52c..89ed8ad1d72 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -118,7 +118,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh index 07e38cb29c5..f0c88368245 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -114,7 +114,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh old mode 100644 new mode 100755 similarity index 91% rename from egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh rename to egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index 88a191a1348..d71301eb102 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -1,16 +1,23 @@ #!/bin/bash -# Unlike 1a this setup interleaves the TDNN and LSTM layers. - -#System tdnn_lstm_1a_ld5 tdnn_lstm_1b_ld5 tdnn_fastlstm_1b_ld5 -#WER on train_dev(tg) 13.42 13.00 12.91 -#WER on train_dev(fg) 12.42 12.03 11.98 -#WER on eval2000(tg) 15.7 15.3 15.2 -#WER on eval2000(fg) 14.2 13.9 13.8 -#Final train prob -0.0538088 -0.056294 -0.050 -#Final valid prob -0.0800484 -0.0813322 -0.092 -#Final train prob (xent) -0.7603 -0.777787 -0.756 -#Final valid prob (xent) -0.949909 -0.939146 -0.983 +# run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the +# new 'fast-lstm' layer. Results are slightly improved, plus +# it's faster. See PR #1243 on github, and issue #1237. +# This used to be called run_tdnn_fastlstm_1b.sh. + +## note: the last column below was this run on Feb 1 2017, in the +## shortcut branch. Results are a bit worse, but I believe this is just +## random noise or a little bit of mean-regression. 
+ +#System tdnn_lstm_1a_ld5_sp tdnn_lstm_1b_ld5_sp tdnn_lstm_1c_ld5_sp tdnn_lstm_1c_ld5_sp +#WER on train_dev(tg) 13.42 13.00 12.91 13.17 +#WER on train_dev(fg) 12.42 12.03 11.98 12.25 +#WER on eval2000(tg) 15.7 15.3 15.2 15.4 +#WER on eval2000(fg) 14.2 13.9 13.8 14.1 +#Final train prob -0.0538088 -0.056294 -0.050 -0.046 +#Final valid prob -0.0800484 -0.0813322 -0.092 -0.073 +#Final train prob (xent) -0.7603 -0.777787 -0.756 -0.749 +#Final valid prob (xent) -0.949909 -0.939146 -0.983 -0.980 set -e @@ -19,7 +26,7 @@ stage=12 train_stage=-10 get_egs_stage=-10 speed_perturb=true -dir=exp/chain/tdnn_fastlstm_1b # Note: _sp will get added to this if $speed_perturb == true. +dir=exp/chain/tdnn_lstm_1c # Note: _sp will get added to this if $speed_perturb == true. decode_iter= decode_dir_affix= @@ -114,7 +121,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..22c7d2e582d --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,288 @@ +#!/bin/bash + + +# run_tdnn_lstm_1d.sh is like run_tdnn_lstm_1c.sh but making +# various kaldi-5.1-related upgrades to the script: +# change chunk-width to be variable, add extra_left_context_initial=0 +# and extra_right_context_final=0; add looped decoding. +# Also changed frames-per-iter from 1.2 million to 1.5 million... this +# might have been a mistake, trying 1 million in 1f to see if this matters. + +# The comparison below is with a version of the 1c system that was run at about +# the same time. The degradation in log-likelihood and xent prob is likely because +# now on average the chunk-size is slightly smaller than before (150 -> 136); +# possibly the change in extra-(left,right) context has a similar effect +# (or maybe it's just because the validation and train-subset examples have changed). + + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# System tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# WER on train_dev(tg) 13.17 12.90 +# [looped:] 13.01 +# WER on train_dev(fg) 12.25 11.90 +# [looped:] 12.13 +# WER on eval2000(tg) 15.4 15.7 +# [looped:] 15.7 +# WER on eval2000(fg) 14.1 14.2 +# [looped:] 14.4 +# Final train prob -0.046 -0.064 +# Final valid prob -0.073 -0.088 +# Final train prob (xent) -0.749 -0.836 +# Final valid prob (xent) -0.9084 -0.9631 + +# run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the +# new 'fast-lstm' layer. Results are slightly improved, plus +# it's faster. See PR #1243 on github, and issue #1237. +# This used to be called run_tdnn_fastlstm_1b.sh. + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1d # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. 
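The variable chunk width introduced in 1d is given as a comma-separated list: training generates examples at each of the listed sizes, while plain (non-looped) decoding uses only the first, "primary", value. A minimal sketch of how that primary value is pulled out, mirroring the cut invocation defined just below:

frames_per_chunk=140,100,160
frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1)
echo "$frames_per_chunk_primary"   # prints: 140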
+frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
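A side note on naming, before the looped-decoding loop below: sibling scripts such as 1a, 1b and 1e build their decode-directory names with bash's ${var:+...} expansion, which appends a suffix only when the variable is non-empty (1d itself hard-codes decode_iter=final and uses plain names). A minimal illustration of the idiom:

decode_set=eval2000
decode_iter=
echo "decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg"   # decode_eval2000_sw1_tg
decode_iter=final
echo "decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg"   # decode_eval2000_final_sw1_tg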
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..6987757757a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,327 @@ +#!/bin/bash + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# There seems to be no consistent difference in WER. Inconclusive. +# However I may keep 0.01 just for consistency with other setups. +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# System tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# WER on train_dev(tg) 12.90 12.74 +# [looped:] 13.01 12.93 +# WER on train_dev(fg) 11.90 11.70 +# [looped:] 12.13 12.09 +# WER on eval2000(tg) 15.7 15.7 +# [looped:] 15.7 15.9 +# WER on eval2000(fg) 14.2 14.3 +# [looped:] 14.4 14.6 +# Final train prob -0.064 -0.066 +# Final valid prob -0.088 -0.087 +# Final train prob (xent) -0.836 -0.931 +# Final valid prob (xent) -0.9631 -1.0279 + +# Online decoding +# System tdnn_lstm_1e_sp_online tdnn_lstm_1e_sp +# WER on train_dev(tg) 12.93 12.74 +# WER on train_dev(fg) 12.05 11.87 +# WER on eval2000(tg) 15.5 15.4 +# WER on eval2000(fg) 14.0 13.8 + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
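The decode stages here run one subshell per test set in the background and then check a sentinel file, because a bare `wait` does not propagate the exit status of background jobs. Note that the sentinel is only ever created where the subshell is launched as `) || touch $dir/.error &`, as in the hunk at the top of this section and in the online-decoding stage further down; where a stage launches with a bare `) &`, the `.error` check that follows is inert. The pattern in isolation (the sleep is a placeholder for the real decode pipeline, and the directory name is hypothetical):

dir=exp/chain/example_dir           # hypothetical; stands in for the real $dir
mkdir -p $dir
rm $dir/.error 2>/dev/null || true  # clear any stale sentinel
for decode_set in train_dev eval2000; do
  (
    sleep 1                         # placeholder for the real decode + rescore commands
  ) || touch $dir/.error &          # on failure, mark it without killing the other jobs
done
wait                                # a bare wait exits 0 regardless of the jobs' status
if [ -f $dir/.error ]; then
  echo "$0: something went wrong in decoding"
  exit 1
fi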
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..90e179379e4 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_tdnn_lstm_1f.sh is like run_tdnn_lstm_1e.sh but +# reducing the frames-per-iter from 1.5 million to 1 million, +# since the time per iter was more than usual (about 5 minutes). + +# Below, the WER seems to get a little worse, although the optimization +# is improved slightly. There seems to be more train/valid difference. +# see also 1i. + +# exp/chain/tdnn_lstm_1f_sp: num-iters=392 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.080->-0.073 xent:train/valid[260,391,final]=(-1.06,-0.903,-0.916/-1.13,-1.03,-1.04) logprob:train/valid[260,391,final]=(-0.084,-0.064,-0.065/-0.100,-0.091,-0.090) + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). 
Looks like we may be overtraining.
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+get_egs_stage=-10
+speed_perturb=true
+dir=exp/chain/tdnn_lstm_1f # Note: _sp will get added to this if $speed_perturb == true.
+decode_iter=final
+
+# training options
+xent_regularize=0.01
+self_repair_scale=0.00001
+label_delay=5
+
+chunk_left_context=40
+chunk_right_context=0
+# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0
+# directly without variables.
+frames_per_chunk=140,100,160
+
+# (non-looped) decoding options
+frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1)
+extra_left_context=50
+extra_right_context=0
+# we'll put extra-left-context-initial=0 and extra-right-context-final=0
+# directly without variables.
+
+
+remove_egs=false
+common_egs_dir=
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  cat <$lang/topo
+fi
+
+if [ $stage -le 11 ]; then
+  # Build a tree using our new topology.
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir
+fi
+
+if [ $stage -le 12 ]; then
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}')
+  [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; }
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  lstm_opts="decay-time=20"
+
+  mkdir -p $dir/configs
+  cat < $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=1024
+  relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
+  relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
+
+  # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
+  fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+  relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
+  relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
+  fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+  relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
+  relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
+  fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+
+  ## adding the layers for chain branch
+ output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
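On the decoding scales used throughout these scripts: chain models are compiled into a graph with --self-loop-scale 1.0 (rather than mkgraph's default of 0.1) and decoded with --acwt 1.0; the --post-decode-acwt 10.0 option then multiplies the acoustic scores in the output lattices by 10, so that the conventional LM-weight range still applies at scoring time. As a rough sketch only (this is not literally what decode.sh runs internally, and the lattice path is illustrative), the post-decode scaling corresponds to:

# rescale the acoustic scores of an already-decoded lattice by 10
. ./path.sh
gunzip -c exp/chain/tdnn_lstm_1e_sp/decode_eval2000_sw1_tg/lat.1.gz | \
  lattice-scale --acoustic-scale=10.0 ark:- ark:- | \
  gzip -c > lat.rescaled.1.gz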
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg
+fi
+
+
+graph_dir=$dir/graph_sw1_tg
+if [ $stage -le 15 ]; then
+  for decode_set in train_dev eval2000; do
+      (
+      steps/nnet3/decode.sh --num-threads 4 \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --nj 25 --cmd "$decode_cmd" --iter $decode_iter \
+          --extra-left-context $extra_left_context \
+          --extra-right-context $extra_right_context \
+          --extra-left-context-initial 0 \
+          --extra-right-context-final 0 \
+          --frames-per-chunk "$frames_per_chunk_primary" \
+          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
+          $graph_dir data/${decode_set}_hires \
+          $dir/decode_${decode_set}_sw1_tg || exit 1;
+      if $has_fisher; then
+          steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+            data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
+            $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1;
+      fi
+      ) &
+  done
+fi
+wait;
+
+if [ $stage -le 16 ]; then
+  # looped decoding. Note: this does not make sense for BLSTMs or other
+  # backward-recurrent setups, and for TDNNs and other non-recurrent there is no
+  # point doing it because it would give identical results to regular decoding.
+  for decode_set in train_dev eval2000; do
+      (
+      steps/nnet3/decode_looped.sh \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --nj 50 --cmd "$decode_cmd" --iter $decode_iter \
+          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
+          $graph_dir data/${decode_set}_hires \
+          $dir/decode_${decode_set}_sw1_tg_looped || exit 1;
+      if $has_fisher; then
+          steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+            data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
+            $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1;
+      fi
+      ) &
+  done
+fi
+wait;
+
+
+
+exit 0;
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh
new file mode 100755
index 00000000000..cb73f020e3e
--- /dev/null
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+
+# 1g is like 1e, but reducing decay-time from 20 to 15, to see if
+# it reduces the difference between regular and looped decoding.
+#
+# There doesn't seem to be a very consistent difference between 1e and 1g.
+
+
+# exp/chain/tdnn_lstm_1g_sp: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.083->-0.076 xent:train/valid[173,261,final]=(-1.09,-0.929,-0.938/-1.15,-1.04,-1.05) logprob:train/valid[173,261,final]=(-0.089,-0.066,-0.067/-0.102,-0.089,-0.090)
+
+# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1g_sp
+# System tdnn_lstm_1e_sp tdnn_lstm_1g_sp
+# WER on train_dev(tg) 12.74 13.03
+# [looped:] 12.93 12.98
+# WER on train_dev(fg) 11.70 12.02
+# [looped:] 12.09 12.13
+# WER on eval2000(tg) 15.7 15.6
+# [looped:] 15.9 15.9
+# WER on eval2000(fg) 14.3 14.1
+# [looped:] 14.6 14.4
+# Final train prob -0.066 -0.067
+# Final valid prob -0.087 -0.090
+# Final train prob (xent) -0.931 -0.938
+# Final valid prob (xent) -1.0279 -1.0473
+
+
+# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but
+# trying the change of xent_regularize from 0.025 (which was an
+# unusual value) to the more usual 0.01.
+
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+get_egs_stage=-10
+speed_perturb=true
+dir=exp/chain/tdnn_lstm_1g # Note: _sp will get added to this if $speed_perturb == true.
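The "_sp will get added" note refers to suffix-handling boilerplate that is elided from these listings; a minimal sketch of the convention, using the same variable names as the scripts:

speed_perturb=true
dir=exp/chain/tdnn_lstm_1g        # as in the config block above
suffix=
if [ "$speed_perturb" == "true" ]; then suffix=_sp; fi
dir=${dir}$suffix                 # -> exp/chain/tdnn_lstm_1g_sp
train_set=train_nodup$suffix      # the speed-perturbed training set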
+decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=15" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. 
Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh new file mode 100755 index 00000000000..b12be22ce3d --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# 1h is like 1e, but reducing the hidden-dims from 1024 to 880. + +# Does not seem to help; both train and valid probs get worse by about +# the same amount, and WER is overall just slightly worse. Maybe 1024 +# was approximately optimal. + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1h_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1h_sp +# WER on train_dev(tg) 12.74 13.06 +# [looped:] 12.93 13.17 +# WER on train_dev(fg) 11.70 12.13 +# [looped:] 12.09 12.27 +# WER on eval2000(tg) 15.7 15.7 +# [looped:] 15.9 15.9 +# WER on eval2000(fg) 14.3 14.4 +# [looped:] 14.6 14.5 +# Final train prob -0.066 -0.069 +# Final valid prob -0.087 -0.091 +# Final train prob (xent) -0.931 -0.967 +# Final valid prob (xent) -1.0279 -1.0631 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1h # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=880 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=880 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=880 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm2 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm3 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
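+ # Aside: data/lang_sw1_{tg,fsh_fg} in the rescoring call below is plain
+ # bash brace expansion, passing two language-model directories (and two
+ # decode directories) as separate arguments; in miniature:
+ echo data/lang_sw1_{tg,fsh_fg}   # -> data/lang_sw1_tg data/lang_sw1_fsh_fg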
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..7e05834c1fb --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_tdnn_lstm_1i.sh is like run_tdnn_lstm_1{e,f}.sh but +# with a different frames-per-iter: 2 million, vs. 1.5 million +# (1e) and 1 million (1f) + +# Results are inconclusive regarding comparison with 1e: it's [0.3 worse, 0.1 +# better, 0.2 worse, same, 0.2 better, 0.2 better, 0.3 better, 0.3 better] on +# the different conditions. There is less train/valid difference and worse +# train prob [the trends of valid and train probs are consistent as we change +# the frames-per-iter]. + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1{e,f,i}_sp 2>/dev/null +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp tdnn_lstm_1i_sp +# WER on train_dev(tg) 12.74 13.23 13.08 +# [looped:] 12.93 13.27 13.00 +# WER on train_dev(fg) 11.70 12.17 11.97 +# [looped:] 12.09 12.42 12.08 +# WER on eval2000(tg) 15.7 16.1 15.5 +# [looped:] 15.9 16.2 15.7 +# WER on eval2000(fg) 14.3 14.6 14.0 +# [looped:] 14.6 14.7 14.3 +# Final train prob -0.066 -0.065 -0.069 +# Final valid prob -0.087 -0.090 -0.088 +# Final train prob (xent) -0.931 -0.916 -0.947 +# Final valid prob (xent) -1.0279 -1.0359 -1.0419 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). Looks like we may be overtraining. +# +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1i # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. 
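+# The first entry of the comma-separated $frames_per_chunk above serves as
+# the primary chunk width, and is what gets re-used at decode time; the
+# cut call above simply picks it off, e.g.:
+echo $frames_per_chunk | cut -d, -f1   # -> 140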
+echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 2000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
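+ # Aside: a note on the scaling used with these decodes. Chain models are
+ # decoded with --acwt 1.0, and --post-decode-acwt 10.0 then scales the
+ # acoustic scores by 10 as the lattice is written, so that the integer
+ # LM-weight sweep done by the scoring scripts keeps its usual range; a
+ # rough sketch of the resulting effective acoustic scales:
+ awk 'BEGIN { for (lmwt=8; lmwt<=12; lmwt+=2) printf("lmwt %d -> effective acoustic scale %.2f\n", lmwt, 10.0/lmwt) }'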
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..6a6a4ba30e1 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,311 @@ +#!/bin/bash + +# same as 1e but with delay of -1 +# System tdnn_lstm_1e_sp tdnn_lstm_1j_sp +# WER on train_dev(tg) 12.74 12.95 +# WER on train_dev(fg) 11.70 12.01 +# WER on eval2000(tg) 15.7 15.3 +# WER on eval2000(fg) 14.3 13.9 +# Final train prob -0.066 -0.066 +# Final valid prob -0.087 -0.089 +# Final train prob (xent) -0.931 -0.921 +# Final valid prob (xent) -1.0279 -1.0363 +# exp/chain/tdnn_lstm_1j_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6067 combine=-0.076->-0.074 xent:train/valid[173,261,final]=(-1.08,-0.925,-0.921/-1.17,-1.04,-1.04) logprob:train/valid[173,261,final]=(-0.085,-0.067,-0.066/-0.103,-0.090,-0.089) + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1j # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
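+ # Aside: stage 12 below gives the xent output layer a learning-rate
+ # factor of 0.5/$xent_regularize; with xent_regularize=0.01 the factor
+ # is 50, which cancels the 0.01 weight on the xent objective so that
+ # layer learns at a rate independent of the regularization constant.
+ # The script computes it with a python2-style print, i.e.:
+ echo "print 0.5/$xent_regularize" | python   # -> 50.0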
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-1 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
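+ # Aside: ${decode_iter:+_$decode_iter} in the decode-directory names
+ # expands to "_<iter>" only when $decode_iter is non-empty, so one
+ # pattern covers both the default and the --iter cases; in miniature:
+ d=300; echo "decode_eval2000${d:+_$d}_sw1_tg"   # -> decode_eval2000_300_sw1_tg
+ d="";  echo "decode_eval2000${d:+_$d}_sw1_tg"   # -> decode_eval2000_sw1_tg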
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100644 index 00000000000..21cb4fa9373 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,321 @@ +#!/bin/bash + +# run_tdnn_lstm_1k.sh is like run_tdnn_lstm_1e.sh, but +# adds per-frame dropout at location 4 of the LSTM layers, as in the paper: +# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1k_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1k_sp +# WER on train_dev(tg) 13.18 12.60 +# [looped:] 13.10 12.56 +# WER on train_dev(fg) 12.21 11.58 +# [looped:] 12.28 11.62 +# WER on eval2000(tg) 15.8 15.2 +# [looped:] 15.8 15.2 +# WER on eval2000(fg) 14.5 13.7 +# [looped:] 14.5 13.8 +# Final train prob -0.060 -0.076 +# Final valid prob -0.101 -0.106 +# Final train prob (xent) -0.868 -0.989 +# Final valid prob (xent) -1.0740 -1.1341 + +# exp/chain/tdnn_lstm_1e_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.072->-0.071 xent:train/valid[173,261,final]=(-1.01,-0.876,-0.868/-1.16,-1.08,-1.07) logprob:train/valid[173,261,final]=(-0.075,-0.061,-0.060/-0.106,-0.101,-0.101) +# exp/chain/tdnn_lstm_1k_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.093->-0.089 xent:train/valid[173,261,final]=(-2.87,-1.07,-0.989/-2.90,-1.20,-1.13) logprob:train/valid[173,261,final]=(-0.153,-0.079,-0.076/-0.179,-0.107,-0.106) + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1k # Note: _sp will get added to this if $speed_perturb ==
true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. 
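+ # (Aside: the "|| touch $dir/.error &" closing this subshell below is a
+ # simple failure-collection idiom: plain 'wait' does not propagate the
+ # exit status of backgrounded jobs, so each failing job leaves a
+ # sentinel file for the parent to test. In miniature, with some_job as
+ # a stand-in command:
+ #   rm -f .error
+ #   ( some_job ) || touch .error &
+ #   wait
+ #   [ -f .error ] && echo "at least one background job failed"
+ # )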
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100644 index 00000000000..e88e199839c --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,244 @@ +#!/bin/bash + +# tdnn_lstm_1l is same as tdnn_lstm_1b, but with the per-frame dropout +# added with location 4 in LSTM layer, see paper: +# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh tdnn_lstm_1b_ld5_sp tdnn_lstm_1l_ld5_sp +# System tdnn_lstm_1b_ld5_sp tdnn_lstm_1l_ld5_sp +# WER on train_dev(tg) 13.06 12.41 +# WER on train_dev(fg) 12.13 11.59 +# WER on eval2000(tg) 15.1 14.8 +# WER on eval2000(fg) 13.9 13.5 +# Final train prob -0.047 -0.069 +# Final valid prob -0.093 -0.095 +# Final train prob (xent) -0.735 -0.913 +# Final valid prob (xent) -1.0151 -1.0820 + +# exp/chain/tdnn_lstm_1b_ld5_sp: num-iters=327 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.062->-0.061 xent:train/valid[217,326,final]=(-0.877,-0.741,-0.735/-1.08,-1.02,-1.02) logprob:train/valid[217,326,final]=(-0.063,-0.048,-0.047/-0.095,-0.093,-0.093) +# exp/chain/tdnn_lstm_1l_ld5_sp: num-iters=327 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.088->-0.084 xent:train/valid[217,326,final]=(-3.32,-0.961,-0.913/-3.40,-1.13,-1.08) logprob:train/valid[217,326,final]=(-0.176,-0.072,-0.069/-0.198,-0.097,-0.095) +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1l # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 +dropout_schedule='0,0@0.20,0.3@0.50,0' +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
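+ # Aside: the dropout_schedule above is, as I read it, a piecewise-linear
+ # function of training progress, given as value[@data-fraction] points:
+ # '0,0@0.20,0.3@0.50,0' holds dropout at 0 until 20% of training, ramps
+ # to 0.3 at 50%, then decays back to 0 by the end (see the schedule
+ # parsing code under steps/libs/nnet3/train/ for the authoritative
+ # behavior). A sketch of the implied proportions:
+ for f in 0.0 0.20 0.35 0.50 0.75 1.0; do
+   awk -v f=$f 'BEGIN { x[1]=0; y[1]=0; x[2]=0.20; y[2]=0; x[3]=0.50; y[3]=0.3; x[4]=1; y[4]=0;
+     for (i=1;i<4;i++) if (f>=x[i] && f<=x[i+1]) { printf("progress %.2f -> dropout %.3f\n", f, y[i]+(y[i+1]-y[i])*(f-x[i])/(x[i+1]-x[i])); exit } }'
+ done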
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/eval1997_data_prep.sh b/egs/swbd/s5c/local/eval1997_data_prep.sh index f49ac551192..e29da13deee 100755 --- a/egs/swbd/s5c/local/eval1997_data_prep.sh +++ b/egs/swbd/s5c/local/eval1997_data_prep.sh @@ -5,13 +5,13 @@ # To be run from one directory above this script. -# The input is a directory name containing the 1997 Hub5 english evaluation +# The input is a directory name containing the 1997 Hub5 english evaluation # test set and transcripts, which is LDC2002S10 # e.g. 
see # http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2002S10 # # It is assumed that the transcripts are in a subdirectory called transcr -# However, we download the STM from NIST site: +# However, we download the STM from NIST site: # ftp://jaguar.ncsl.nist.gov/lvcsr/mar97/eval/hub5e97.english.980618.stm if [ $# -ne 1 ]; then @@ -26,7 +26,7 @@ sdir=$1 [ ! -d $sdir/transcr ] \ && echo Expecting directory $sdir/transcr to be present && exit 1; -. path.sh +. path.sh dir=data/local/eval1997 mkdir -p $dir @@ -40,7 +40,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -49,8 +49,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/speech/97_hub5e.pem -[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # There is one line in the 97_hub5e.pem with an extra : on the channel # sw_10022 B: unknown_speaker 281.21 284.37 -- the : is removed @@ -64,7 +64,7 @@ grep -v ';;' $pem | sed -e 's?:??g' \ printf "%s %s %.2f %.2f\n", utt, spk, start, end; }' \ | sort -u > $dir/segments - + # Download the STM and GLM files: ( cd $dir rm -f stm glm @@ -78,9 +78,9 @@ grep -v ';;' $pem | sed -e 's?:??g' \ # stm file has lines like: -# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS -# One of the segments (sw_10022-B_028120-028437) is removed since it is not -# scored and does not show up in the pem file. +# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS +# One of the segments (sw_10022-B_028120-028437) is removed since it is not +# scored and does not show up in the pem file. grep -v ';;' $dir/hub5e97.english.980618.stm \ | awk '{ spk=$1"-"$2; @@ -96,7 +96,7 @@ grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -116,4 +116,3 @@ done echo Data preparation and formatting completed for Eval 2000 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/local/eval2000_data_prep.sh b/egs/swbd/s5c/local/eval2000_data_prep.sh index 8d7e1f7ed6e..4c34061a120 100755 --- a/egs/swbd/s5c/local/eval2000_data_prep.sh +++ b/egs/swbd/s5c/local/eval2000_data_prep.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Hub-5 Eval 2000 data preparation +# Hub-5 Eval 2000 data preparation # Author: Arnab Ghoshal (Jan 2013) # To be run from one directory above this script. -# The input is two directory names (possibly the same) containing the +# The input is two directory names (possibly the same) containing the # 2000 Hub5 english evaluation test set and transcripts, which are # respectively: LDC2002S09 LDC2002T43 # e.g. see @@ -35,7 +35,7 @@ tdir=$2 [ ! -d $tdir/reference ] \ && echo Expecting directory $tdir/reference to be present && exit 1; -. path.sh +. 
path.sh dir=data/local/eval2000 mkdir -p $dir @@ -49,7 +49,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -58,8 +58,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/english/hub5e_00.pem -[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # we ignore the warnings below for now, although they seem to indicate some problems @@ -72,7 +72,7 @@ grep -v ';;' $pem \ | sort -u | local/extend_segments.pl 0.1 > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -94,10 +94,10 @@ cp $tdir/reference/en20000405_hub5.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh new file mode 100755 index 00000000000..7cf42c9ae04 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh @@ -0,0 +1,99 @@ +#!/bin/bash + + +# this script is used for comparing decoding results between systems. +# e.g. local/nnet3/compare_wer_general.sh tdnn_c_sp tdnn_d_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2 ... + +echo "# $0 $*"; # print command line. + + +echo -n "# System " +for x in $*; do printf " % 9s" $x; done +echo + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free name, like: +# set_names tdnn_a_sp +# it will set dir=exp/nnet3/tdnn_a_sp and epoch_suffix="" +# If called with something like: +# set_names tdnn_d_sp_smbr:3 +# it will set dir=exp/nnet3/tdnn_d_sp_smbr and epoch_suffix="epoch3" +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + name=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + dirname=exp/nnet3/$name + if [ -z $epoch ]; then + epoch_suffix="" + else + used_epochs=true + epoch_suffix=_epoch${epoch} + fi +} + + +echo -n "# WER on train_dev(tg) " +for x in $*; do + set_names $x + # note: the '*' in the directory name is because there + # is _hires_ in there for the cross-entropy systems, and + # nothing for the sequence trained systems. + wer=$(grep WER $dirname/decode_train_dev*sw1_tg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on train_dev(fg) " +for x in $*; do + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_fsh_fg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on eval2000(tg) " +for x in $*; do + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_tg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on eval2000(fg) " +for x in $*; do + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_fsh_fg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +if $used_epochs; then + # we don't print the probs in this case. + exit 0 +fi + +echo -n "# Final train prob " +for x in $*; do + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_train.combined.log | awk '{print $8}') + printf "% 10.3f" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_valid.combined.log | awk '{print $8}') + printf "% 10.3f" $prob +done +echo diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index 99f6a31e708..ba751ad8732 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -2,7 +2,9 @@ set -o pipefail set -e -# this is run_discriminative.sh + +# Caution: this script is out of date, it does not use the +# refactored discriminative training script with get_degs.sh. # This script does discriminative training on top of CE BLSTM system. # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, @@ -44,7 +46,6 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.0000125 @@ -53,10 +54,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. 
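The -s flag on the second cut call in set_names above is what makes the colon optional: when the input contains no delimiter at all, cut -s suppresses the line instead of echoing it back, so $epoch comes out empty for plain names. In miniature:

    name=$(echo tdnn_d_sp_smbr:3 | cut -d: -f1)      # -> tdnn_d_sp_smbr
    epoch=$(echo tdnn_d_sp_smbr:3 | cut -s -d: -f2)  # -> 3
    epoch=$(echo tdnn_a_sp | cut -s -d: -f2)         # -> "" (no colon, line suppressed)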
@@ -138,15 +135,12 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir data/lang ${srcdir}_ali $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -159,8 +153,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi @@ -170,7 +162,7 @@ if [ $stage -le 5 ]; then for decode_set in train_dev eval2000; do ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + iter=epoch${x}_adj steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ @@ -195,4 +187,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh index 109396ed72e..b64d3e468df 100755 --- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh @@ -13,6 +13,9 @@ speed_perturb=true mkdir -p nnet3 # perturbed data preparation train_set=train_nodup + +if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi + if [ "$speed_perturb" == "true" ]; then if [ $stage -le 1 ]; then #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment @@ -59,18 +62,7 @@ if [ $stage -le 3 ]; then for dataset in $train_set train_100k_nodup; do utils/copy_data_dir.sh data/$dataset data/${dataset}_hires - # scale the waveforms, this is useful as we don't use CMVN - data_dir=data/${dataset}_hires - cat $data_dir/wav.scp | python -c " -import sys, os, subprocess, re, random -scale_low = 1.0/8 -scale_high = 2.0 -for line in sys.stdin.readlines(): - if len(line.strip()) == 0: - continue - print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) -"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1; - mv $data_dir/wav.scp_scaled $data_dir/wav.scp + utils/data/perturb_data_dir_volume.sh data/${dataset}_hires steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir; @@ -81,7 +73,7 @@ for line in sys.stdin.readlines(): utils/fix_data_dir.sh data/${dataset}_hires; done - for dataset in eval2000 train_dev rt03; do + for dataset in eval2000 train_dev $maybe_rt03; do # Create MFCCs for the eval set utils/copy_data_dir.sh data/$dataset data/${dataset}_hires steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ @@ -128,12 +120,12 @@ if 
[ $stage -le 8 ]; then # having a larger number of speakers is helpful for generalization, and to # handle per-utterance decoding well (iVector starts at zero). - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; - for data_set in eval2000 train_dev rt03; do + for data_set in eval2000 train_dev $maybe_rt03; do steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_$data_set || exit 1; done diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn.sh b/egs/swbd/s5c/local/nnet3/run_tdnn.sh index 8189d3c0dba..95976e17d5b 120000 --- a/egs/swbd/s5c/local/nnet3/run_tdnn.sh +++ b/egs/swbd/s5c/local/nnet3/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_b.sh \ No newline at end of file +tuning/run_tdnn_d.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh new file mode 120000 index 00000000000..e4d47deb7a4 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_d_disc.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh deleted file mode 100755 index f422aa92e38..00000000000 --- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh +++ /dev/null @@ -1,186 +0,0 @@ -#!/bin/bash - -set -o pipefail -set -e -# this is run_discriminative.sh - -# This script does discriminative training on top of CE nnet3 system. -# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, -# since the lattice generation runs in about real-time, so takes of the order of -# 1000 hours of CPU time. -# -. cmd.sh - - -stage=0 -train_stage=-10 # can be used to start training in the middle. -get_egs_stage=-10 -use_gpu=true # for training -cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, - # alignments and degs). - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -srcdir=exp/nnet3/nnet_ms_a -train_data_dir=data/train_nodup_sp_hires -online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp -degs_dir= # If provided, will skip the degs directory creation -lats_dir= # If provided, will skip denlats creation - -## Objective options -criterion=smbr -one_silence_class=true - -dir=${srcdir}_${criterion} - -## Egs options -frames_per_eg=150 -frames_overlap_per_eg=30 -truncate_deriv_weights=10 - -## Nnet training options -effective_learning_rate=0.0000125 -max_param_change=1 -num_jobs_nnet=4 -num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options -minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 - -## Decode options -decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. - -if $use_gpu; then - if ! 
cuda-compiled; then - cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn5 dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh new file mode 100755 index 00000000000..b4b60688cdb --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +# d is as c, but with one extra layer. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# note: the last column is a version of tdnn_d that was done after the +# changes for the 5.1 version of Kaldi (variable minibatch-sizes, etc.) 
+# System tdnn_c tdnn_d tdnn_d[repeat] +# WER on train_dev(tg) 17.37 16.72 16.51 +# WER on train_dev(fg) 15.94 15.31 15.34 +# WER on eval2000(tg) 20.0 19.2 19.2 +# WER on eval2000(fg) 18.2 17.8 17.7 +# Final train prob -1.43781 -1.22859 -1.22215 +# Final valid prob -1.56895 -1.354 -1.31647 + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn6 dim=1024 + + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh new file mode 100755 index 00000000000..6f42e042166 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh @@ -0,0 +1,224 @@ +#!/bin/bash + +# This script does discriminative training on top of the CE nnet3 system +# from run_tdnn_d. 
To simplify things, this assumes you are using the "speed-perturbed" data +# (--speed_perturb true, which is the default) in the baseline run_tdnn_d.sh script. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the lattice generation runs in about real-time, so takes of the order of +# 1000 hours of CPU time. + + + +# Below is with the current effective_learning_rate=0.00000125. This was run +# with 4 epochs, but the script is currently set to run for 3 epochs, and the +# 'slow2' affix is removed. + +# steps/info/nnet3_disc_dir_info.pl exp/nnet3/tdnn_d_sp_smbrslow2 +# exp/nnet3/tdnn_d_sp_smbrslow2:num-jobs=4;effective-lrate=1.25e-06;iters-per-epoch=194;epoch[0,1,2,3,4]:train-objf=[0.87,0.91,0.91,0.91,0.92],valid-objf=[0.85,0.86,0.87,0.87,0.87],train-counts=[1.27,0.92,0.79,0.72,0.68],valid-counts=[1.11,0.80,0.74,0.67,0.65] +# +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbrslow2:{1,2,3,4}_adj +# System tdnn_d_sp tdnn_d_sp_smbrslow2:1_adj tdnn_d_sp_smbrslow2:2_adj tdnn_d_sp_smbrslow2:3_adj tdnn_d_sp_smbrslow2:4_adj +# WER on train_dev(tg) 16.51 15.12 15.02 14.89 14.87 +# WER on train_dev(fg) 15.34 13.80 13.64 13.61 13.62 +# WER on eval2000(tg) 19.2 17.8 17.7 17.6 17.8 +# WER on eval2000(fg) 17.7 16.3 16.1 16.2 16.4 + +# Below is when it was run with learning-rate 0.0000025. It was best after 2 epochs. + +# exp/nnet3/tdnn_d_sp_smbrslow:num-jobs=4;effective-lrate=2.5e-06;iters-per-epoch=194;epoch[0,1,2,3]:train-objf=[0.87,0.91,0.91,0.92],valid-objf=[0.85,0.87,0.87,0.87],train-counts=[1.27,0.80,0.73,0.65],valid-counts=[1.11,0.72,0.65,0.63] +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbrslow:{1,2,3}_adj +# System tdnn_d_sp tdnn_d_sp_smbrslow:1_adj tdnn_d_sp_smbrslow:2_adj tdnn_d_sp_smbrslow:3_adj +# WER on train_dev(tg) 16.51 15.01 14.89 14.84 +# WER on train_dev(fg) 15.34 13.69 13.61 13.58 +# WER on eval2000(tg) 19.2 17.7 17.8 17.8 +# WER on eval2000(fg) 17.7 16.2 16.4 16.5 + +# Below is when it was run with learning-rate 0.000005. It was best after 1st epoch. + +# steps/info/nnet3_disc_dir_info.pl exp/nnet3/tdnn_d_sp_smbr +# exp/nnet3/tdnn_d_sp_smbr:num-jobs=4;effective-lrate=5e-06;iters-per-epoch=194;epoch[0,1,2,3]:train-objf=[0.87,0.91,0.92,0.93],valid-objf=[0.85,0.87,0.87,0.88],train-counts=[1.27,0.67,0.67,0.50],valid-counts=[1.11,0.64,0.61,0.58] + +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:{1,2,3}_adj +# System tdnn_d_sp tdnn_d_sp_smbr:1_adj tdnn_d_sp_smbr:2_adj tdnn_d_sp_smbr:3_adj +# WER on train_dev(tg) 16.51 14.94 14.85 14.91 +# WER on train_dev(fg) 15.34 13.66 13.76 13.77 +# WER on eval2000(tg) 19.2 17.7 17.9 18.1 +# WER on eval2000(fg) 17.7 16.2 16.5 16.6 + +# below is with learning-rate 0.000005, showing results without prior-adjustment (the prior-adjustment +# helps). +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:{1,2,3} +# System tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2 tdnn_d_sp_smbr:3 +# WER on train_dev(tg) 16.51 15.06 15.05 15.04 +# WER on train_dev(fg) 15.34 13.88 13.92 13.85 +# WER on eval2000(tg) 19.2 17.9 18.1 18.2 +# WER on eval2000(fg) 17.7 16.4 16.7 16.9 + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. 
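+# (Usage sketch, with a hypothetical path: since degs_dir is declared before
+# utils/parse_options.sh is sourced below, a previous run's examples can be
+# reused, and their creation skipped, with something like:
+#   local/nnet3/tuning/run_tdnn_d_disc.sh --degs-dir exp/nnet3/tdnn_d_sp_degs )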
+nj=400  # have a high number of jobs because this could take a while, and we might
+        # have some stragglers.
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+graph_dir=exp/tri4/graph_sw1_tg
+srcdir=exp/nnet3/tdnn_d_sp
+train_data_dir=data/train_nodup_sp_hires
+online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp
+
+
+## Objective options
+criterion=smbr
+one_silence_class=true
+
+# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b"
+# originally ran with no affix, with effective_learning_rate=0.0000125;
+# reran by mistake with no affix with effective_learning_rate=0.000005 [was a bit
+# better, see NOTES, but still best after 1st epoch].
+# reran again with affix=slow and effective_learning_rate=0.0000025
+# reran again with affix=slow2 and effective_learning_rate=0.00000125 (this was
+# about the best).
+# before checking in the script, removed the slow2 affix but left with
+# the lowest learning rate.
+disc_affix=
+
+dir=${srcdir}_${criterion}${disc_affix}
+
+## Egs options. Give quite a few choices of chunk length,
+## so it can split utterances without much gap or overlap.
+frames_per_eg=300,280,150,120,100
+frames_overlap_per_eg=0
+frames_per_chunk_decoding=200
+## these context options should match the training condition. (chunk_left_context,
+## chunk_right_context)
+## We set --extra-left-context-initial 0 and --extra-right-context-final 0
+## directly in the script below, but this should also match the training condition.
+## Note: extra-left-context and extra-right-context are 0 because this is a TDNN,
+## it's not a recurrent model like an LSTM or BLSTM.
+extra_left_context=0
+extra_right_context=0
+
+
+## Nnet training options
+effective_learning_rate=0.00000125
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=3
+regularization_opts=  # Applicable for providing --xent-regularize and --l2-regularize options,
+                      # in chain models.
+minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                     # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat < $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=1024
+  relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
+  relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
+  relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
+  relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
+
+  output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec
+EOF
+
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+
+if [ $stage -le 10 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh new file mode 100755 index 00000000000..a82b2078acb --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +# _lfr1a is as _c, but is LFR (low frame rate): it uses triphone chain topology +# with a frame subsampling factor of 3. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_lfr1a +# WER on train_dev(tg) 17.37 17.25 +# WER on train_dev(fg) 15.94 15.90 +# WER on eval2000(tg) 20.0 20.1 +# WER on eval2000(fg) 18.2 18.5 +# Final train prob -1.43781 -1.32434 +# Final valid prob -1.56895 -1.42206 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
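+  # (Sanity check once this stage is done: we request 8400 leaves below, and
+  # the tree actually built can be inspected with
+  #   tree-info $treedir/tree | grep num-pdfs
+  # which is also how the xconfig stage below derives num_targets.)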
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn5 dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
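+  # (Note: the --self-loop-scale 0.333 below is 1/frame_subsampling_factor;
+  # the decode stage matches it with --acwt 0.333 and --post-decode-acwt 3.0,
+  # so the scores in the final lattices come out on the usual scale.)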
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh new file mode 100755 index 00000000000..8c80dc3d7ad --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +# _lfr1b is as _lfr1a, but with one more -3,3 layer (the comparable +# non-LFR system is tdnn_d) + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_d tdnn_lfr1a tdnn_lfr1b +# WER on train_dev(tg) 16.72 17.25 17.00 +# WER on train_dev(fg) 15.31 15.90 15.57 +# WER on eval2000(tg) 19.2 20.1 19.3 +# WER on eval2000(fg) 17.8 18.5 17.8 +# Final train prob -1.22859 -1.32434 -1.11497 +# Final valid prob -1.354 -1.42206 -1.21105 + + + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn6 dim=1024 + + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
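+  # (The frame_subsampling_factor file written above is read by
+  # steps/nnet3/decode.sh; that is what makes the decoder evaluate the
+  # model only once per 3 input frames.)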
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh new file mode 100755 index 00000000000..95cdbf7f975 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +# _lfr1c is as _lfr1a, but uses splicing similar to chain's without changing +# number of layers (comparable non-LFR system is tdnn_e). + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_e tdnn_lfr1c +# WER on train_dev(tg) 17.37 16.75 17.10 +# WER on train_dev(fg) 15.94 15.34 15.74 +# WER on eval2000(tg) 20.0 19.5 19.2 +# WER on eval2000(fg) 18.2 18.0 17.7 +# Final train prob -1.43781 -1.40491 -1.29898 +# Final valid prob -1.56895 -1.55255 -1.43117 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +#exp/nnet3/tdnn_lfr1b_sp/egs +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
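+  # (Worked out: acoustics are scaled by 0.333 during the search and by 3.0
+  # after decoding, i.e. by 0.333 * 3.0 ~= 1.0 overall, which is why the
+  # usual LM rescoring below works unchanged.)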
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh new file mode 100755 index 00000000000..734c5a5d1be --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +# This script does discriminative training on top of the CE nnet3 LFR system +# from run_tdnn_lfr1c. To simplify things, this assumes you are using the +# "speed-perturbed" data +# (--speed_perturb true, which is the default) in the baseline run_tdnn_d.sh script. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the lattice generation runs in about real-time, so takes of the order of +# 1000 hours of CPU time. + +# Comparing effect of shift: +# System tdnn_lfr1c_sp_smbr:1 tdnn_lfr1c_sp_smbr:2 tdnn_lfr1c_sp_smbr:3 tdnn_lfr1c_sp_fs_smbr:1 tdnn_lfr1c_sp_fs_smbr:2 tdnn_lfr1c_sp_fs_smbr:3 +# WER on train_dev(tg) 16.26 16.11 16.02 16.02 15.77 15.78 +# WER on train_dev(fg) 15.01 14.91 14.80 14.79 14.58 14.50 +# WER on eval2000(tg) 18.9 18.7 18.6 18.6 18.5 18.5 +# WER on eval2000(fg) 17.4 17.2 17.1 17.1 17.0 16.9 + + +set -e +set -uo pipefail + +stage=0 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=65 # have a high number of jobs because this could take a while, and we might + # have some stragglers. + +## Objective options +criterion=smbr +one_silence_class=true + +# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b" +# originally ran with no affix, with effective_learning_rate=0.0000125; +# reran by mistake with no affix with effective_learning_rate=0.000005 [was a bit +# better, see NOTES, but still best after 1st epoch]. +# reran again with affix=slow and effective_learning_rate=0.0000025 +# reran again with affix=slow2 and effective_learning_rate=0.00000125 (this was +# about the best). +# before checking in the script, removed the slow2 affix but left with +# the lowest learning rate. +disc_affix= + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_decoding=200 +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +## Note: extra-left-context and extra-right-context are 0 because this is a TDNN, +## it's not a recurrent model like an LSTM or BLSTM. 
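+## (For contrast, a hedged example: a recurrent model would need these to be
+## nonzero, e.g. something like extra_left_context=40 for an LSTM, matching
+## whatever was used in training; for this TDNN both are correctly 0.)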
+extra_left_context=0
+extra_right_context=0
+
+
+## Nnet training options
+effective_learning_rate=0.00000125
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=3
+regularization_opts=  # Applicable for providing --xent-regularize and --l2-regularize options,
+                      # in chain models.
+minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                     # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+shift_feats=false
+
+## Decode options
+decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+srcdir=exp/nnet3/tdnn_lfr1c_sp
+graph_dir=$srcdir/graph_sw1_tg
+train_data_dir=data/train_nodup_sp_hires
+online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp
+dir=${srcdir}_${criterion}${disc_affix}
+
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat <"
+  echo "Usage: $0 <rt03-dir>"
+  echo "e.g.: $0 /export/corpora/LDC/LDC2007S10"
   echo "See comments in the script for more details"
   exit 1
 fi
@@ -19,7 +20,7 @@ sdir=$1
 [ ! -d $sdir/data/references/eval03/english/cts ] \
   && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1;
-. path.sh
+. path.sh
 dir=data/local/rt03
 mkdir -p $dir
@@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
   && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1;
 awk -v sph2pipe=$sph2pipe '{
-  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
+  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
   printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
 }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1;
 #side A - channel 1, side B - channel 2
@@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{
 # sw02001-A_000098-001156 sw02001-A 0.98 11.56
 #pem=$sdir/english/hub5e_00.pem
 #[ ! -f $pem ] && echo "No such file $pem" && exit 1;
-# pem file has lines like:
+# pem file has lines like:
 # en_4156 A unknown_speaker 301.85 302.48
 #grep -v ';;' $pem \
@@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \
   | sort -u > $dir/segments
 # stm file has lines like:
-# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER
+# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER
 # TODO(arnab): We should really be lowercasing this since the Edinburgh
 # recipe uses lowercase. This is not used in the actual scoring.
 #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \
@@ -77,7 +78,7 @@ cat $tdir/*.stm | \
   grep -v inter_segment_gap | \
   awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\
-  > $dir/stm
+  > $dir/stm
 #$tdir/reference/hub5e00.english.000405.stm > $dir/stm
 cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
@@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
   echo "Segments from pem file and stm file do not match." && exit 1;
 grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text
-
+
 # create an utt2spk file that assumes each conversation side is
 # a separate speaker.
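+# For illustration: given a segments line like the commented example above,
+# "sw02001-A_000098-001156 sw02001-A 0.98 11.56", the resulting utt2spk entry
+# would be "sw02001-A_000098-001156 sw02001-A".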
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -110,4 +111,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/local/swbd1_prepare_dict.sh b/egs/swbd/s5c/local/swbd1_prepare_dict.sh index 673513806dc..3d9297b5f19 100755 --- a/egs/swbd/s5c/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5c/local/swbd1_prepare_dict.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Formatting the Mississippi State dictionary for use in Edinburgh. Differs +# Formatting the Mississippi State dictionary for use in Edinburgh. Differs # from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013) # To be run from one directory above this script. @@ -16,7 +16,7 @@ mkdir -p $dir srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text # assume swbd_p1_data_prep.sh was done already. -[ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; +[ ! -f "$srcdict" ] && echo "$0: No such file $srcdict" && exit 1; cp $srcdict $dir/lexicon0.txt || exit 1; patch WOLMANIZED # Also, mispronounced words, e.g. @@ -90,4 +90,3 @@ ln -sf lexicon5.txt lexicon.txt # This is the final lexicon. popd >&/dev/null rm $dir/lexiconp.txt 2>/dev/null echo Prepared input dictionary and phone-sets for Switchboard phase 1. - diff --git a/egs/swbd/s5c/run.sh b/egs/swbd/s5c/run.sh index 0eafe73d046..8b08419007d 100755 --- a/egs/swbd/s5c/run.sh +++ b/egs/swbd/s5c/run.sh @@ -72,11 +72,16 @@ fi # local/eval2000_data_prep.sh /home/dpovey/data/LDC2002S09/hub5e_00 /home/dpovey/data/LDC2002T43 local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43 +# prepare the rt03 data. Note: this isn't 100% necessary for this +# recipe, not all parts actually test using rt03. +local/rt03_data_prep.sh /export/corpora/LDC/LDC2007S10 + # Now make MFCC features. # mfccdir should be some place with a largish disk where you # want to store MFCC features. 
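+# (e.g., a hypothetical site-specific choice: mfccdir=/export/b05/$USER/mfcc)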
+if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi mfccdir=mfcc -for x in train eval2000; do +for x in train eval2000 $maybe_rt03; do steps/make_mfcc.sh --nj 50 --cmd "$train_cmd" \ data/$x exp/make_mfcc/$x $mfccdir steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir diff --git a/egs/tedlium/s5/cmd.sh b/egs/tedlium/s5/cmd.sh index ba7f120e599..4e0263d7cca 100644 --- a/egs/tedlium/s5/cmd.sh +++ b/egs/tedlium/s5/cmd.sh @@ -11,9 +11,9 @@ #export cuda_cmd=run.pl # JHU cluster: -export train_cmd="queue.pl -l arch=*64*" -export decode_cmd="queue.pl -l arch=*64* --mem 4G" -export cuda_cmd="queue.pl -l arch=*64* --gpu 1" +export train_cmd="queue.pl" +export decode_cmd="queue.pl --mem 4G" +export cuda_cmd="queue.pl --gpu 1" host=$(hostname -f) if [ ${host#*.} == "fit.vutbr.cz" ]; then @@ -23,10 +23,10 @@ if [ ${host#*.} == "fit.vutbr.cz" ]; then storage="matylda5" export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1" export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5" - export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1" + export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1" elif [ ${host#*.} == "cm.cluster" ]; then # MARCC bluecrab cluster: export train_cmd="slurm.pl --time 4:00:00 " export decode_cmd="slurm.pl --mem 4G --time 4:00:00 " - export cuda_cmd="slurm.pl --gpu 1" + export cuda_cmd="slurm.pl --gpu 1" fi diff --git a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh index a5b80505393..8d7393af853 100755 --- a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh @@ -4,7 +4,7 @@ # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# #%WER 13.3 | 507 17792 | 89.1 8.2 2.8 2.4 13.3 86.0 | -0.207 | exp/nnet3/tdnn_smbr/decode_dev_epoch1.adj/score_12_1.0/ctm.filt.filt.sys #%WER 12.4 | 507 17792 | 89.8 7.5 2.7 2.2 12.4 85.4 | -0.305 | exp/nnet3/tdnn_smbr/decode_dev_epoch1.adj_rescore/score_12_1.0/ctm.filt.filt.sys @@ -52,27 +52,22 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.0000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. if $use_gpu; then if ! cuda-compiled; then - cat <${dir}_online/sample_decode.sh . cmd.sh data_dir=\$1 # e.g. data/dev_hires (to be prepared by the user, see egs/tedlium/run.sh for examples) -model_dir=\$2 # e.g. exp/nnet2_online/nnet_ms_sp_online (provided in the distribution) +model_dir=\$2 # e.g. 
exp/nnet2_online/nnet_ms_sp_online (provided in the distribution) decode_dir=\$model_dir/\`basename \$data_dir\` num_jobs=\`cat \$data_dir/spk2utt | wc -l\` diff --git a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh index aebbd66349a..00b2d29cc88 100755 --- a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh @@ -1,64 +1,106 @@ #!/bin/bash -echo $0 $* +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_c_sp exp/chain_cleaned/tdnn_c_sp_smbr:{1,2,3} -echo -n "System " -for x in $*; do printf "% 10s" " $(basename $x)"; done -echo -echo -n "WER on dev(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_dev/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo +echo "# $0 $*" -echo -n "WER on dev(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_dev_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi -echo -n "WER on test(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_test/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain_cleaned/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done echo -echo -n "WER on test(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_test_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer +strings=("# WER on dev(orig) " "# WER on dev(rescored) " "# WER on test(orig) " "# WER on test(rescored)") + +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi done -echo -echo -n "Final train prob " +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular 
and discriminatively trained systems. +fi + +echo -n "# Final train prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob " +echo -n "# Final valid prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final train prob (xent)" +echo -n "# Final train prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob (xent)" +echo -n "# Final valid prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done + echo diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn.sh index 61f8f499182..e1adaa9346d 120000 --- a/egs/tedlium/s5_r2/local/chain/run_tdnn.sh +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_1b.sh \ No newline at end of file +tuning/run_tdnn_1d.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh index 8e647598556..fbc28248491 120000 --- a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1a.sh \ No newline at end of file +tuning/run_tdnn_lstm_1e.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..d4268b4185a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1e_disc.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..f7a18b4bfcf --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh @@ -0,0 +1,295 @@ +#!/bin/bash + +# run_tdnn_1c.sh is like run_tdnn_1b.sh but changing chunk-width from 150 to +# '140,110,160', and +# and --trainer.num-chunk-per-minibatch from 128 to 128,64. +# Not better; if anything a little worse. But could possibly be noise. + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1c_sp_bi +# System tdnn1b_sp_bi tdnn1c_sp_bi +# WER on dev(orig) 9.4 9.8 +# WER on dev(rescored) 8.8 9.0 +# WER on test(orig) 9.6 9.7 +# WER on test(rescored) 9.0 9.2 +# Final train prob -0.0870 -0.0942 +# Final valid prob -0.1147 -0.1108 +# Final train prob (xent) -1.4014 -1.4227 +# Final valid prob (xent) -1.5634 -1.4884 + + +# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based +# config generation. 
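+
+# (For reference: the '140,110,160' passed to --egs.chunk-width below is a
+# list of allowed chunk sizes, not a single size; egs generation picks among
+# them per utterance, tiling utterances with less gap/overlap than one fixed
+# width of 150 allows. '128,64' for num-chunk-per-minibatch is analogous.)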
+ +# Results (11/29/2016, note, this build is is before the upgrade of the LM +# done in Nov 2016): +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_sp_bi exp/chain_cleaned/tdnn1b_sp_bi +# System tdnn_sp_bi tdnn1b_sp_bi +# WER on dev(orig) 10.3 10.2 +# WER on dev(rescored) 9.8 9.6 +# WER on test(orig) 9.8 9.7 +# WER on test(rescored) 9.3 9.2 +# Final train prob -0.0918 -0.0928 +# Final valid prob -0.1190 -0.1178 +# Final train prob (xent) -1.3572 -1.4666 +# Final valid prob (xent) -1.4415 -1.5473 + + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1c #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width '140,110,160' \ + --trainer.num-chunk-per-minibatch '128,64' \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
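+  # (Afterwards the looped and regular passes can be compared with, e.g.:
+  #   local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn1c_sp_bi
+  # which adds the "# [looped:]" rows to the table.)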
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..99921a9bf61 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,256 @@ +#!/bin/bash + + +# run_tdnn_1d.sh is like run_tdnn_1b.sh but using 10 times the self-repair +# scale on the 1st TDNN layer. +# seems a little better- I wouldn't say it was significant normally, but +# it definitely stops the 1st TDNN layer from having under/over-saturated +# neurons. + +# exp/chain_cleaned/tdnn1b_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.47,-1.40,-1.40/-1.61,-1.57,-1.56) logprob:train/valid[167,252,final]=(-0.096,-0.087,-0.087/-0.119,-0.115,-0.115) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.46,-1.39,-1.39/-1.61,-1.56,-1.55) logprob:train/valid[167,252,final]=(-0.096,-0.088,-0.088/-0.120,-0.115,-0.115) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev(orig) 9.4 9.5 +# WER on dev(rescored) 8.8 8.6 +# WER on test(orig) 9.6 9.4 +# WER on test(rescored) 9.0 8.9 +# Final train prob -0.0870 -0.0878 +# Final valid prob -0.1147 -0.1152 +# Final train prob (xent) -1.4014 -1.3921 +# Final valid prob (xent) -1.5634 -1.5543 + +# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based +# config generation. + + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. 
"a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..eb2c91dc3d4 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,317 @@ +#!/bin/bash + +# this is as run_tdnn_lstm_1a.sh, but changing +# frames_per_chunk 150 to 140,100,160 +# and --trainer.num-chunk-per-minibatch from 128 to 128,64 +# and adding +# --egs.chunk-left-context-initial=0 +# and --egs.chunk-right-context-final=0 +# See 1e for summary of results. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1b #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
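+  # (illustrative: after these decodes finish, a quick way to compare looped
+  # vs. regular decoding is the standard best-WER idiom, e.g.
+  #   for d in $dir/decode_dev $dir/decode_looped_dev; do
+  #     grep WER $d/wer_* | utils/best_wer.sh
+  #   done
+  # local/chain/compare_wer_general.sh, quoted in the header comments, wraps
+  # the same idea for whole experiment directories.)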
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh new file mode 100755 index 00000000000..bb3c5b1a942 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# 1c is as 1b, but adding the option --slow-start true. [since removed; it +# takes half the param change from the first two minibatches of each +# job]. The difference is probably just random noise. + + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1c_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1c_sp_bi +# WER on dev(orig) 9.1 8.9 +# WER on dev(rescored) 8.4 8.2 +# WER on test(orig) 8.9 8.9 +# WER on test(rescored) 8.4 8.5 +# Final train prob -0.0621 -0.0620 +# Final valid prob -0.0799 -0.0811 +# Final train prob (xent) -0.8300 -0.8117 +# Final valid prob (xent) -0.9500 -0.9448 + + + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1c #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. 
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
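+  # (worked example: this script leaves xent_regularize at its default of 0.1,
+  # so learning_rate_factor = 0.5 / 0.1 = 5.0; the factor of 5 compensates for
+  # the 0.1 scaling of the xent branch's derivatives, leaving an effective
+  # rate of 0.5 relative to the main output, as described above.)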
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.slow-start true \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..4be28a4ca97 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,313 @@ +#!/bin/bash + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it +# uses egs from 1b, remember to remove that before I commit. +# See 1e for summary of results. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1d #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
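+  # (illustrative: the one-line training summaries quoted in the header
+  # comments come from steps/info/chain_dir_info.pl, e.g.
+  #   steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1d_sp_bi
+  # which reports num-iters, num-params and the final train/valid objectives,
+  # and is a cheap check on a run before launching these decodes.)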
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..e56946c1b54 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,395 @@ +#!/bin/bash + +# 1e is as 1d, but reducing decay-time from 40 to 20. + +# The following table shows comparison of various decay-time values, +# namely: [b:unset=infinity, f:80, d:40, e:20, g:10, g2:5]. +# note: the g2 script is not checked in. +# There is no clear trend on the non-looped decoding, but looped decoding seems +# to improve as decay-time is decreased. We end up recommending decay-time=20, +# as by then we get all the improvement on looped decoding, and it's the +# most conservative setting with which we can get this improvement (although +# actually it seems fine to use an even smaller decay-time). + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{b,f,d,e,g,g2}_sp_bi + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1f_sp_bi exp/chain_cleaned/tdnn_lstm1d_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1g_sp_bi exp/chain_cleaned/tdnn_lstm1g2_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1f_sp_bi tdnn_lstm1d_sp_bi tdnn_lstm1e_sp_bi tdnn_lstm1g_sp_bi tdnn_lstm1g2_sp_bi +# WER on dev(orig) 9.1 8.8 9.0 9.0 9.0 9.4 +# [looped:] 9.4 9.3 9.2 9.0 8.9 9.4 +# WER on dev(rescored) 8.4 8.2 8.4 8.4 8.4 8.7 +# [looped:] 8.8 8.7 8.6 8.4 8.3 8.7 +# WER on test(orig) 8.9 9.0 8.9 8.8 8.8 9.3 +# [looped:] 9.3 9.3 9.0 8.8 8.8 9.2 +# WER on test(rescored) 8.4 8.6 8.3 8.4 8.4 8.9 +# [looped:] 8.7 8.9 8.5 8.3 8.4 8.8 +# Final train prob -0.0621 -0.0631 -0.0595 -0.0648 -0.0689 -0.0739 +# Final valid prob -0.0799 -0.0802 -0.0823 -0.0827 -0.0890 -0.0963 +# Final train prob (xent) -0.8300 -0.8295 -0.8129 -0.8372 -0.8610 -0.8792 +# Final valid prob (xent) -0.9500 -0.9662 -0.9589 -0.9497 -0.9982 -1.0256 + + +# the following table compares the 'online' decoding with regular and looped +# decoding. online decoding is a little better than either (possibly due to +# using slightly later iVectors). +# +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi{,_online} 2>/dev/null +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_online +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_online +# WER on dev(orig) 9.0 8.8 +# [looped:] 9.0 +# WER on dev(rescored) 8.4 8.4 +# [looped:] 8.4 +# WER on test(orig) 8.8 8.8 +# [looped:] 8.8 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 + + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! 
cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. 
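+  # (back-of-the-envelope, assuming the usual 10 ms frame shift: 30 input
+  # frames per chunk is about 0.3 s of audio, so the chunk size sets roughly
+  # that much algorithmic latency; the 140-frame chunks used in training
+  # would imply ~1.4 s, which is what the comment above means by excessive
+  # latency in a real online scenario.)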
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh new file mode 100755 index 00000000000..0d64c75aea8 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh @@ -0,0 +1,264 @@ +#!/bin/bash + +# This script does discriminative training on top of the 1e chain system. To +# simplify things, this assumes you are using the "cleaned" data (since this is +# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1e.sh +# to use the non-cleaned data. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. + + +# Below is with 0.00002 and last_layer_factor=0.5 +# this is the setting we're leaving in the script, but the discriminative training +# is not really helping. Maybe we should try the frame-shifted version. 
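+# (usage sketch, using only options this script actually exposes, per the
+# comments below: a variant configuration can be run alongside the default
+# one by setting the affix, and the large intermediate egs can be removed
+# afterwards:
+#   local/chain/tuning/run_tdnn_lstm_1e_disc.sh --disc-affix "_b"
+#   local/chain/tuning/run_tdnn_lstm_1e_disc.sh --cleanup true --stage 6
+# )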
+# steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2 +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:num-jobs=4;effective-lrate=2e-05;last-layer-factor=0.50;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.10],valid-counts=[0.28,0.20,0.17] +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbroutslow2:1 tdnn_lstm1e_sp_bi_smbroutslow2:2 +# WER on dev(orig) 9.0 8.9 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.7 8.8 +# [looped:] 8.8 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.4 +# [looped:] 8.3 8.4 8.5 + + + +# Below is with 0.00002 and last_layer_factor=1.0. +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:num-jobs=4;lrate=2e-05;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.09],valid-counts=[0.28,0.19,0.16] +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbr:1 tdnn_lstm1e_sp_bi_smbr:2 +# WER on dev(orig) 9.0 8.8 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.8 8.9 +# [looped:] 8.8 8.8 8.9 +# WER on test(rescored) 8.4 8.4 8.5 +# [looped:] 8.3 8.4 8.5 + + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. +# you can set disc_affix if you run different configurations, e.g. --disc-affix "_b" +disc_affix= + + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +srcdir=exp/chain_cleaned/tdnn_lstm1e_sp_bi +graph_dir=$srcdir/graph +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb + +## Objective options +criterion=smbr +one_silence_class=true + +dir=${srcdir}_${criterion}${disc_affix} + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_egs=200 # frames-per-chunk for decoding in alignment and + # denlat decoding. +frames_per_chunk_decoding=140 # frames-per-chunk for decoding when we test + # the models. +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +extra_left_context=40 +extra_right_context=0 + + + +## Nnet training options +effective_learning_rate=0.00002 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=2 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. 
+last_layer_factor=0.5  # have the output layer train slower than the others; this can
+                       # be helpful.
+minibatch_size="300=32,16/150=64,32"  # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                      # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1  # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat </dev/null || true
+
+  for x in `seq $decode_start_epoch $num_epochs`; do
+    for decode_set in dev test; do
+      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
+      iter=epoch$x
+      # We don't test the iter "epoch${x}_adj", although it's computed,
+      # because prior-adjustment doesn't make sense for chain models
+      # and it degrades the results.
+      (
+        steps/nnet3/decode_looped.sh \
+          --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --frames-per-chunk 30 \
+          --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+          $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1;
+        steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+          data/${decode_set}_hires \
+          ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1
+      ) || touch $dir/.error &
+    done
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+
+wait;
+
+if [ $stage -le 6 ] && $cleanup; then
+  # if you run with "--cleanup true --stage 6" you can clean up.
+  # actually, keep the alignments in case we need them later; they're slow to
+  # create, and quite big.
+  # rm ${srcdir}_ali/ali.*.gz || true
+
+  steps/nnet2/remove_egs.sh ${srcdir}_degs || true
+fi
+
+
+exit 0;
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh
new file mode 100755
index 00000000000..3ed14f30956
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh
@@ -0,0 +1,315 @@
+#!/bin/bash
+
+# 1f is as 1d, but increasing decay-time from 40 to 80.  [see also 1e, at 20.]
+# see 1e for summary of results.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers.  note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results.  Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                  lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi
+# WER on dev(orig)            10.3        10.7         9.7
+# WER on dev(rescored)         9.8        10.1         9.3
+# WER on test(orig)            9.7         9.8         9.1
+# WER on test(rescored)        9.2         9.4         8.7
+# Final train prob          -0.0812     -0.0862     -0.0625
+# Final valid prob          -0.1049     -0.1047     -0.0910
+# Final train prob (xent)   -1.1334     -1.1763     -0.8518
+# Final valid prob (xent)   -1.2263     -1.2427     -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1f  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
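+  # (In looped decoding the network's recurrent state is carried over from the
+  # previous chunk, so extra context only matters at the very start of the
+  # utterance; that is why only --extra-left-context-initial is passed here,
+  # with no per-chunk extra left/right context as in the regular decoding
+  # stage above.)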
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --extra-left-context-initial $extra_left_context_initial \
+        --frames-per-chunk 30 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        --scoring-opts "--min-lmwt 5 " \
+        $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh
new file mode 100755
index 00000000000..aff39a04025
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -0,0 +1,318 @@
+#!/bin/bash
+
+#######################
+# 1g is as 1e, but reducing decay-time further from 20 to 10.
+# see 1e for summary of results.
+
+# 1e is as 1b, but reducing decay-time from 40 to 20.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers.  note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results.  Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                  lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi
+# WER on dev(orig)            10.3        10.7         9.7
+# WER on dev(rescored)         9.8        10.1         9.3
+# WER on test(orig)            9.7         9.8         9.1
+# WER on test(rescored)        9.2         9.4         8.7
+# Final train prob          -0.0812     -0.0862     -0.0625
+# Final valid prob          -0.1049     -0.1047     -0.0910
+# Final train prob (xent)   -1.1334     -1.1763     -0.8518
+# Final valid prob (xent)   -1.2263     -1.2427     -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
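+
+# All of the variables below can be overridden on the command line thanks to
+# utils/parse_options.sh; for example (a hypothetical invocation), to redo
+# just the network config and training stages of this script:
+#   local/chain/tuning/run_tdnn_lstm_1g.sh --stage 17 --train-stage -10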
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1g #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
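+  # (About the lmrescore_const_arpa.sh calls in these decode blocks: they
+  # replace the first-pass LM scores in the lattices with scores from the
+  # larger const-ARPA LM built in data/lang_rescore, which is much cheaper
+  # than decoding with the big LM directly.)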
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --extra-left-context-initial $extra_left_context_initial \
+        --frames-per-chunk 30 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        --scoring-opts "--min-lmwt 5 " \
+        $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
new file mode 100755
index 00000000000..8ffd43f27bc
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+
+#######################
+# 1h is as 1e, but increasing decay-time from 20 to 30.
+# 1e is as 1b, but reducing decay-time from 40 to 20.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers.  note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results.  Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                  lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi
+# WER on dev(orig)            10.3        10.7         9.7
+# WER on dev(rescored)         9.8        10.1         9.3
+# WER on test(orig)            9.7         9.8         9.1
+# WER on test(rescored)        9.2         9.4         8.7
+# Final train prob          -0.0812     -0.0862     -0.0625
+# Final valid prob          -0.1049     -0.1047     -0.0910
+# Final train prob (xent)   -1.1334     -1.1763     -0.8518
+# Final valid prob (xent)   -1.2263     -1.2427     -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
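+
+# (A note on decay-time, which the 1d/1e/1g/1h variants tune: as we understand
+# it, this option on the fast-lstmp layers softly limits how long the cell
+# state can remember the past (here about 30 frames, i.e. roughly 0.3 seconds
+# of input), acting as a regularizer and keeping the layers' behavior similar
+# between fixed-size training chunks and the unbounded history seen in
+# looped decoding.)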
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1h #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
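+  # (As elsewhere in 'chain' recipes: --acwt 1.0 is the scale at which chain
+  # models are intended to be decoded, and --post-decode-acwt 10.0 then scales
+  # the acoustic scores up in the lattices so that the usual integer
+  # language-model-weight range works in scoring.)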
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..62497ca59ff --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,337 @@ +#!/bin/bash + +# 1i is as 1e, but adding boundary-offset. No clear effect. +# +# the 3 columns below are: baseline; boundary-offset with that component +# learning with 10x the normal learning rate; boundary-offset with +# regular learning rate. There seems no clear benefit from this +# idea. Reverting the code changes that supported it; +# see ~dpovey/patches/lstm_boundary.patch + + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi.orig_learning_rate +# System tdnn_lstm1e_sp_bi tdnn_lstm1i_sp_bi tdnn_lstm1i_sp_bi.orig_learning_rate +# WER on dev(orig) 9.0 9.1 8.9 +# [looped:] 9.0 9.0 9.0 +# WER on dev(rescored) 8.4 8.3 8.3 +# [looped:] 8.4 8.2 8.2 +# WER on test(orig) 8.8 8.9 8.9 +# [looped:] 8.8 8.9 8.9 +# WER on test(rescored) 8.4 8.4 8.4 +# [looped:] 8.3 8.4 8.4 +# Final train prob -0.0648 -0.0625 -0.0644 +# Final valid prob -0.0827 -0.0833 -0.0855 +# Final train prob (xent) -0.8372 -0.8129 -0.8286 +# Final valid prob (xent) -0.9497 -0.9558 -0.9641 + + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                  lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi
+# WER on dev(orig)            10.3        10.7         9.7
+# WER on dev(rescored)         9.8        10.1         9.3
+# WER on test(orig)            9.7         9.8         9.1
+# WER on test(rescored)        9.2         9.4         8.7
+# Final train prob          -0.0812     -0.0862     -0.0625
+# Final valid prob          -0.1049     -0.1047     -0.0910
+# Final train prob (xent)   -1.1334     -1.1763     -0.8518
+# Final valid prob (xent)   -1.2263     -1.2427     -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1i  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
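+  # (On --scoring-opts "--min-lmwt 5" below: with --post-decode-acwt 10.0 the
+  # best language-model weight typically ends up near 10, so the scoring grid
+  # can start as low as 5 rather than at the larger values used for un-scaled
+  # systems; this is a heuristic, not something tuned in this script.)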
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..c9a57f0ab4d --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,334 @@ +#!/bin/bash + +# 1j is as 1e, but adding self-repair-scale=1.0e-04 on 1st tdnn layer [default is 1e-5]. +# It's definitely more effective in preventing under or over-saturated ReLUs, but +# it's not clear that there is any other benefit. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,j}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1j_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1j_sp_bi +# WER on dev(orig) 9.0 9.1 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.5 +# WER on test(orig) 8.8 9.0 +# [looped:] 8.8 9.1 +# WER on test(rescored) 8.4 8.6 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0646 +# Final valid prob -0.0827 -0.0835 +# Final train prob (xent) -0.8372 -0.8296 +# Final valid prob (xent) -0.9497 -0.9597 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1j  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
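+  # (A note on the error handling below, used in all these decode stages: each
+  # dataset is decoded in a backgrounded subshell that touches $dir/.error on
+  # failure; after 'wait' we test for that file, since exit codes of
+  # backgrounded subshells are not propagated to the main script.)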
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100755 index 00000000000..ab9d6ce6342 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1k  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+                                  --nj $nj \
+                                  --min-seg-len $min_seg_len \
+                                  --train-set $train_set \
+                                  --gmm $gmm \
+                                  --num-threads-ubm $num_threads_ubm \
+                                  --nnet3-affix "$nnet3_affix"
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
+dir=exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi
+train_data_dir=data/${train_set}_sp_hires_comb
+lores_train_data_dir=data/${train_set}_sp_comb
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
+
+if [ $stage -le 14 ]; then
+  echo "$0: creating lang directory with one state per phone."
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file.  [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  if [ -d data/lang_chain ]; then
+    echo "$0: data/lang_chain already exists, not overwriting it; continuing"
+  else
+    cp -r data/lang data/lang_chain
+    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
+    # Use our special topology... note that later on may have to tune this
+    # topology.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  # note: the value of the dropout-proportion is not important, as it's
+  # controlled by the dropout schedule; what's important is that we set it.
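+  # As a rough guide to reading the schedule passed to
+  # --trainer.dropout-schedule below (it is interpreted piecewise-linearly in
+  # the fraction of training completed): '0,0@0.20,0.7@0.5,0@0.75,0' keeps
+  # dropout at 0 for the first 20% of training, ramps up to 0.7 at the
+  # half-way point, ramps back down to 0 by 75%, and stays at 0 to the end;
+  # e.g. at 35% of training the proportion would be about 0.35, half-way up
+  # the ramp.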
+  lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
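+  # (also note: looped decoding carries the recurrent state of the LSTMs over
+  # from one chunk to the next rather than recomputing left-context for every
+  # chunk, which is why only --extra-left-context-initial is passed below and
+  # the other extra-context options used in regular decoding don't apply.)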
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100755 index 00000000000..e09df86558a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1l is as 1k, but having the dropout end at the end of training, not @0.75. + +# see run_tdnn_lstm_1k.sh for results. + + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1l  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+                                  --nj $nj \
+                                  --min-seg-len $min_seg_len \
+                                  --train-set $train_set \
+                                  --gmm $gmm \
+                                  --num-threads-ubm $num_threads_ubm \
+                                  --nnet3-affix "$nnet3_affix"
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
+dir=exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi
+train_data_dir=data/${train_set}_sp_hires_comb
+lores_train_data_dir=data/${train_set}_sp_comb
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
+
+if [ $stage -le 14 ]; then
+  echo "$0: creating lang directory with one state per phone."
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file.  [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  if [ -d data/lang_chain ]; then
+    echo "$0: data/lang_chain already exists, not overwriting it; continuing"
+  else
+    cp -r data/lang data/lang_chain
+    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
+    # Use our special topology... note that later on may have to tune this
+    # topology.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  # note: the value of the dropout-proportion is not important, as it's
+  # controlled by the dropout schedule; what's important is that we set it.
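+  # The schedule used below, '0,0@0.20,0.7@0.5,0', differs from 1k in that
+  # the ramp back down from 0.7 runs from the half-way point all the way to
+  # the end of training instead of reaching 0 at 75%; e.g. at 75% of training
+  # the dropout proportion here would still be about 0.35.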
+  lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
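+  # (once the regular and looped decodes have both finished, this system can
+  # be compared against its predecessors with the same tool used for the
+  # results tables at the top of this file, e.g.:
+  #   local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l}_sp_bi )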
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh new file mode 100755 index 00000000000..3e75c9fe3e0 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1m is as 1l, but having the dropout end at 0.1 +# see run_tdnn_lstm_1k.sh for results. + +# 1l is as 1k, but having the dropout end at the end of training. + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1m  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+                                  --nj $nj \
+                                  --min-seg-len $min_seg_len \
+                                  --train-set $train_set \
+                                  --gmm $gmm \
+                                  --num-threads-ubm $num_threads_ubm \
+                                  --nnet3-affix "$nnet3_affix"
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
+dir=exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi
+train_data_dir=data/${train_set}_sp_hires_comb
+lores_train_data_dir=data/${train_set}_sp_comb
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
+
+if [ $stage -le 14 ]; then
+  echo "$0: creating lang directory with one state per phone."
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file.  [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  if [ -d data/lang_chain ]; then
+    echo "$0: data/lang_chain already exists, not overwriting it; continuing"
+  else
+    cp -r data/lang data/lang_chain
+    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
+    # Use our special topology... note that later on may have to tune this
+    # topology.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  # note: the value of the dropout-proportion is not important, as it's
+  # controlled by the dropout schedule; what's important is that we set it.
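+  # (for illustration: with the default xent_regularize=0.1 set above, the
+  # learning_rate_factor computed a few lines up comes out to 0.5/0.1 = 5.0.
+  # The 'echo | python' idiom depends on the python2 print statement; a
+  # version-independent equivalent, shown only as a sketch and not what the
+  # recipe uses, would be:
+  #   learning_rate_factor=$(python -c "print(0.5/$xent_regularize)")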
+  lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0.1' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
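+  # (a note on the error handling below: each dataset is decoded in a
+  # backgrounded subshell, and any failure touches $dir/.error; after 'wait'
+  # we test for that file, since the exit statuses of the background jobs are
+  # not themselves collected.)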
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh new file mode 100755 index 00000000000..ed79404f815 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# 1n is as 1k, but maxing out at 0.5, not 0.7. +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1n  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+                                  --nj $nj \
+                                  --min-seg-len $min_seg_len \
+                                  --train-set $train_set \
+                                  --gmm $gmm \
+                                  --num-threads-ubm $num_threads_ubm \
+                                  --nnet3-affix "$nnet3_affix"
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
+dir=exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi
+train_data_dir=data/${train_set}_sp_hires_comb
+lores_train_data_dir=data/${train_set}_sp_comb
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
+
+if [ $stage -le 14 ]; then
+  echo "$0: creating lang directory with one state per phone."
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file.  [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  if [ -d data/lang_chain ]; then
+    echo "$0: data/lang_chain already exists, not overwriting it; continuing"
+  else
+    cp -r data/lang data/lang_chain
+    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
+    # Use our special topology... note that later on may have to tune this
+    # topology.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  # note: the value of the dropout-proportion is not important, as it's
+  # controlled by the dropout schedule; what's important is that we set it.
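+  # (a rough intuition for decay-time=20 in $lstmp_opts below: it limits how
+  # long the LSTM cell state can remember the past, roughly like an averaging
+  # window on the order of 20 frames; per the notes at the top of this file,
+  # 1e reduced this from 40 to 20.)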
+  lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.5@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --extra-left-context-initial $extra_left_context_initial \
+          --frames-per-chunk 30 \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+          $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh
new file mode 100755
index 00000000000..ec97bce3a8b
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh
@@ -0,0 +1,344 @@
+#!/bin/bash
+
+# 1o is as 1k, but putting the dropout on (c,m), i.e. the output
+# of the LstmNonlinearityComponent, which I believe is the same as
+# putting it on (i,f) which Gaofeng found worked well in the non-fast Lstm
+# component; and using a schedule which maxes out at 0.3, not 0.7.
+# [note: this was a little worse.  it turns out it was not the same as
+# what Gaofeng did, because he had separate masks on (i,f).
+# note: I've since removed the script-level support for this.]
+
+# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m,n,o}_sp_bi
+# System                  tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi tdnn_lstm1n_sp_bi tdnn_lstm1o_sp_bi
+# WER on dev(orig)               9.0               8.7               8.9               9.0               8.8               8.8
+# [looped:]                      9.0               8.6               8.9               8.9               8.8               8.9
+# WER on dev(rescored)           8.4               7.9               8.2               8.2               8.1               8.1
+# [looped:]                      8.4               7.8               8.2               8.3               8.1               8.2
+# WER on test(orig)              8.8               8.8               8.9               8.9               8.7               8.7
+# [looped:]                      8.8               8.7               8.8               8.8               8.7               8.7
+# WER on test(rescored)          8.4               8.3               8.2               8.5               8.3               8.2
+# [looped:]                      8.3               8.3               8.3               8.5               8.3               8.2
+# Final train prob            -0.0648           -0.0693           -0.0768           -0.0807           -0.0702           -0.0698
+# Final valid prob            -0.0827           -0.0854           -0.0943           -0.0931           -0.0836           -0.0858
+# Final train prob (xent)     -0.8372           -0.8848           -0.9371           -0.9807           -0.8719           -0.8998
+# Final valid prob (xent)     -0.9497           -0.9895           -1.0546           -1.0629           -0.9732           -1.0084
+
+# 1e is as 1b, but reducing decay-time from 40 to 20.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers.  note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results.  Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                  lstm1d_sp_bi  lstm1e_sp_bi  tdnn_lstm1a_sp_bi
+# WER on dev(orig)            10.3          10.7           9.7
+# WER on dev(rescored)         9.8          10.1           9.3
+# WER on test(orig)            9.7           9.8           9.1
+# WER on test(rescored)        9.2           9.4           8.7
+# Final train prob          -0.0812       -0.0862       -0.0625
+# Final valid prob          -0.1049       -0.1047       -0.0910
+# Final train prob (xent)   -1.1334       -1.1763       -0.8518
+# Final valid prob (xent)   -1.2263       -1.2427       -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers.  I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+label_delay=5
+xent_regularize=0.1
+train_set=train_cleaned
+gmm=tri3_cleaned  # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# training options
+chunk_left_context=40
+chunk_right_context=0
+chunk_left_context_initial=0
+chunk_right_context_final=0
+# decode options
+extra_left_context=50
+extra_right_context=0
+extra_left_context_initial=0
+extra_right_context_final=0
+frames_per_chunk=140,100,160
+frames_per_chunk_primary=140
+
+# The rest are configs specific to this script.  Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1o  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+                                  --nj $nj \
+                                  --min-seg-len $min_seg_len \
+                                  --train-set $train_set \
+                                  --gmm $gmm \
+                                  --num-threads-ubm $num_threads_ubm \
+                                  --nnet3-affix "$nnet3_affix"
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
+dir=exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi
+train_data_dir=data/${train_set}_sp_hires_comb
+lores_train_data_dir=data/${train_set}_sp_comb
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
+
+if [ $stage -le 14 ]; then
+  echo "$0: creating lang directory with one state per phone."
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file.  [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  if [ -d data/lang_chain ]; then
+    echo "$0: data/lang_chain already exists, not overwriting it; continuing"
+  else
+    cp -r data/lang data/lang_chain
+    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
+    # Use our special topology... note that later on may have to tune this
+    # topology.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz  # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  # note: the value of the dropout-proportion is not important, as it's
+  # controlled by the dropout schedule; what's important is that we set it.
+  lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-place=2 dropout-per-frame=true"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.3@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
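# For example (an illustrative sketch, not part of the recipe): re-running the
# looped decode with a different chunk size should give the same results, e.g.
#
#   steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
#     --acwt 1.0 --post-decode-acwt 10.0 \
#     --frames-per-chunk 50 \
#     --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \
#     $dir/graph data/dev_hires $dir/decode_looped_dev_chunk50
#
# (decode_looped_dev_chunk50 is a made-up output directory name, used here only
# for the comparison.)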
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh new file mode 100755 index 00000000000..b3da38e412a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1r is as 1e, but changing update-period of natural gradient from 4 to 1, +# Not helpful. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,r}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1r_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1r_sp_bi +# WER on dev(orig) 9.0 9.0 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.6 +# WER on test(orig) 8.8 9.1 +# [looped:] 8.8 9.0 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0642 +# Final valid prob -0.0827 -0.0838 +# Final train prob (xent) -0.8372 -0.8319 +# Final valid prob (xent) -0.9497 -0.9635 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. 
local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1r #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. +remove_egs=true + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + tdnn_opts='ng-affine-options="update-period=1"' + lstmp_opts='ng-affine-options="update-period=1" decay-time=20' + output_opts='max-change=1.5 ng-affine-options="update-period=1"' + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 $tdnn_opts + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) $tdnn_opts + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
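# (Background, stated as my assumption about the binaries involved:
# steps/nnet3/decode.sh with --num-threads 4 uses the multi-threaded
# nnet3-latgen-faster-parallel decoder, while decode_looped.sh runs the
# single-threaded nnet3-latgen-faster-looped, which is why it is slower.)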
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh new file mode 100644 index 00000000000..dc0f59fb64a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh @@ -0,0 +1,333 @@ +#!/bin/bash + +# 1s is as 1e, but adding per-frame dropout to LSTM in location4 +# as paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1s_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1s_sp_bi +# WER on dev(orig) 9.0 8.9 +# [looped:] 9.0 8.9 +# WER on dev(rescored) 8.4 8.1 +# [looped:] 8.4 8.1 +# WER on test(orig) 8.9 8.8 +# [looped:] 8.9 8.8 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.4 8.3 +# Final train prob -0.0712 -0.0914 +# Final valid prob -0.0892 -0.0977 +# Final train prob (xent) -0.8566 -0.9931 +# Final valid prob (xent) -0.9927 -1.0633 + +# exp/chain_cleaned/tdnn_lstm1e_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.082->-0.081 xent:train/valid[167,252,final]=(-0.961,-0.859,-0.857/-1.06,-0.999,-0.993) logprob:train/valid[167,252,final]=(-0.086,-0.072,-0.071/-0.098,-0.091,-0.089) +# exp/chain_cleaned/tdnn_lstm1s_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.104->-0.101 xent:train/valid[167,252,final]=(-3.08,-1.07,-0.993/-3.13,-1.14,-1.06) logprob:train/valid[167,252,final]=(-0.181,-0.093,-0.091/-0.183,-0.100,-0.098) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +dropout_schedule="0,0@0.2,0.3@0.5,0" +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1s #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. 
+remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... 
[it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh new file mode 100644 index 00000000000..c286fcef353 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh @@ -0,0 +1,333 @@ +#!/bin/bash + +# 1t is as 1e, but increasing the TDNN dim and LSTM cell-dim into +# 1024, the recurrent and non-recurrent projection of the LSTM from +# 128 into 256. 
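# (Back-of-envelope check, not from the original script: the weight matrices in
# these layers scale in both dimensions with the layer widths, so doubling
# 512->1024 and 128->256 should multiply the parameter count by roughly 4;
#   python -c 'print(9.5 * 4)'   # ~38M expected
# which matches the 9.5M -> 37.1M num-params figures quoted below.)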
+ +# ./local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1t_sp_bi +# System tdnn_lstm1e_again_sp_bi tdnn_lstm1t_again_sp_bi +# WER on dev(orig) 9.0 8.9 +# [looped:] 9.0 8.9 +# WER on dev(rescored) 8.4 8.2 +# [looped:] 8.4 8.3 +# WER on test(orig) 8.9 8.9 +# [looped:] 8.9 9.0 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.4 8.5 +# Final train prob -0.0712 -0.0459 +# Final valid prob -0.0892 -0.0867 +# Final train prob (xent) -0.8566 -0.6434 +# Final valid prob (xent) -0.9927 -0.8733 + +# exp/chain_cleaned/tdnn_lstm1e_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.082->-0.081 xent:train/valid[167,252,final]=(-0.961,-0.859,-0.857/-1.06,-0.999,-0.993) logprob:train/valid[167,252,final]=(-0.086,-0.072,-0.071/-0.098,-0.091,-0.089) +# exp/chain_cleaned/tdnn_lstm1t_sp_bi: num-iters=253 nj=2..12 num-params=37.1M dim=40+100->3626 combine=-0.055->-0.055 xent:train/valid[167,252,final]=(-0.774,-0.655,-0.643/-0.928,-0.883,-0.873) logprob:train/valid[167,252,final]=(-0.063,-0.048,-0.046/-0.087,-0.089,-0.087) + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1t #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. 
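# Rough latency arithmetic (an illustration; assumes the usual 10 ms frame
# shift of the hires MFCC features): the chunk size sets the minimum audio
# buffering per decoding step in an online setting, e.g.
#
#   for n in 30 50 150; do
#     awk -v n=$n 'BEGIN { printf("frames-per-chunk=%d -> ~%.2f s per chunk\n", n, n*0.01) }'
#   done
#
# which is why short chunks are preferred for real use even though they see
# less future context for the iVectors.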
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh new file mode 100644 index 00000000000..9e50060f5d6 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh @@ -0,0 +1,327 @@ +#!/bin/bash + +# 1u is the same as 1t but adding per-frame dropout to LSTM +# in location4, see paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_lstm1t_sp_bi exp/chain_cleaned/tdnn_lstm1u_sp_bi +# System tdnn_lstm1t_again_sp_bi tdnn_lstm1u_sp_bi +# WER on dev(orig) 8.9 8.6 +# WER on dev(rescored) 8.2 8.0 +# WER on test(orig) 8.9 8.3 +# WER on test(rescored) 8.4 7.9 +# Final train prob -0.0459 -0.0709 +# Final valid prob -0.0867 -0.0902 +# Final train prob (xent) -0.6434 -0.8112 +# Final valid prob (xent) -0.8733 -0.9384 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. 
_cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +dropout_schedule="0,0@0.20,0.3@0.5,0" +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1u #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 
decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
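  # (Background: 'chain' models are trained and decoded with acoustic scale 1.0,
  # so the graph must be built with --self-loop-scale 1.0 here, rather than the
  # 0.1 used for conventionally trained models.)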
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. 
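      # (The ${dir}_online directory prepared above bundles its own
      # feature-extraction config and iVector extractor, so decoding recomputes
      # MFCCs and iVectors directly from the audio listed in wav.scp.)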
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..da0bb728e69 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_c_sp exp/nnet3_cleaned/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]" + echo "e.g.: $0 exp/nnet3_cleaned/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/nnet3_cleaned/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like: +# set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain_cleaned/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=("# WER on dev(orig) " "# WER on dev(rescored) " "# WER on test(orig) " "# WER on test(rescored)") + +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum ${dirname}_online/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh index b4f2dd3e3b4..16093616b05 100755 --- a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh +++ b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh @@ -21,9 +21,9 @@ num_threads_ubm=32 nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it # becomes exp/nnet3_cleaned or whatever. -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. 
utils/parse_options.sh gmm_dir=exp/${gmm} diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh deleted file mode 100755 index 91ba913c183..00000000000 --- a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# This is the standard "tdnn" system, built in nnet3; this script -# is the version that's meant to run with data-cleanup, that doesn't -# support parallel alignments. - - -# by default, with cleanup: -# local/nnet3/run_tdnn.sh - -# without cleanup: -# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & - - -set -e -o pipefail -u - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -decode_nj=30 -min_seg_len=1.55 -train_set=train_cleaned -gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it - # should have alignments for the specified training data. -num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned -tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. - -# Options which are not passed through to run_ivector_common.sh -train_stage=-10 -splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" -remove_egs=true -relu_dim=850 -num_epochs=3 - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat </dev/null - for dset in dev test; do - ( - steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ - ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ - data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 - ) || touch $dir/.error & - done - wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 -fi - - -exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh new file mode 120000 index 00000000000..61f8f499182 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..50d28fb91f3 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a_disc.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh new file mode 120000 index 00000000000..8e03c924bc1 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_lfr_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..80ff91b8606 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3; this script +# 
is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat </dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..f6e4fb71b75 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh @@ -0,0 +1,172 @@ +#!/bin/bash + + +# 1b is as 1a but uses xconfigs. + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
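+# (Editor's aside, not part of the original recipe: because the script sources
+# utils/parse_options.sh below, every variable declared in this section can be
+# overridden from the command line, with dashes mapping to underscores.  A
+# hypothetical invocation, flag values made up for illustration:
+#   local/nnet3/tuning/run_tdnn_1b.sh --stage 12 --decode-nj 16 --tdnn-affix 1b_test
+# would set stage=12, decode_nj=16 and tdnn_affix=1b_test.)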
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1b #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +relu_dim=850 +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=850 + relu-renorm-layer name=tdnn2 dim=850 input=Append(-1,2) + relu-renorm-layer name=tdnn3 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn4 dim=850 input=Append(-7,2) + relu-renorm-layer name=tdnn5 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn6 dim=850 + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
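+  # (Hedged aside: one way to spot-check that equivalence after the fact,
+  # assuming both a regular and a looped decode directory exist, would be:
+  #   for d in $dir/decode_dev $dir/decode_looped_dev; do
+  #     grep Sum $d/score*/*ys | utils/best_wer.sh
+  #   done
+  # i.e. the same utils/best_wer.sh helper that compare_wer.sh relies on.)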
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..35789342ffb --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +# 1c is as 1b but using more 'chain-like' splicing and slightly +# smaller dim. Not better; maybe slightly worse. + +# note: the num-params is almost the same. +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1{b,c}_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) +# exp/nnet3_cleaned/tdnn1c_sp: num-iters=240 nj=2..12 num-params=10.1M dim=40+100->4187 combine=-1.16->-1.15 loglike:train/valid[159,239,combined]=(-1.22,-1.16,-1.15/-1.41,-1.38,-1.38) accuracy:train/valid[159,239,combined]=(0.66,0.67,0.68/0.62,0.63,0.63) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1{b,c}_sp +# System tdnn1b_sp tdnn1c_sp +# WER on dev(orig) 11.7 11.9 +# WER on dev(rescored) 10.9 11.1 +# WER on test(orig) 11.7 11.8 +# WER on test(rescored) 11.0 11.2 +# Final train prob -0.9416 -1.1505 +# Final valid prob -1.1496 -1.3805 +# Final train acc 0.7241 0.6756 +# Final valid acc 0.6788 0.6255 + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1c #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. 
+ +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh new file mode 100755 index 00000000000..666c2f1bb31 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh @@ -0,0 +1,200 @@ +#!/bin/bash + + +# run_tdnn_lfr_1a.sh is similar in configuration to run_tdnn_1c.sh, but it's a +# low-frame-rate system (see egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh +# for an example of such a system). + + +# by default, with cleanup: +# local/nnet3/run_tdnn_lfr.sh + +# without cleanup: +# local/nnet3/run_tdnn_lfr.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology and a reduced sampling rate. + # We use 4000 leaves, which is a little less than the number used + # in the baseline GMM system (5k) in this setup, since generally + # LFR systems do best with somewhat fewer leaves. + # + # To get the stats to build the tree this script only uses every third frame, + # but it dumps converted alignments that essentially have 3 different + # frame-shifted versions of the alignment interpolated together; these can be + # used without modification in getting labels for training. 
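+  # (Toy illustration of the frame-shift idea, plain shell only: keeping
+  # every third frame at offsets 0, 1 and 2 covers each original frame
+  # exactly once, e.g.
+  #   for off in 0 1 2; do seq $off 3 29; done | sort -n | uniq -c
+  # prints a count of 1 for every index 0 through 29.)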
+ steps/nnet3/chain/build_tree.sh \ + --repeat-frames true --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 4000 data/${train_set}_sp_comb \ + $lang $ali_dir $treedir +fi + +if [ $stage -le 14 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$treedir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + echo 3 >$dir/frame_subsampling_factor +fi + +if [ $stage -le 16 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph +fi + + +if [ $stage -le 17 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
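+  # (Worked note on the scaling below, added for clarity: the search runs at
+  # --acwt 0.333 and the lattice is written with --post-decode-acwt 3.0, so
+  # the acoustic scores in the final lattice carry an effective scale of
+  # 0.333 * 3.0 = 1.0, which is the range the standard scoring scripts
+  # expect.)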
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $decode_nj \ + --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $dir/graph data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..28c45836cf7 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,219 @@ +#!/bin/bash + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.4 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.7 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1a +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --frames-per-chunk $frames_per_chunk \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} 
${dir}/decode_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+
+if [ $stage -le 15 ]; then
+  # 'looped' decoding.
+  # note: you should NOT do this decoding step for setups that have bidirectional
+  # recurrence, like BLSTMs-- it doesn't make sense and will give bad results.
+  # we didn't write a -parallel version of this program yet,
+  # so it will take a bit longer as the --num-threads option is not supported.
+  # we just hardcode the --frames-per-chunk option as it doesn't have to
+  # match any value used in training, and it won't affect the results (unlike
+  # regular decoding).
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --frames-per-chunk 30 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+
+exit 0;
diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh
new file mode 100755
index 00000000000..1826caf3d05
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+
+# This script does discriminative training on top of a CE nnet3 system.  To
+# simplify things, this assumes you are using the "cleaned" data (since this is
+# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1a.sh
+# to use the non-cleaned data.
+#
+# note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
+# since the alignment and the lattice generation/egs-dumping take quite a bit
+# of CPU time.
+
+# below is with the current settings (effective_learning_rate=0.0000025, last_layer_factor=0.5):
+# steps/info/nnet3_disc_dir_info.pl exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow
+# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:num-jobs=4;effective-lrate=2.5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.96,0.97,0.97],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.25,0.17,0.12],valid-counts=[0.57,0.31,0.34,0.35]
+
+# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:{1,2,3}
+# System                tdnn_lstm1a_sp tdnn_lstm1a_sp_smbrslow:1 tdnn_lstm1a_sp_smbrslow:2 tdnn_lstm1a_sp_smbrslow:3
+# WER on dev(orig)           11.0      9.4       9.4       9.4
+# [looped:]                  11.0      9.4       9.5       9.4
+# WER on dev(rescored)       10.3      8.8       8.7       8.7
+# [looped:]                  10.3      8.8       8.9       8.9
+# WER on test(orig)          10.8      9.6       9.7       9.6
+# [looped:]                  10.7      9.6       9.6       9.7
+# WER on test(rescored)      10.1      9.1       9.2       9.1
+# [looped:]                  10.0      9.1       9.2       9.1
+
+# Below is with twice the lrate (5e-06) and the same last-layer-factor (0.5).  Trained too fast.
+# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:num-jobs=4;effective-lrate=5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.97,0.97,0.98],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.22,0.12,0.09],valid-counts=[0.57,0.31,0.27,0.32]
+# I'm not showing the looped decoding results with this older step;
+# there was a script bug (now fixed) and I don't want to rerun them.
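+# (Editor's note, a hedged reading of the settings above: in these nnet3
+# discriminative scripts the learning rate actually applied is
+# effective_learning_rate * num_jobs_nnet, so with the num_jobs_nnet=4 used
+# below, 2.5e-06 corresponds to an actual rate of 1e-05, and the 5e-06 run to
+# 2e-05; last_layer_factor additionally scales the update of the final layer
+# only.)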
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:{1,2,3}
+# System                tdnn_lstm1a_sp tdnn_lstm1a_sp_smbr:1 tdnn_lstm1a_sp_smbr:2 tdnn_lstm1a_sp_smbr:3
+# WER on dev(orig)           11.0      9.4       9.4       9.5
+# WER on dev(rescored)       10.3      8.8       8.8       8.9
+# WER on test(orig)          10.8      9.6       9.8       9.8
+# WER on test(rescored)      10.1      9.1       9.3       9.4
+
+set -e
+set -uo pipefail
+
+stage=1
+train_stage=-10 # can be used to start training in the middle.
+get_egs_stage=0
+use_gpu=true  # for training
+cleanup=false  # run with --cleanup true --stage 6 to clean up (remove large things like
+               # alignments and degs).
+degs_dir=  # set this to use preexisting degs.
+nj=400  # have a high number of jobs because this could take a while, and we might
+        # have some stragglers.
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+graph_dir=exp/tri3_cleaned/graph
+srcdir=exp/nnet3_cleaned/tdnn_lstm1a_sp
+train_data_dir=data/train_cleaned_sp_hires_comb
+online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb
+
+## Objective options
+criterion=smbr
+one_silence_class=true
+
+# originally ran with effective_learning_rate=0.000005,
+# changing to effective_learning_rate=0.0000025 and using affix=slow
+
+# you can set --disc-affix if you run different configurations.
+disc_affix=
+
+dir=${srcdir}_${criterion}${disc_affix}
+
+## Egs options.  Give quite a few choices of chunk length,
+## so it can split utterances without much gap or overlap.
+frames_per_eg=300,280,150,120,100
+frames_overlap_per_eg=0
+frames_per_chunk_egs=200  # for alignments and denlat creation.
+frames_per_chunk_decoding=50  # for decoding; should be the same as the value
+                              # used in the script that trained the nnet.
+                              # We didn't set the frames_per_chunk in
+                              # run_tdnn_lstm_1a.sh, so it defaults to 50.
+## these context options should match the training condition. (chunk_left_context,
+## chunk_right_context)
+## We set --extra-left-context-initial 0 and --extra-right-context-final 0
+## directly in the script below, but this should also match the training condition.
+## note: --extra-left-context should be the same as the chunk_left_context (or in
+## general, the argument of --egs.chunk-left-context) in the baseline script.
+extra_left_context=40
+extra_right_context=0
+
+
+
+## Nnet training options
+effective_learning_rate=0.0000025
+last_layer_factor=0.5
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=3
+regularization_opts=  # Applicable for providing --xent-regularize and --l2-regularize options,
+                      # in chain models.
+minibatch_size="300=32,16/150=64,32"  # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                      # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1  # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat </dev/null || true
+
+  for x in `seq $decode_start_epoch $num_epochs`; do
+    for decode_set in dev test; do
+      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
+      iter=epoch$x
+      # We don't test the iter "epoch${x}_adj", although it's computed,
+      # because prior-adjustment doesn't make sense for chain models
+      # and it degrades the results.
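+      # (Illustrative aside: with the defaults decode_start_epoch=1 and
+      # num_epochs=3, this loop decodes --iter epoch1, epoch2 and epoch3,
+      # i.e. the per-epoch models written by the training stage; the epochN
+      # naming is the usual nnet3 discriminative-training convention.)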
+ ( + steps/nnet3/decode_looped.sh \ + --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${decode_set}_hires \ + ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1 + ) || touch $dir/.error & + done + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 6 ] && $cleanup; then + # if you run with "--cleanup true --stage 6" you can clean up. + # actually, keep the alignments in case we need them later.. they're slow to + # create, and quite big. + # rm ${srcdir}_ali/ali.*.gz || true + + steps/nnet2/remove_egs.sh ${srcdir}_degs || true +fi + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..8b8af6eff78 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +# 1b is as 1a, but removing the decay-time option as a baseline. + +# the decay-time option does seem to be having the expected interaction with +# 'looped' decoding, i.e. with the decay-time option we don't get a degradation +# from looped decoding (if anything, with decay time, looped decoding is a +# little better than baseline decoding). + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.3 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.8 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + + + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1b +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +extra_left_context= +extra_right_context= +frames_per_chunk= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # caution: we don't set the --frames-per-chunk here, we just use the + # default value of 50, which happens to be suitable because it's + # close to the primary chunk_width of 40. 
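+      # (Worked example of the fallbacks above: with the script defaults,
+      # chunk_left_context=40, chunk_right_context=0 and the decode options
+      # left empty, the call below ends up running with
+      # --extra-left-context 40 --extra-right-context 0, matching the
+      # training configuration.)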
+      steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \
+          --extra-left-context $extra_left_context \
+          --extra-right-context $extra_right_context \
+          --extra-left-context-initial 0 --extra-right-context-final 0 \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+
+if [ $stage -le 15 ]; then
+  # 'looped' decoding.
+  # note: you should NOT do this decoding step for setups that have bidirectional
+  # recurrence, like BLSTMs-- it doesn't make sense and will give bad results.
+  # we didn't write a -parallel version of this program yet,
+  # so it will take a bit longer as the --num-threads option is not supported.
+  # we just hardcode the --frames-per-chunk option as it doesn't have to
+  # match any value used in training, and it won't affect the results (unlike
+  # regular decoding).
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --frames-per-chunk 30 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+
+exit 0;
diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
new file mode 100755
index 00000000000..07c3d4af233
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+# This script does discriminative training on top of a CE nnet3 system.  To
+# simplify things, this assumes you are using the "cleaned" data (since this is
+# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1b.sh
+# to use the non-cleaned data.
+#
+# note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
+# since the alignment and the lattice generation/egs-dumping take quite a bit
+# of CPU time.
+
+
+set -e
+set -uo pipefail
+
+stage=1
+train_stage=-10 # can be used to start training in the middle.
+get_egs_stage=0
+use_gpu=true  # for training
+cleanup=false  # run with --cleanup true --stage 6 to clean up (remove large things like
+               # alignments and degs).
+degs_dir=  # set this to use preexisting degs.
+nj=400  # have a high number of jobs because this could take a while, and we might
+        # have some stragglers.
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+graph_dir=exp/tri3_cleaned/graph
+srcdir=exp/nnet3_cleaned/tdnn_lstm1b_sp
+train_data_dir=data/train_cleaned_sp_hires_comb
+online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb
+
+## Objective options
+criterion=smbr
+one_silence_class=true
+
+# you can set --disc-affix if you run different configurations, e.g. 
--disc-affix "_b"
+# note, I ran without affix with learning rate 0.0000125, with disc_affix=slow
+# with learning rate 0.000005, and with disc_affix=slow2 with learning rate 0.0000025.
+# disc_affix=slow3 is with effective_learning_rate=0.000005 and last_layer_factor=0.1
+
+disc_affix=slow3
+
+dir=${srcdir}_${criterion}${disc_affix}
+
+## Egs options.  Give quite a few choices of chunk length,
+## so it can split utterances without much gap or overlap.
+frames_per_eg=300,280,150,120,100
+frames_overlap_per_eg=0
+frames_per_chunk_egs=200  # for alignments and denlat creation.
+frames_per_chunk_decoding=50  # for decoding; should be the same as the value
+                              # used in the script that trained the nnet.
+                              # We didn't set the frames_per_chunk in
+                              # run_tdnn_lstm_1b.sh, so it defaults to 50.
+## these context options should match the training condition. (chunk_left_context,
+## chunk_right_context)
+## We set --extra-left-context-initial 0 and --extra-right-context-final 0
+## directly in the script below, but this should also match the training condition.
+## note: --extra-left-context should be the same as the chunk_left_context (or in
+## general, the argument of --egs.chunk-left-context) in the baseline script.
+extra_left_context=40
+extra_right_context=0
+
+
+
+## Nnet training options
+effective_learning_rate=0.000005
+last_layer_factor=0.1
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=2
+regularization_opts=  # Applicable for providing --xent-regularize and --l2-regularize options,
+                      # in chain models.
+minibatch_size="300=32,16/150=64,32"  # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                      # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1  # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat <"
+  echo " e.g.: steps/train_pca_transform.sh data/train_si84 exp/tri2b"
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl )            # how to run jobs."
+  echo "  --config                                        # config containing options"
+  echo "  --stage                                         # stage to do partial re-run from."
+  exit 1;
+fi
+
+data=$1
+dir=$2
+
+for f in $data/feats.scp ; do
+  [ ! -f "$f" ] && echo "$0: expecting file $f to exist" && exit 1
+done
+
+mkdir -p $dir/log
+
+echo "$splice_opts" >$dir/splice_opts  # keep track of frame-splicing options
+                                       # so that later stages of system building can know what they were.
+echo $online_cmvn_opts > $dir/online_cmvn.conf  # keep track of options to CMVN.
+
+# create global_cmvn.stats
+if ! 
matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then + echo "$0: Error summing cmvn stats" + exit 1 +fi + +feats="ark,s,cs:utils/subset_scp.pl --quiet $max_utts $data/feats.scp | apply-cmvn-online $online_cmvn_opts $dir/global_cmvn.stats scp:- ark:- | splice-feats $splice_opts ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |" + +if [ $stage -le 0 ]; then + $cmd $dir/log/pca_est.log \ + est-pca --dim=$dim --normalize-variance=$normalize_variance \ + --normalize-mean=$normalize_mean "$feats" $dir/final.mat || exit 1; +fi + +echo "Done estimating PCA transform in $dir" + +exit 0 diff --git a/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh b/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh index cd18ae21f39..cc63adb3e17 100755 --- a/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh +++ b/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh @@ -77,6 +77,7 @@ if [ ! -z "$iedir" ]; then fi utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; +mkdir -p $dir cp $lang/phones.txt $dir || exit 1; dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the diff --git a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh index 22250ae9ee3..80a023fed8a 100755 --- a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh +++ b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh @@ -10,15 +10,15 @@ # This script was modified from ../../sre08/v1/sid/train_diag_ubm.sh. It trains # a diagonal UBM on top of features processed with apply-cmvn-online and then -# transformed with an LDA+MLLT matrix (obtained from the source directory). -# This script does not use the trained model from the source directory to -# initialize the diagonal GMM; instead, we initialize the GMM using +# transformed with an LDA+MLLT or PCA matrix (obtained from the source +# directory). This script does not use the trained model from the source +# directory to initialize the diagonal GMM; instead, we initialize the GMM using # gmm-global-init-from-feats, which sets the means to random data points and # then does some iterations of E-M in memory. After the in-memory -# initialization we train for a few iterations in parallel. -# Note that there is a slight mismatch in that the source LDA+MLLT matrix -# (final.mat) will have been estimated using standard CMVN, and we're using -# online CMVN. We don't think this will have much effect. +# initialization we train for a few iterations in parallel. Note that if an +# LDA+MLLT transform matrix is used, there will be a slight mismatch in that the +# source LDA+MLLT matrix (final.mat) will have been estimated using standard +# CMVN, and we're using online CMVN. We don't think this will have much effect. # Begin configuration section. @@ -58,7 +58,7 @@ if [ $# != 4 ]; then echo " --stage # stage to do partial re-run from." echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -89,6 +89,15 @@ for f in $data/feats.scp "$online_cmvn_config" $srcdir/splice_opts $srcdir/final [ ! -f "$f" ] && echo "$0: expecting file $f to exist" && exit 1 done +if [ -d "$dir" ]; then + bak_dir=$(mktemp -d ${dir}/backup.XXX); + echo "$0: Directory $dir already exists. 
Backing up diagonal UBM in ${bak_dir}"; + for f in $dir/final.mat $dir/final.dubm $dir/online_cmvn.conf $dir/global_cmvn.stats; do + [ -f "$f" ] && mv $f ${bak_dir}/ + done + [ -d "$dir/log" ] && mv $dir/log ${bak_dir}/ +fi + splice_opts=$(cat $srcdir/splice_opts) cp $srcdir/splice_opts $dir/ || exit 1; cp $srcdir/final.mat $dir/ || exit 1; @@ -146,10 +155,16 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + + if $cleanup; then + rm $dir/$x.*.acc $dir/$x.dubm + fi fi done -rm $dir/gselect.*.gz +if $cleanup; then + rm $dir/gselect.*.gz +fi + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh index 9b354c0753e..5dbda1780f4 100755 --- a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh +++ b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh @@ -21,7 +21,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -32,8 +32,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -88,6 +88,17 @@ for f in $srcdir/final.dubm $srcdir/final.mat $srcdir/global_cmvn.stats $srcdir/ [ ! -f $f ] && echo "No such file $f" && exit 1; done + +if [ -d "$dir" ]; then + bak_dir=$(mktemp -d ${dir}/backup.XXX); + echo "$0: Directory $dir already exists. Backing up iVector extractor in ${bak_dir}"; + for f in $dir/final.ie $dir/*.ie $dir/final.mat $dir/final.dubm \ + $dir/online_cmvn.conf $dir/global_cmvn.stats; do + [ -f "$f" ] && mv $f ${bak_dir}/ + done + [ -d "$dir/log" ] && mv $dir/log ${bak_dir}/ +fi + # Set various variables. mkdir -p $dir/log nj_full=$[$nj*$num_processes] @@ -105,7 +116,6 @@ gmm_feats="ark,s,cs:apply-cmvn-online --config=$dir/online_cmvn.conf $dir/global feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $dir/final.mat ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |" - # Initialize the i-vector extractor using the input GMM, which is converted to # full because that's what the i-vector extractor expects. 
Note: we have to do
+# --use-weights=false to disable regression of the log weights on the ivector,
@@ -115,7 +125,7 @@ if [ $stage -le -2 ]; then
+  $cmd $dir/log/init.log \
+    ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=false \
+     "gmm-global-to-fgmm $dir/final.dubm -|" $dir/0.ie || exit 1
-fi
+fi
+
+# Do Gaussian selection and posterior extraction
@@ -168,19 +178,27 @@ while [ $x -lt $num_iters ]; do
+      # each accumulation process uses, since we
+      # can be sure the queue will support this many.
+      #
-      # The parallel-opts was either specified by
+      # The parallel-opts was either specified by
+      # the user or we computed it correctly in
+      # the previous stages
       $cmd --num-threads $[$num_threads*$num_processes] $dir/log/update.$x.log \
         ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1;
       rm $dir/acc.$x.*
+
       if $cleanup; then
-        rm $dir/acc.$x
-        # rm $dir/$x.ie
+        rm $dir/acc.$x $dir/$x.ie
       fi
   fi
   x=$[$x+1]
done
+
+if $cleanup; then
+  rm $dir/post.*.gz
+fi
+
 rm $dir/final.ie 2>/dev/null
 ln -s $x.ie $dir/final.ie
+
+# assign a unique id to this extractor
+# we are not interested in the id itself, just pre-caching ...
+steps/nnet2/get_ivector_id.sh $dir > /dev/null || exit 1
+
diff --git a/egs/wsj/s5/steps/online/nnet3/decode.sh b/egs/wsj/s5/steps/online/nnet3/decode.sh
index a4777f1edf7..118cf9e1260 100755
--- a/egs/wsj/s5/steps/online/nnet3/decode.sh
+++ b/egs/wsj/s5/steps/online/nnet3/decode.sh
@@ -8,6 +8,8 @@ stage=0
 nj=4
 cmd=run.pl
+frames_per_chunk=20
+extra_left_context_initial=0
 min_active=200
 max_active=7000
 beam=15.0
@@ -114,11 +116,6 @@ else
 fi
 
 
-decoder=online2-wav-nnet3-latgen-faster
-parallel_opts=
-opts="--online=$online"
-
-
 if [ "$post_decode_acwt" == 1.0 ]; then
   lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz"
 else
@@ -132,8 +129,12 @@ if [ -f $srcdir/frame_subsampling_factor ]; then
 fi
 
 if [ $stage -le 0 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
-    $decoder $opts $silence_weighting_opts --do-endpointing=$do_endpointing $frame_subsampling_opt \
+  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
+    online2-wav-nnet3-latgen-faster $silence_weighting_opts --do-endpointing=$do_endpointing \
+      --frames-per-chunk=$frames_per_chunk \
+      --extra-left-context-initial=$extra_left_context_initial \
+      --online=$online \
+      $frame_subsampling_opt \
       --config=$online_config \
       --min-active=$min_active --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
       --acoustic-scale=$acwt --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/resolve_ctm_overlaps.py b/egs/wsj/s5/steps/resolve_ctm_overlaps.py
new file mode 100755
index 00000000000..aaee767e7e4
--- /dev/null
+++ b/egs/wsj/s5/steps/resolve_ctm_overlaps.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# Copyright 2014  Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti).
+#           2016  Vimal Manohar
+# Apache 2.0.
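+# (Hedged usage sketch, matching the argparse interface defined below; the
+# file names are hypothetical:
+#   steps/resolve_ctm_overlaps.py data/dev/segments overlapping.ctm resolved.ctm
+# "-" may be given for either ctm argument to read from stdin / write to
+# stdout.)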
+ +# Script to combine ctms with overlapping segments + +import sys, math, numpy as np, argparse +break_threshold = 0.01 + +def ReadSegments(segments_file): + segments = {} + for line in open(segments_file).readlines(): + parts = line.strip().split() + segments[parts[0]] = (parts[1], float(parts[2]), float(parts[3])) + return segments + +#def get_breaks(ctm, prev_end): +# breaks = [] +# for i in xrange(0, len(ctm)): +# if ctm[i][2] - prev_end > break_threshold: +# breaks.append([i, ctm[i][2]]) +# prev_end = ctm[i][2] + ctm[i][3] +# return np.array(breaks) + +# Resolve overlaps within segments of the same recording +def ResolveOverlaps(ctms, segments): + total_ctm = [] + if len(ctms) == 0: + raise Exception('Something wrong with the input ctms') + + next_utt = ctms[0][0][0] + for ctm_index in range(len(ctms) - 1): + # Assumption here is that the segments are written in consecutive order? + cur_ctm = ctms[ctm_index] + next_ctm = ctms[ctm_index + 1] + + cur_utt = next_utt + next_utt = next_ctm[0][0] + if (next_utt not in segments): + raise Exception('Could not find utterance %s in segments' % next_utt) + + if len(cur_ctm) > 0: + assert(cur_utt == cur_ctm[0][0]) + + assert(next_utt > cur_utt) + if (cur_utt not in segments): + raise Exception('Could not find utterance %s in segments' % cur_utt) + + # length of this segment + window_length = segments[cur_utt][2] - segments[cur_utt][1] + + # overlap of this segment with the next segment + # Note: It is possible for this to be negative when there is actually + # no overlap between consecutive segments. + overlap = segments[cur_utt][2] - segments[next_utt][1] + + # find the breaks after overlap starts + index = len(cur_ctm) + + for i in xrange(len(cur_ctm)): + if (cur_ctm[i][2] + cur_ctm[i][3]/2.0 > (window_length - overlap/2.0)): + # if midpoint of a hypothesis word is beyond the midpoint of the + # overlap region + index = i + break + + # Ignore the hypotheses beyond this midpoint. They will be considered as + # part of the next segment. + total_ctm += cur_ctm[:index] + + # Ignore the hypotheses of the next utterance that overlaps with the + # current utterance + index = -1 + for i in xrange(len(next_ctm)): + if (next_ctm[i][2] + next_ctm[i][3]/2.0 > (overlap/2.0)): + index = i + break + + if index >= 0: + ctms[ctm_index + 1] = next_ctm[index:] + else: + ctms[ctm_index + 1] = [] + + # merge the last ctm entirely + total_ctm += ctms[-1] + + return total_ctm + +def ReadCtm(ctm_file_lines, segments): + ctms = {} + for key in [ x[0] for x in segments.values() ]: + ctms[key] = [] + + ctm = [] + prev_utt = ctm_file_lines[0].split()[0] + for line in ctm_file_lines: + parts = line.split() + if (prev_utt == parts[0]): + ctm.append([parts[0], parts[1], float(parts[2]), + float(parts[3])] + parts[4:]) + else: + # New utterance. 
Append the previous utterance's CTM
+            # into the list for the utterance's recording
+            ctms[segments[ctm[0][0]][0]].append(ctm)
+
+            assert(parts[0] > prev_utt)
+
+            prev_utt = parts[0]
+            ctm = []
+            ctm.append([parts[0], parts[1], float(parts[2]),
+                        float(parts[3])] + parts[4:])
+
+    # append the last ctm
+    ctms[segments[ctm[0][0]][0]].append(ctm)
+    return ctms
+
+def WriteCtm(ctm_lines, out_file):
+    for line in ctm_lines:
+        out_file.write("{0} {1} {2} {3} {4}\n".format(line[0], line[1], line[2], line[3], " ".join(line[4:])))
+
+if __name__ == "__main__":
+    usage = """ Python script to resolve overlaps in ctms """
+    parser = argparse.ArgumentParser(usage)
+    parser.add_argument('segments', type=str, help = 'use segments to resolve overlaps')
+    parser.add_argument('ctm_in', type=str, help='input_ctm_file')
+    parser.add_argument('ctm_out', type=str, help='output_ctm_file')
+    params = parser.parse_args()
+
+    if params.ctm_in == "-":
+        params.ctm_in = sys.stdin
+    else:
+        params.ctm_in = open(params.ctm_in)
+    if params.ctm_out == "-":
+        params.ctm_out = sys.stdout
+    else:
+        params.ctm_out = open(params.ctm_out, 'w')
+
+    segments = ReadSegments(params.segments)
+
+    # Read CTMs into a dictionary indexed by the recording
+    ctms = ReadCtm(params.ctm_in.readlines(), segments)
+
+    for key in sorted(ctms.keys()):
+        # Process CTMs in the sorted order of recordings
+        ctm_reco = ctms[key]
+        ctm_reco = ResolveOverlaps(ctm_reco, segments)
+        WriteCtm(ctm_reco, params.ctm_out)
+    params.ctm_out.close()
diff --git a/egs/wsj/s5/steps/search_index.sh b/egs/wsj/s5/steps/search_index.sh
index 6d4c344c5db..5db3d39b15a 100755
--- a/egs/wsj/s5/steps/search_index.sh
+++ b/egs/wsj/s5/steps/search_index.sh
@@ -8,6 +8,7 @@ cmd=run.pl
 nbest=-1
 strict=true
 indices_dir=
+frame_subsampling_factor=1
 # End configuration section.
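The hunk below lets search_index.sh read either a plain or a gzipped keywords FST archive. The selection logic amounts to the following sketch (Python for illustration; file names as in the script, helper name hypothetical):

    # Sketch (not part of the patch): prefer keywords.fsts.gz,
    # fall back to keywords.fsts, otherwise fail.
    import os

    def keywords_rspecifier(kwsdatadir):
        gz = os.path.join(kwsdatadir, "keywords.fsts.gz")
        plain = os.path.join(kwsdatadir, "keywords.fsts")
        if os.path.isfile(gz):
            return '"gunzip -c %s|"' % gz     # decompressed on the fly
        if os.path.isfile(plain):
            return plain
        raise IOError("no such file %s[.gz]" % plain)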
echo "$0 $@" # Print the command line for logging @@ -36,15 +37,23 @@ fi mkdir -p $kwsdir/log; nj=`cat $indices_dir/num_jobs` || exit 1; -keywords=$kwsdatadir/keywords.fsts; +if [ -f $kwsdatadir/keywords.fsts.gz ]; then + keywords="\"gunzip -c $kwsdatadir/keywords.fsts.gz|\"" +elif [ -f $kwsdatadir/keywords.fsts ]; then + keywords=$kwsdatadir/keywords.fsts; +else + echo "$0: no such file $kwsdatadir/keywords.fsts[.gz]" && exit 1; +fi -for f in $indices_dir/index.1.gz $keywords; do +for f in $indices_dir/index.1.gz ; do [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; done $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ kws-search --strict=$strict --negative-tolerance=-1 \ + --frame-subsampling-factor=${frame_subsampling_factor} \ "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ - "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1; + "ark,t:|gzip -c > $kwsdir/result.JOB.gz" \ + "ark,t:|gzip -c > $kwsdir/stats.JOB.gz" || exit 1; exit 0; diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh new file mode 100755 index 00000000000..7cf151f1ad0 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh @@ -0,0 +1,154 @@ +#! /bin/bash + +window=2.5 +overlap=0.0 +stage=-1 +cmd=queue.pl +reco_nj=4 +frame_shift=0.01 +utt_nj=18 +min_clusters=10 +clustering_opts="--stopping-threshold=0.5 --max-merge-thresh=0.25 --normalize-by-entropy" + +. path.sh +. utils/parse_options.sh + +set -o pipefail +set -e +set -u + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit 1 +fi + +data=$1 +dir=$2 +out_data=$3 + +num_frames=`perl -e "print int($window / $frame_shift + 0.5)"` +num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"` + +data_uniform_seg=$dir/`basename ${data}`_uniform_seg_window${window}_ovlp${overlap} + +mkdir -p ${data_uniform_seg} + +mkdir -p $dir + +#segmentation-cluster-adjacent-segments --verbose=0 'ark:segmentation-copy --keep-label=1 "ark:gunzip -c exp/nnet3_lstm_sad_music/nnet_lstm_1e//segmentation_bn_eval97_whole_bp/orig_segmentation.1.gz |" ark:- | segmentation-split-segments --max-segment-length=250 --overlap-length=0 ark:- ark:- |' scp:data/bn_eval97_bp_hires/feats.scp "ark:| segmentation-post-process --merge-adjacent-segments ark:- ark:- | segmentation-to-segments ark:- ark,t:- /dev/null" 2>&1 | less + +if [ $stage -le 0 ]; then + $cmd $dir/log/get_subsegments.log \ + segmentation-init-from-segments --frame-overlap=0.015 $data/segments ark:- \| \ + segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \ + segmentation-to-segments --frame-overlap=0.0 ark:- ark:/dev/null \ + ${data_uniform_seg}/sub_segments + + utils/data/subsegment_data_dir.sh ${data} ${data_uniform_seg}{/sub_segments,} +fi + +gmm_dir=$dir/gmms +mkdir -p $gmm_dir + +utils/split_data.sh --per-reco ${data_uniform_seg} $reco_nj + +if [ $stage -le 1 ]; then + echo $reco_nj > $gmm_dir/num_jobs + $cmd JOB=1:$reco_nj $gmm_dir/log/train_gmm.JOB.log \ + gmm-global-init-models-from-feats --share-covars=true \ + --spk2utt-rspecifier=ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + --num-gauss-init=64 --num-gauss=64 --num-gauss-fraction=0.001 --max-gauss=512 --min-gauss=64 \ + --num-iters=20 --num-frames=500000 \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + ark,scp:$gmm_dir/gmm.JOB.ark,$gmm_dir/gmm.JOB.scp + + for n in `seq $reco_nj`; do + cat $gmm_dir/gmm.$n.scp + done > $gmm_dir/gmm.scp + +fi + 
+post_dir=$gmm_dir/post_`basename $data_uniform_seg` +mkdir -p $post_dir + +if [ $stage -le 2 ]; then + echo $reco_nj > $post_dir/num_jobs + + $cmd JOB=1:$reco_nj $gmm_dir/log/compute_post.JOB.log \ + gmm-global-get-post \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + "ark:| gzip -c > $post_dir/post.JOB.gz" \ + "ark:| gzip -c > $post_dir/frame_loglikes.JOB.gz" +fi + +if [ $stage -le 3 ]; then + utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg} + + $cmd JOB=1:$reco_nj $post_dir/log/compute_average_post.JOB.log \ + gmm-global-post-to-feats \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp "ark:gunzip -c $post_dir/post.JOB.gz |" ark:- \| \ + matrix-sum-rows --do-average ark:- "ark:| gzip -c > $post_dir/avg_post.JOB.gz" +fi + +seg_dir=$dir/segmentation_`basename $data_uniform_seg` + +if [ $stage -le 4 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/compute_scores.JOB.log \ + ib-scoring-dense --input-factor=0.0 $clustering_opts \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + ark,t:$seg_dir/scores.JOB.txt ark:/dev/null +fi + +if [ $stage -le 5 ]; then + threshold=$(for n in `seq $reco_nj`; do + /export/a12/vmanoha1/kaldi-diarization-v2/src/ivectorbin/compute-calibration \ + ark,t:$seg_dir/scores.$n.txt -; done | \ + awk '{i += $1; j++;} END{print i / j}') + echo $threshold > $seg_dir/threshold +fi + +threshold=$(cat $seg_dir/threshold) +if [ $stage -le 6 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \ + agglomerative-cluster-ib --input-factor=0.0 --min-clusters=$min_clusters $clustering_opts \ + --max-merge-thresh=$threshold --verbose=3 \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + ark,t:$seg_dir/utt2cluster_id.JOB +fi + +if [ $stage -le 7 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log \ + segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \ + --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \ + ${data_uniform_seg}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-post-process --max-segment-length=1000 --overlap-length=250 ark:- ark:- \| \ + segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB +fi + +if [ $stage -le 8 ]; then + rm -r $out_data || true + utils/data/convert_data_dir_to_whole.sh $data $out_data + rm $out_data/{text,cmvn.scp} || true + + for n in `seq $reco_nj`; do + cat $seg_dir/utt2spk.$n + done > $out_data/utt2spk + + for n in `seq $reco_nj`; do + cat $seg_dir/segments.$n + done > $out_data/segments + + utils/utt2spk_to_spk2utt.pl $out_data/utt2spk > $out_data/spk2utt + utils/fix_data_dir.sh $out_data +fi diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh 
b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh new file mode 100755 index 00000000000..9ca3efb7b9a --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh @@ -0,0 +1,161 @@ +#! /bin/bash + +window=2.5 +overlap=0.0 +stage=-1 +cmd=queue.pl +reco_nj=4 +frame_shift=0.01 +frame_overlap=0.0 +utt_nj=18 +min_clusters=10 +clustering_opts="--stopping-threshold=0.5 --max-merge-thresh=0.25 --normalize-by-entropy" + +. path.sh +. utils/parse_options.sh + +set -o pipefail +set -e +set -u + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit 1 +fi + +data=$1 +dir=$2 +out_data=$3 + +num_frames=`perl -e "print int($window / $frame_shift + 0.5)"` +num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"` + +data_id=`basename $data` +data_uniform_seg=$dir/${data_id}_uniform_seg_window${window}_ovlp${overlap} + +mkdir -p $dir + +#segmentation-cluster-adjacent-segments --verbose=0 'ark:segmentation-copy --keep-label=1 "ark:gunzip -c exp/nnet3_lstm_sad_music/nnet_lstm_1e//segmentation_bn_eval97_whole_bp/orig_segmentation.1.gz |" ark:- | segmentation-split-segments --max-segment-length=250 --overlap-length=0 ark:- ark:- |' scp:data/bn_eval97_bp_hires/feats.scp "ark:| segmentation-post-process --merge-adjacent-segments ark:- ark:- | segmentation-to-segments ark:- ark,t:- /dev/null" 2>&1 | less + +if [ $stage -le 0 ]; then + rm -r ${data_uniform_seg} || true + mkdir -p ${data_uniform_seg} + + $cmd $dir/log/get_subsegments.log \ + segmentation-init-from-segments --frame-overlap=$frame_overlap $data/segments ark:- \| \ + segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \ + segmentation-cluster-adjacent-segments --verbose=3 ark:- "scp:$data/feats.scp" ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-to-segments --frame-overlap=0.0 ark:- ark:/dev/null \ + ${data_uniform_seg}/sub_segments + + utils/data/subsegment_data_dir.sh ${data} ${data_uniform_seg}{/sub_segments,} +fi + +gmm_dir=$dir/gmms +mkdir -p $gmm_dir + +utils/split_data.sh --per-reco ${data_uniform_seg} $reco_nj + +if [ $stage -le 1 ]; then + echo $reco_nj > $gmm_dir/num_jobs + $cmd JOB=1:$reco_nj $gmm_dir/log/train_gmm.JOB.log \ + gmm-global-init-models-from-feats --share-covars=true \ + --spk2utt-rspecifier=ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + --num-gauss-init=64 --num-gauss=64 --num-gauss-fraction=0.001 --max-gauss=512 --min-gauss=64 \ + --num-iters=20 --num-frames=500000 \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + ark,scp:$gmm_dir/gmm.JOB.ark,$gmm_dir/gmm.JOB.scp + + for n in `seq $reco_nj`; do + cat $gmm_dir/gmm.$n.scp + done > $gmm_dir/gmm.scp + +fi + +post_dir=$gmm_dir/post_`basename $data_uniform_seg` +mkdir -p $post_dir + +if [ $stage -le 2 ]; then + echo $reco_nj > $post_dir/num_jobs + + $cmd JOB=1:$reco_nj $gmm_dir/log/compute_post.JOB.log \ + gmm-global-get-post \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + "ark:| gzip -c > $post_dir/post.JOB.gz" \ + "ark:| gzip -c > $post_dir/frame_loglikes.JOB.gz" +fi + +if [ $stage -le 3 ]; then + $cmd JOB=1:$reco_nj $post_dir/log/compute_average_post.JOB.log \ + gmm-global-post-to-feats \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp "ark:gunzip -c $post_dir/post.JOB.gz |" ark:- 
\| \ + matrix-sum-rows --do-average ark:- "ark:| gzip -c > $post_dir/avg_post.JOB.gz" +fi + +seg_dir=$dir/segmentation_`basename $data_uniform_seg` + +if [ $stage -le 4 ]; then + utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg} + + $cmd JOB=1:$reco_nj $seg_dir/log/compute_scores.JOB.log \ + ib-scoring-dense --input-factor=0 $clustering_opts \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + ark,t:$seg_dir/scores.JOB.txt ark:/dev/null +fi + +if [ $stage -le 5 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/calibrate.JOB.log \ + /export/a12/vmanoha1/kaldi-diarization-v2/src/ivectorbin/compute-calibration \ + ark,t:$seg_dir/scores.JOB.txt $seg_dir/threshold.JOB.txt + + threshold=$(for n in `seq $reco_nj`; do cat $seg_dir/threshold.$n.txt; done | \ + awk '{i += $1; j++;} END{print i / j}') + echo $threshold > $seg_dir/threshold +fi + +threshold=$(cat $seg_dir/threshold) +if [ $stage -le 6 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \ + agglomerative-cluster-ib --input-factor=0.0 $clustering_opts \ + --max-merge-thresh=$threshold --verbose=3 \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + ark,t:$seg_dir/utt2cluster_id.JOB +fi + +if [ $stage -le 7 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log \ + segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \ + --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \ + ${data_uniform_seg}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-post-process --max-segment-length=1000 --overlap-length=250 ark:- ark:- \| \ + segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB +fi + +if [ $stage -le 8 ]; then + rm -r $out_data || true + utils/data/convert_data_dir_to_whole.sh $data $out_data + rm $out_data/{text,cmvn.scp} || true + + for n in `seq $reco_nj`; do + cat $seg_dir/utt2spk.$n + done > $out_data/utt2spk + + for n in `seq $reco_nj`; do + cat $seg_dir/segments.$n + done > $out_data/segments + + utils/utt2spk_to_spk2utt.pl $out_data/utt2spk > $out_data/spk2utt + utils/fix_data_dir.sh $out_data +fi diff --git a/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl b/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl new file mode 100755 index 00000000000..c0d1a9eeae2 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl @@ -0,0 +1,17 @@ +#! /usr/bin/perl + +# Converts a kaldi integer vector in text format to +# a kaldi vector in text format by adding a pair +# of square brackets around the data. +# Assumes the first column to be the utterance id. 
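So a line like "utt1 1 0 1 1" becomes "utt1 [ 1 0 1 1 ]". The same conversion as a Python sketch (not part of the patch):

    # Reads integer-vector lines from stdin, writes Kaldi text-format
    # vectors to stdout: "utt1 1 0 1 1" -> "utt1 [ 1 0 1 1 ]".
    import sys

    for line in sys.stdin:
        fields = line.split()
        if fields:
            print("{0} [ {1} ]".format(fields[0], " ".join(fields[1:])))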
+
+while (<>) {
+  chomp;
+  my @F = split;
+
+  printf ("$F[0] [ ");
+  for (my $i = 1; $i <= $#F; $i++) {
+    printf ("$F[$i] ");
+  }
+  print ("]\n");
+}
diff --git a/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py b/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py
new file mode 100755
index 00000000000..23dc5a14f09
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py
@@ -0,0 +1,79 @@
+#! /usr/bin/env python
+
+"""This script converts an RTTM with
+speaker info into kaldi utt2spk and segments"""
+
+import argparse
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script converts an RTTM with
+        speaker info into kaldi utt2spk and segments""")
+    parser.add_argument("--use-reco-id-as-spkr", type=str,
+                        choices=["true", "false"],
+                        help="Use the recording ID based on RTTM and "
+                        "reco2file_and_channel as the speaker")
+    parser.add_argument("rttm_file", type=str,
+                        help="""Input RTTM file.
+                        The format of the RTTM file is
+                        <type> <file-id> <channel-id> <begin-time> """
+                        """<duration> <NA> <NA> <speaker> <conf>""")
+    parser.add_argument("reco2file_and_channel", type=str,
+                        help="""Input reco2file_and_channel.
+                        The format is <recording-id> <file-id> <channel-id>.""")
+    parser.add_argument("utt2spk", type=str,
+                        help="Output utt2spk file")
+    parser.add_argument("segments", type=str,
+                        help="Output segments file")
+
+    args = parser.parse_args()
+
+    args.use_reco_id_as_spkr = bool(args.use_reco_id_as_spkr == "true")
+
+    return args
+
+def main():
+    args = get_args()
+
+    file_and_channel2reco = {}
+    for line in open(args.reco2file_and_channel):
+        parts = line.strip().split()
+        file_and_channel2reco[(parts[1], parts[2])] = parts[0]
+
+    utt2spk_writer = open(args.utt2spk, 'w')
+    segments_writer = open(args.segments, 'w')
+    for line in open(args.rttm_file):
+        parts = line.strip().split()
+        if parts[0] != "SPEAKER":
+            continue
+
+        file_id = parts[1]
+        channel = parts[2]
+
+        try:
+            reco = file_and_channel2reco[(file_id, channel)]
+        except KeyError as e:
+            raise Exception("Could not find recording with "
+                            "(file_id, channel) "
+                            "= ({0},{1}) in {2}: {3}\n".format(
+                                file_id, channel,
+                                args.reco2file_and_channel, str(e)))
+
+        start_time = float(parts[3])
+        end_time = start_time + float(parts[4])
+
+        if args.use_reco_id_as_spkr:
+            spkr = reco
+        else:
+            spkr = parts[7]
+
+        st = int(start_time * 100)
+        end = int(end_time * 100)
+        utt = "{0}-{1:06d}-{2:06d}".format(spkr, st, end)
+
+        utt2spk_writer.write("{0} {1}\n".format(utt, spkr))
+        segments_writer.write("{0} {1} {2:7.2f} {3:7.2f}\n".format(
+            utt, reco, start_time, end_time))
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py b/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py
new file mode 100755
index 00000000000..1443259286b
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py
@@ -0,0 +1,65 @@
+#! /usr/bin/env python
+
+"""This script converts kaldi-style utt2spk and segments to an RTTM"""
+
+import argparse
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script converts kaldi-style utt2spk and
+        segments to an RTTM""")
+
+    parser.add_argument("utt2spk", type=str,
+                        help="Input utt2spk file")
+    parser.add_argument("segments", type=str,
+                        help="Input segments file")
+    parser.add_argument("reco2file_and_channel", type=str,
+                        help="""Input reco2file_and_channel.
+ The format is .""") + parser.add_argument("rttm_file", type=str, + help="Output RTTM file") + + args = parser.parse_args() + return args + +def main(): + args = get_args() + + reco2file_and_channel = {} + for line in open(args.reco2file_and_channel): + parts = line.strip().split() + reco2file_and_channel[parts[0]] = (parts[1], parts[2]) + + utt2spk = {} + with open(args.utt2spk, 'r') as utt2spk_reader: + for line in utt2spk_reader: + parts = line.strip().split() + utt2spk[parts[0]] = parts[1] + + with open(args.rttm_file, 'w') as rttm_writer: + for line in open(args.segments, 'r'): + parts = line.strip().split() + + utt = parts[0] + spkr = utt2spk[utt] + + reco = parts[1] + + try: + file_id, channel = reco2file_and_channel[reco] + except KeyError as e: + raise Exception("Could not find recording {0} in {1}: " + "{2}\n".format(reco, + args.reco2file_and_channel, + str(e))) + + start_time = float(parts[2]) + duration = float(parts[3]) - start_time + + rttm_writer.write("SPEAKER {0} {1} {2:7.2f} {3:7.2f} " + " {4} \n".format( + file_id, channel, start_time, + duration, spkr)) + +if __name__ == '__main__': + main() diff --git a/egs/wsj/s5/steps/segmentation/decode_sad.sh b/egs/wsj/s5/steps/segmentation/decode_sad.sh new file mode 100755 index 00000000000..a39e93dd83f --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/decode_sad.sh @@ -0,0 +1,56 @@ +#! /bin/bash + +set -e +set -o pipefail + +cmd=run.pl +acwt=0.1 +beam=8 +max_active=1000 +get_pdfs=false +iter=final + +. path.sh + +. utils/parse_options.sh + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + echo " e.g.: $0 " + exit 1 +fi + +graph_dir=$1 +log_likes_dir=$2 +dir=$3 + +mkdir -p $dir +nj=`cat $log_likes_dir/num_jobs` +echo $nj > $dir/num_jobs + +if [ -f $dir/$iter.mdl ]; then + srcdir=$dir +else + srcdir=`dirname $dir` +fi + +for f in $srcdir/$iter.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do + if [ ! -f $f ]; then + echo "$0: Could not find file $f" + exit 1 + fi +done + +decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) + +ali="ark:| ali-to-phones --per-frame $srcdir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" + +if $get_pdfs; then + ali="ark:| ali-to-pdf $srcdir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" +fi + +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + decode-faster-mapped ${decoder_opts[@]} \ + $srcdir/$iter.mdl \ + $graph_dir/HCLG.fst "ark:gunzip -c $log_likes_dir/log_likes.JOB.gz |" \ + ark:/dev/null "$ali" diff --git a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh new file mode 100755 index 00000000000..84287230fba --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh @@ -0,0 +1,109 @@ +#! /bin/bash + +set -e +set -o pipefail +set -u + +stage=-1 +segmentation_config=conf/segmentation.conf +cmd=run.pl + +# Viterbi options +min_silence_duration=30 # minimum number of frames for silence +min_speech_duration=30 # minimum number of frames for speech +frame_subsampling_factor=1 +nonsil_transition_probability=0.1 +sil_transition_probability=0.1 +sil_prior=0.5 +speech_prior=0.5 +use_unigram_lm=true + +# Decoding options +acwt=1 +beam=10 +max_active=7000 + +. 
utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "Usage: $0 " + echo " e.g.: $0 data/babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/sad_babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/segmentation_babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/segmentation_babel_bengali_dev10h/babel_bengali_dev10h.seg" + exit 1 +fi + +data=$1 +sad_likes_dir=$2 +dir=$3 +out_data=$4 + +t=sil${sil_prior}_sp${speech_prior} +lang=$dir/lang_test_${t} + +min_silence_duration=`perl -e "print (int($min_silence_duration / $frame_subsampling_factor))"` +min_speech_duration=`perl -e "print (int($min_speech_duration / $frame_subsampling_factor))"` + +if [ $stage -le 1 ]; then + mkdir -p $lang + + steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \ + --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$nonsil_transition_probability" $lang + + cp $lang/phones.txt $lang/words.txt +fi + +feat_dim=2 # dummy. We don't need this. +if [ $stage -le 2 ]; then + $cmd $dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $dir/tree \| \ + copy-transition-model --binary=false - $dir/trans.mdl || exit 1 +fi + +if [ $stage -le 3 ]; then + if $use_unigram_lm; then + cat > $lang/word2prior < $lang/G.fst + else + { + echo "1 0.99 1:0.6 2:0.39"; + echo "2 0.01 1:0.5 2:0.49"; + } | \ + steps/segmentation/internal/make_bigram_G_fst.py - - | \ + fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt \ + --keep_isymbols=false --keep_osymbols=false \ + > $lang/G.fst + fi +fi + +graph_dir=$dir/graph_test_${t} + +if [ $stage -le 4 ]; then + $cmd $dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $dir $dir/graph_test_${t} || exit 1 + cp $dir/trans.mdl $graph_dir +fi + +if [ $stage -le 5 ]; then + steps/segmentation/decode_sad.sh \ + --acwt $acwt --beam $beam --max-active $max_active --iter trans \ + $graph_dir $sad_likes_dir $dir +fi + +if [ $stage -le 6 ]; then + cat > $lang/phone2sad_map < 8kHz sampling frequency. +do_downsampling=false + +# Segmentation configs +min_silence_duration=30 +min_speech_duration=30 +sil_prior=0.5 +speech_prior=0.5 +segmentation_config=conf/segmentation_speech.conf +convert_data_dir_to_whole=true + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "Usage: $0 " + echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 mfcc_hires_bp data/ami_sdm1_dev" + exit 1 +fi + +src_data_dir=$1 # The input data directory that needs to be segmented. + # Any segments in that will be ignored. +sad_nnet_dir=$2 # The SAD neural network +mfcc_dir=$3 # The directory to store the features +data_dir=$4 # The output data directory will be ${data_dir}_seg + +affix=${affix:+_$affix} +feat_affix=${feat_affix:+_$feat_affix} + +data_id=`basename $data_dir` +sad_dir=${sad_nnet_dir}/${sad_name}${affix}_${data_id}_whole${feat_affix} +seg_dir=${sad_nnet_dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affix} + +export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" +[ ! 
-z `which sph2pipe` ] + +whole_data_dir=${sad_dir}/${data_id}_whole + +if $convert_data_dir_to_whole; then + if [ $stage -le 0 ]; then + utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $whole_data_dir + fi + + utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}${feat_affix}_hires + fi + + if [ $stage -le 1 ]; then + steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \ + ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + steps/compute_cmvn_stats.sh ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + utils/fix_data_dir.sh ${whole_data_dir}${feat_affix}_hires + fi + test_data_dir=${whole_data_dir}${feat_affix}_hires +else + test_data_dir=$src_data_dir +fi + +post_vec=$sad_nnet_dir/post_${output_name}.vec +if [ ! -f $sad_nnet_dir/post_${output_name}.vec ]; then + echo "$0: Could not find $sad_nnet_dir/post_${output_name}.vec. See the last stage of local/segmentation/run_train_sad.sh" + exit 1 +fi + +if [ $stage -le 2 ]; then + steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \ + --post-vec "$post_vec" \ + --iter $iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk 150 \ + --stage $sad_stage --output-name $output_name \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-raw-nnet true ${test_data_dir} $sad_nnet_dir $sad_dir +fi + +if [ $stage -le 3 ]; then + steps/segmentation/decode_sad_to_segments.sh \ + --use-unigram-lm false \ + --frame-subsampling-factor $frame_subsampling_factor \ + --min-silence-duration $min_silence_duration \ + --min-speech-duration $min_speech_duration \ + --sil-prior $sil_prior \ + --speech-prior $speech_prior \ + --segmentation-config $segmentation_config --cmd "$train_cmd" \ + ${test_data_dir} $sad_dir $seg_dir ${data_dir}_seg +fi + +# Subsegment data directory +if [ $stage -le 4 ]; then + rm ${data_dir}_seg/feats.scp || true + utils/data/get_reco2num_frames.sh --cmd "$train_cmd" --nj $reco_nj ${test_data_dir} + awk '{print $1" "$2}' ${data_dir}_seg/segments | \ + utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \ + ${data_dir}_seg/utt2max_frames + + #frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl` + #utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \ + # $frame_shift_info ${data_dir}_seg/segments | \ + # utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \ + # ${data_dir}_seg/feats.scp + steps/compute_cmvn_stats.sh --fake ${data_dir}_seg + + utils/fix_data_dir.sh ${data_dir}_seg +fi diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh new file mode 100755 index 00000000000..7211b6b7084 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +set -e +set -o pipefail +set -u + +. path.sh +. 
cmd.sh + +affix= # Affix for the segmentation +nj=32 # works on recordings as against on speakers + +# Feature options (Must match training) +mfcc_config=conf/mfcc_hires_bp.conf +feat_affix=bp # Affix for the type of feature used + +convert_data_dir_to_whole=true + +# Set to true if the test data has > 8kHz sampling frequency. +do_downsampling=false + +stage=-1 +sad_stage=-1 +output_name=output-speech # The output node in the network +sad_name=sad # Base name for the directory storing the computed loglikes +segmentation_name=segmentation # Base name for the directory doing segmentation + +# SAD network config +iter=final # Model iteration to use + +# Contexts must ideally match training for LSTM models, but +# may not necessarily for stats components +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 + +frame_subsampling_factor=1 # Subsampling at the output + +transition_scale=3.0 +loopscale=0.1 +acwt=1.0 + +# Segmentation configs +segmentation_config=conf/segmentation_speech.conf + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 mfcc_hires_bp data/ami_sdm1_dev" + exit 1 +fi + +src_data_dir=$1 # The input data directory that needs to be segmented. + # Any segments in that will be ignored. +sad_nnet_dir=$2 # The SAD neural network +lang=$3 +mfcc_dir=$4 # The directory to store the features +data_dir=$5 # The output data directory will be ${data_dir}_seg + +affix=${affix:+_$affix} +feat_affix=${feat_affix:+_$feat_affix} + +data_id=`basename $data_dir` +sad_dir=${sad_nnet_dir}/${sad_name}${affix}_${data_id}_whole${feat_affix} +seg_dir=${sad_nnet_dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affix} + +export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" +[ ! -z `which sph2pipe` ] + +test_data_dir=data/${data_id}${feat_affix}_hires + +if $convert_data_dir_to_whole; then + if [ $stage -le 0 ]; then + whole_data_dir=${sad_dir}/${data_id}_whole + utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $whole_data_dir + fi + + rm -r ${test_data_dir} || true + utils/copy_data_dir.sh ${whole_data_dir} $test_data_dir + fi +else + if [ $stage -le 0 ]; then + rm -r ${test_data_dir} || true + utils/copy_data_dir.sh $src_data_dir $test_data_dir + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $test_data_dir + fi + fi +fi + +if [ $stage -le 1 ]; then + utils/fix_data_dir.sh $test_data_dir + steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $nj --cmd "$train_cmd" \ + ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir + steps/compute_cmvn_stats.sh ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir + utils/fix_data_dir.sh ${test_data_dir} +fi + +post_vec=$sad_nnet_dir/post_${output_name}.vec +if [ ! -f $sad_nnet_dir/post_${output_name}.vec ]; then + echo "$0: Could not find $sad_nnet_dir/post_${output_name}.vec. See the last stage of local/segmentation/run_train_sad.sh" + exit 1 +fi + +create_topo=true +if $create_topo; then + if [ ! 
-f $lang/classes_info.txt ]; then
+    echo "$0: Could not find $lang/topo or $lang/classes_info.txt"
+    exit 1
+  else
+    steps/segmentation/internal/prepare_simple_hmm_lang.py \
+      $lang/classes_info.txt $lang
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  simple-hmm-init $lang/topo $lang/init.mdl
+
+  $train_cmd $sad_nnet_dir/log/get_final_${output_name}_model.log \
+    nnet3-am-init $lang/init.mdl \
+    "nnet3-copy --edits='rename-node old-name=$output_name new-name=output' $sad_nnet_dir/$iter.raw - |" - \| \
+    nnet3-am-adjust-priors - $sad_nnet_dir/post_${output_name}.vec \
+    $sad_nnet_dir/${iter}_${output_name}.mdl
+fi
+iter=${iter}_${output_name}
+
+if [ $stage -le 4 ]; then
+  steps/nnet3/compute_output.sh --nj $nj --cmd "$train_cmd" \
+    --iter $iter --use-raw-nnet false \
+    --extra-left-context $extra_left_context \
+    --extra-right-context $extra_right_context \
+    --frames-per-chunk 150 \
+    --stage $sad_stage \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    ${test_data_dir} $sad_nnet_dir $sad_dir
+fi
+
+graph_dir=${sad_nnet_dir}/graph_${output_name}
+
+if [ $stage -le 5 ]; then
+  cp -r $lang $graph_dir
+
+  if [ ! -f $lang/final.mdl ]; then
+    echo "$0: Could not find $lang/final.mdl!"
+    echo "$0: Using $lang/init.mdl instead"
+    cp $lang/init.mdl $graph_dir/final.mdl
+  else
+    cp $lang/final.mdl $graph_dir
+  fi
+
+  $train_cmd $lang/log/make_graph.log \
+    make-simple-hmm-graph --transition-scale=$transition_scale \
+    --self-loop-scale=$loopscale \
+    $graph_dir/final.mdl \| \
+    fstdeterminizestar --use-log=true \| \
+    fstrmepslocal \| \
+    fstminimizeencoded '>' $graph_dir/HCLG.fst
+fi
+
+if [ $stage -le 6 ]; then
+  steps/segmentation/decode_sad.sh --acwt 1.0 --cmd "$decode_cmd" \
+    --iter ${iter} \
+    --get-pdfs true $graph_dir $sad_dir $seg_dir
+fi
+
+if [ $stage -le 7 ]; then
+  steps/segmentation/post_process_sad_to_subsegments.sh \
+    --cmd "$train_cmd" --segmentation-config $segmentation_config \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    ${test_data_dir} $lang/phone2sad_map ${seg_dir} \
+    ${seg_dir} ${data_dir}_seg
+
+  cp $src_data_dir/wav.scp ${data_dir}_seg
+fi
diff --git a/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl b/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl
new file mode 100755
index 00000000000..06a762d7762
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl
@@ -0,0 +1,198 @@
+#!/usr/bin/env perl
+
+# Copyright 2014  Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar
+# Apache 2.0
+
+################################################################################
+#
+# This script was written to check the goodness of automatic segmentation tools
+# It assumes input in the form of two Kaldi segments files, i.e. a file each of
+# whose lines contain four space-separated values:
+#
+#    UtteranceID FileID StartTime EndTime
+#
+# It computes # missed frames, # false positives and # overlapping frames.
+#
+################################################################################
+
+if ($#ARGV == 1) {
+  $ReferenceSegmentation = $ARGV[0];
+  $HypothesizedSegmentation = $ARGV[1];
+  printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n",
+                 $ReferenceSegmentation,
+                 $HypothesizedSegmentation);
+} else {
+  printf STDERR "This program compares the reference segmentation with the proposed segmentation\n";
+  printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n";
+  printf STDERR "e.g.  $0 data/dev10h/segments data/dev10h.seg/segments\n";
+  exit (0);
+}
+
+################################################################################
+# First read the reference segmentation, and
+# store the start- and end-times of all segments in each file.
+################################################################################
+
+open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |")
+  || die "Unable to open $ReferenceSegmentation";
+$numLines = 0;
+while ($line=<SEGMENTS>) {
+  chomp $line;
+  @field = split("[ \t]+", $line);
+  unless ($#field == 3) {
+    printf STDERR "Skipping unparseable line in file $ReferenceSegmentation\n\t$line\n";
+    next;
+  }
+  $fileID = $field[1];
+  unless (exists $firstSeg{$fileID}) {
+    $firstSeg{$fileID} = $numLines;
+    $actualSpeech{$fileID} = 0.0;
+    $hypothesizedSpeech{$fileID} = 0.0;
+    $foundSpeech{$fileID} = 0.0;
+    $falseAlarm{$fileID} = 0.0;
+    $minStartTime{$fileID} = 0.0;
+    $maxEndTime{$fileID} = 0.0;
+  }
+  $refSegName[$numLines] = $field[0];
+  $refSegStart[$numLines] = $field[2];
+  $refSegEnd[$numLines] = $field[3];
+  $actualSpeech{$fileID} += ($field[3]-$field[2]);
+  $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]);
+  $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]);
+  $lastSeg{$fileID} = $numLines;
+  ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $ReferenceSegmentation\n";
+
+################################################################################
+# Process hypothesized segments sequentially, and gather speech/nonspeech stats
+################################################################################
+
+open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |")
+  # Kaldi segments files are sorted by UtteranceID, but we re-sort them here
+  # so that all segments of a file are read together, sorted by start-time.
+  || die "Unable to open $HypothesizedSegmentation";
+$numLines = 0;
+$totalHypSpeech = 0.0;
+$totalFoundSpeech = 0.0;
+$totalFalseAlarm = 0.0;
+$numShortSegs = 0;
+$numLongSegs = 0;
+while ($line=<SEGMENTS>) {
+  chomp $line;
+  @field = split("[ \t]+", $line);
+  unless ($#field == 3) {
+    printf STDERR "Skipping unparseable line in file $HypothesizedSegmentation\n\t$line\n";
+    next;
+  }
+  $fileID = $field[1];
+  $segStart = $field[2];
+  $segEnd = $field[3];
+  if (exists $firstSeg{$fileID}) {
+    # This FileID exists in the reference segmentation
+    # So gather statistics for this UtteranceID
+    $hypothesizedSpeech{$fileID} += ($segEnd-$segStart);
+    $totalHypSpeech += ($segEnd-$segStart);
+    if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) {
+      # This entire segment is a false alarm
+      $falseAlarm{$fileID} += ($segEnd-$segStart);
+      $totalFalseAlarm += ($segEnd-$segStart);
+    } else {
+      # This segment may overlap one or more reference segments
+      $p = $firstSeg{$fileID};
+      while ($refSegEnd[$p]<=$segStart) {
+        ++$p;
+      }
+      # The overlap, if any, begins at the reference segment p
+      $q = $lastSeg{$fileID};
+      while ($refSegStart[$q]>=$segEnd) {
+        --$q;
+      }
+      # The overlap, if any, ends at the reference segment q
+      if ($q<$p) {
+        # This segment sits entirely in the nonspeech region
+        # between the two reference speech segments q and p
+        $falseAlarm{$fileID} += ($segEnd-$segStart);
+        $totalFalseAlarm += ($segEnd-$segStart);
+      } else {
+        if (($segEnd-$segStart)<0.20) {
+          # For diagnosing Pascal's VAD segmentation
+          print STDOUT "Found short speech region $line\n";
+          ++$numShortSegs;
+        } elsif (($segEnd-$segStart)>60.0) {
+          ++$numLongSegs;
+          # For diagnosing Pascal's VAD segmentation
+          print STDOUT "Found long speech region $line\n";
+        }
+        # There is some overlap with segments p through q
+        for ($s=$p; $s<=$q; ++$s) {
+          if ($segStart<$refSegStart[$s]) {
+            # There is a leading false alarm portion before s
+            $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart);
+            $totalFalseAlarm += ($refSegStart[$s]-$segStart);
+            $segStart=$refSegStart[$s];
+          }
+          $speechPortion = ($refSegEnd[$s]<$segEnd) ?
+            ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart);
+          $foundSpeech{$fileID} += $speechPortion;
+          $totalFoundSpeech += $speechPortion;
+          $segStart=$refSegEnd[$s];
+        }
+        if ($segEnd>$segStart) {
+          # There is a trailing false alarm portion after q
+          $falseAlarm{$fileID} += ($segEnd-$segStart);
+          $totalFalseAlarm += ($segEnd-$segStart);
+        }
+      }
+    }
+  } else {
+    # This FileID does not exist in the reference segmentation
+    # So all this speech counts as a false alarm
+    printf STDERR ("Unexpected fileID in hypothesized segments: %s", $fileID);
+    $totalFalseAlarm += ($segEnd-$segStart);
+  }
+  ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $HypothesizedSegmentation\n";
+
+################################################################################
+# Now that all hypothesized segments have been processed, compute needed stats
+################################################################################
+
+$totalActualSpeech = 0.0;
+$totalNonSpeechEst = 0.0; # This is just a crude estimate of total nonspeech.
+foreach $fileID (sort keys %actualSpeech) {
+  $totalActualSpeech += $actualSpeech{$fileID};
+  $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID};
+  #######################################################################
+  # Print file-wise statistics to STDOUT; can pipe to /dev/null if needed
+  #######################################################################
+  printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d\%), %.2f min false alarm (~%d\%)\n",
+                 $fileID,
+                 ($actualSpeech{$fileID}/60.0),
+                 ($hypothesizedSpeech{$fileID}/60.0),
+                 ($foundSpeech{$fileID}/60.0),
+                 ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)),
+                 ($falseAlarm{$fileID}/60.0),
+                 ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01)));
+}
+
+################################################################################
+# Finally, we have everything needed to report the segmentation statistics.
+################################################################################
+
+printf STDERR ("------------------------------------------------------------------------\n");
+printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d\%), %.2f hrs false alarm (~%d\%)\n",
+               ($totalActualSpeech/3600.0),
+               ($totalHypSpeech/3600.0),
+               ($totalFoundSpeech/3600.0),
+               ($totalFoundSpeech*100/($totalActualSpeech+0.000001)),
+               ($totalFalseAlarm/3600.0),
+               ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001)));
+printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n");
+printf STDERR ("------------------------------------------------------------------------\n");
diff --git a/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl b/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl
new file mode 100755
index 00000000000..79a42aa9852
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl
@@ -0,0 +1,21 @@
+#! /usr/bin/perl
+use strict;
+use warnings;
+
+# This script parses a features config file such as conf/mfcc.conf
+# and returns the pair of values frame_shift and frame_overlap in seconds.
+
+my $frame_shift = 0.01;
+my $frame_overlap = 0.015;
+
+while (<>) {
+  if (m/--frame-length=(\d+)/) {
+    $frame_shift = $1 / 1000;
+  }
+
+  if (m/--window-length=(\d+)/) {
+    $frame_overlap = $1 / 1000 - $frame_shift;
+  }
+}
+
+print "$frame_shift $frame_overlap\n";
diff --git a/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl b/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl
new file mode 100755
index 00000000000..57f63b517f2
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl
@@ -0,0 +1,58 @@
+#! /usr/bin/perl
+use strict;
+use warnings;
+
+my $field_begin = -1;
+my $field_end = -1;
+
+if ($ARGV[0] eq "-f") {
+  shift @ARGV;
+  my $field_spec = shift @ARGV;
+  if ($field_spec =~ m/^\d+$/) {
+    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+  }
+  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+    if ($1 ne "") {
+      $field_begin = $1 - 1;  # Change to zero-based indexing.
+    }
+    if ($2 ne "") {
+      $field_end = $2 - 1;  # Change to zero-based indexing.
+    }
+  }
+  if (!defined $field_begin && !defined $field_end) {
+    die "Bad argument to -f option: $field_spec";
+  }
+}
+
+if (scalar @ARGV != 1 && scalar @ARGV != 2 ) {
+  print "Usage: get_reverb_scp.pl [-f <field-range>] <num-reps> [<prefix>] < input_scp > output_scp\n";
+  exit(1);
+}
+
+my $num_reps = $ARGV[0];
+my $prefix = "rev";
+
+if (scalar @ARGV == 2) {
+  $prefix = $ARGV[1];
+}
+
+while (<STDIN>) {
+  chomp;
+  my @A = split;
+
+  for (my $i = 1; $i <= $num_reps; $i++) {
+    for (my $pos = 0; $pos <= $#A; $pos++) {
+      my $a = $A[$pos];
+      if ( ($field_begin < 0 || $pos >= $field_begin)
+           && ($field_end < 0 || $pos <= $field_end) ) {
+        if ($a =~ m/^(sp[0-9.]+-)(.+)$/) {
+          $a = $1 . "$prefix" . $i . "_" . $2;
+        } else {
+          $a = "$prefix" . $i . "_" . $a;
+        }
+      }
+      print $a . " ";
+    }
+    print "\n";
+  }
+}
diff --git a/egs/wsj/s5/steps/segmentation/get_sad_map.py b/egs/wsj/s5/steps/segmentation/get_sad_map.py
new file mode 100755
index 00000000000..222e6c1a512
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/get_sad_map.py
@@ -0,0 +1,132 @@
+#! /usr/bin/env python
+
+"""This script prints a mapping from phones to speech
+activity labels
+0 for silence, 1 for speech, 2 for noise and 3 for OOV.
+Other labels can be optionally defined.
+e.g. If 1, 2 and 3 are silence phones, 4, 5 and 6 are speech phones,
+the SAD map would be
+1 0
+2 0
+3 0
+4 1
+5 1
+6 1.
+The silence and speech are read from the phones/silence.txt and
+phones/nonsilence.txt from the lang directory.
+An initial SAD map can be provided using --init-sad-map to override
+the above default mapping of phones. This is useful to say map
+<unk> or noise phones to separate SAD labels.
+"""
+
+import argparse
+import sys
+
+sys.path.insert(0, 'steps')
+import libs.common as common_lib
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script prints a mapping from phones to speech
+        activity labels
+        0 for silence, 1 for speech, 2 for noise and 3 for OOV.
+        Other labels can be optionally defined.
+        e.g. If 1, 2 and 3 are silence phones, 4, 5 and 6 are speech phones,
+        the SAD map would be
+        1 0
+        2 0
+        3 0
+        4 1
+        5 1
+        6 1.
+        The silence and speech are read from the phones/silence.txt and
+        phones/nonsilence.txt from the lang directory.
+        An initial SAD map can be provided using --init-sad-map to override
+        the above default mapping of phones. This is useful to say map
+        <unk> or noise phones to separate SAD labels.
+        """)
+
+    parser.add_argument("--init-sad-map", type=str, action=common_lib.NullstrToNoneAction,
+                        help="""Initial SAD map that will be used to override
+                        the default mapping using phones/silence.txt and
+                        phones/nonsilence.txt. Does not need to specify labels
+                        for all the phones.
+                        e.g.
+                        <unk> 3
+                        <noise> 2""")
+
+    noise_group = parser.add_mutually_exclusive_group()
+    noise_group.add_argument("--noise-phones-file", type=str,
+                             action=common_lib.NullstrToNoneAction,
+                             help="Map noise phones from file to label 2")
+    noise_group.add_argument("--noise-phones-list", type=str,
+                             action=common_lib.NullstrToNoneAction,
+                             help="A colon-separated list of noise phones to "
+                             "map to label 2")
+    parser.add_argument("--unk", type=str, action=common_lib.NullstrToNoneAction,
+                        help="""UNK phone, if provided will be mapped to
+                        label 3""")
+
+    parser.add_argument("--map-noise-to-sil", type=str,
+                        action=common_lib.StrToBoolAction,
+                        choices=["true", "false"], default=False,
+                        help="""Map noise phones to silence before writing the
+                        map. i.e.
anything with label 2 is mapped to + label 0.""") + parser.add_argument("--map-unk-to-speech", type=str, + action=common_lib.StrToBoolAction, + choices=["true", "false"], default=False, + help="""Map UNK phone to speech before writing the map + i.e. anything with label 3 is mapped to label 1.""") + + parser.add_argument("lang_dir") + + args = parser.parse_args() + + return args + + +def main(): + args = get_args() + + sad_map = {} + + for line in open('{0}/phones/nonsilence.txt'.format(args.lang_dir)): + parts = line.strip().split() + sad_map[parts[0]] = 1 + + for line in open('{0}/phones/silence.txt'.format(args.lang_dir)): + parts = line.strip().split() + sad_map[parts[0]] = 0 + + if args.init_sad_map is not None: + for line in open(args.init_sad_map): + parts = line.strip().split() + try: + sad_map[parts[0]] = int(parts[1]) + except Exception: + raise Exception("Invalid line " + line) + + if args.unk is not None: + sad_map[args.unk] = 3 + + noise_phones = {} + if args.noise_phones_file is not None: + for line in open(args.noise_phones_file): + parts = line.strip().split() + noise_phones[parts[0]] = 1 + + if args.noise_phones_list is not None: + for x in args.noise_phones_list.split(":"): + noise_phones[x] = 1 + + for x, l in sad_map.iteritems(): + if l == 2 and args.map_noise_to_sil: + l = 0 + if l == 3 and args.map_unk_to_speech: + l = 1 + print ("{0} {1}".format(x, l)) + +if __name__ == "__main__": + main() diff --git a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh new file mode 100755 index 00000000000..0d8939a9b80 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh @@ -0,0 +1,59 @@ +#! /bin/bash + +set -o pipefail +set -e +set -u + +. path.sh + +cmd=run.pl + +. parse_options.sh + +if [ $# -ne 3 ]; then + echo "This script converts the alignment in the alignment directory " + echo "to speech activity segments based on the provided phone-map." + echo "The output is stored in sad_seg.*.ark along with an scp-file " + echo "sad_seg.scp in Segmentation format.\n" + echo "If alignment directory has frame_subsampling_factor, the segments " + echo "are applied that frame-subsampling-factor.\n" + echo "The phone-map file must have two columns: " + echo " \n" + echo "\n" + echo "Usage: $0 " + echo "e.g. : $0 exp/tri3_ali data/lang/phones/sad.map exp/tri3_ali_vad" + exit 1 +fi + +ali_dir=$1 +phone_map=$2 +dir=$3 + +for f in $phone_map $ali_dir/ali.1.gz; do + [ ! 
+  [ ! -f $f ] && echo "$0: Could not find $f" && exit 1
+done
+
+mkdir -p $dir
+
+nj=`cat $ali_dir/num_jobs` || exit 1
+echo $nj > $dir/num_jobs
+
+frame_subsampling_factor=1
+if [ -f $ali_dir/frame_subsampling_factor ]; then
+  frame_subsampling_factor=`cat $ali_dir/frame_subsampling_factor`
+fi
+
+dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
+
+$cmd JOB=1:$nj $dir/log/get_sad.JOB.log \
+  segmentation-init-from-ali \
+  "ark:gunzip -c ${ali_dir}/ali.JOB.gz | ali-to-phones --per-frame ${ali_dir}/final.mdl ark:- ark:- |" \
+  ark:- \| \
+  segmentation-copy --label-map=$phone_map \
+  --frame-subsampling-factor=$frame_subsampling_factor ark:- ark:- \| \
+  segmentation-post-process --merge-adjacent-segments ark:- \
+  ark,scp:$dir/sad_seg.JOB.ark,$dir/sad_seg.JOB.scp
+
+for n in `seq $nj`; do
+  cat $dir/sad_seg.$n.scp
+done | sort -k1,1 > $dir/sad_seg.scp
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py b/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py
new file mode 100755
index 00000000000..5ad7e867d10
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py
@@ -0,0 +1,52 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse, math
+
+def ParseArgs():
+    parser = argparse.ArgumentParser(description="""Make a simple unigram FST
+for decoding for segmentation purposes.""")
+
+    parser.add_argument("--word2prior-map", type=str, required=True,
+                        help="A file with priors for the different words")
+    parser.add_argument("--end-probability", type=float, default=0.01,
+                        help="Ending probability")
+
+    args = parser.parse_args()
+
+    return args
+
+def ReadMap(map_file):
+    out_map = {}
+    sum_prob = 0
+    for line in open(map_file):
+        parts = line.strip().split()
+        if len(parts) == 0:
+            continue
+        if len(parts) != 2:
+            raise Exception("Invalid line {0} in {1}".format(line.strip(), map_file))
+
+        if parts[0] in out_map:
+            raise Exception("Duplicate entry of {0} in {1}".format(parts[0], map_file))
+
+        prob = float(parts[1])
+        out_map[parts[0]] = prob
+
+        sum_prob += prob
+
+    return (out_map, sum_prob)
+
+def Main():
+    args = ParseArgs()
+
+    word2prior, sum_prob = ReadMap(args.word2prior_map)
+    sum_prob += args.end_probability
+
+    for w, p in word2prior.items():
+        print("0 0 {word} {word} {log_p}".format(
+            word=w, log_p=-math.log(p / sum_prob)))
+    print("0 {log_p}".format(
+        log_p=-math.log(args.end_probability / sum_prob)))
+
+if __name__ == '__main__':
+    Main()
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py b/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py
new file mode 100755
index 00000000000..2431d293c4c
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py
@@ -0,0 +1,174 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import logging
+import math
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script generates a bigram G.fst lang for decoding.
+        It needs as an input classes_info file with the format:
+        <class-id> <initial-probability> <list-of-transitions>,
+        where each transition is <destination-class>:<transition-probability>.
+        destination-class -1 is used to represent the final probability.""")
+
+    parser.add_argument("classes_info", type=argparse.FileType('r'),
+                        help="File with classes_info")
+    parser.add_argument("out_file", type=argparse.FileType('w'),
+                        help="Output G.fst. Use '-' for stdout")
+    args = parser.parse_args()
+    return args
+
+
+class ClassInfo(object):
+    def __init__(self, class_id):
+        self.class_id = class_id
+        self.start_state = -1
+        self.initial_prob = 0
+        self.transitions = {}
+
+    def __str__(self):
+        return ("class-id={0},start-state={1},"
+                "initial-prob={2:.2f},transitions={3}".format(
+                    self.class_id, self.start_state,
+                    self.initial_prob, ' '.join(
+                        ['{0}:{1}'.format(x, y)
+                         for x, y in self.transitions.items()])))
+
+
+def read_classes_info(file_handle):
+    classes_info = {}
+
+    num_states = 1
+    num_classes = 0
+
+    for line in file_handle.readlines():
+        try:
+            parts = line.split()
+            class_id = int(parts[0])
+            assert class_id > 0, class_id
+            if class_id in classes_info:
+                raise RuntimeError(
+                    "Duplicate class-id {0} in file {1}".format(
+                        class_id, file_handle.name))
+
+            classes_info[class_id] = ClassInfo(class_id)
+            class_info = classes_info[class_id]
+            class_info.initial_prob = float(parts[1])
+            class_info.start_state = num_states
+            num_states += 1
+            num_classes += 1
+
+            total_prob = 0.0
+            if len(parts) > 2:
+                for part in parts[2:]:
+                    dest_class, transition_prob = part.split(':')
+                    dest_class = int(dest_class)
+                    total_prob += float(transition_prob)
+
+                    if total_prob > 1.0:
+                        raise ValueError("total-probability out of class {0} "
+                                         "is {1} > 1.0".format(class_id,
+                                                               total_prob))
+
+                    if dest_class in class_info.transitions:
+                        logger.error(
+                            "Duplicate transition to class-id {0} "
+                            "in transitions".format(dest_class))
+                        raise RuntimeError
+                    class_info.transitions[dest_class] = float(transition_prob)
+
+                if -1 in class_info.transitions:
+                    if abs(total_prob - 1.0) > 0.001:
+                        raise ValueError("total-probability out of class {0} "
+                                         "is {1} != 1.0".format(class_id,
+                                                                total_prob))
+                else:
+                    class_info.transitions[-1] = 1.0 - total_prob
+            else:
+                raise RuntimeError(
+                    "No transitions out of class {0}".format(class_id))
+        except Exception:
+            logger.error("Error processing line %s in file %s",
+                         line, file_handle.name)
+            raise
+
+    # Final state
+    classes_info[-1] = ClassInfo(-1)
+    class_info = classes_info[-1]
+    class_info.start_state = num_states
+
+    for class_id, class_info in classes_info.items():
+        logger.info("For class %d, got class-info %s", class_id, class_info)
+
+    return classes_info, num_classes
+
+
+def print_states_for_class(class_id, classes_info, out_file):
+    class_info = classes_info[class_id]
+
+    state = class_info.start_state
+
+    # Transition from the FST initial state
+    print("0 {end} <eps> <eps> {logprob}".format(
+        end=state, logprob=-math.log(class_info.initial_prob)),
+        file=out_file)
+
+    for dest_class, prob in class_info.transitions.items():
+        try:
+            if dest_class == class_id:   # self loop
+                next_state = state
+            else:   # other transition
+                next_state = classes_info[dest_class].start_state
+
+            print("{start} {end} {class_id} {class_id} {logprob}".format(
+                start=state, end=next_state, class_id=class_id,
+                logprob=-math.log(prob)),
+                file=out_file)
+
+        except Exception:
+            logger.error("Failed to add transition (%d->%d).\n"
+                         "classes_info = %s", class_id, dest_class,
+                         class_info)
+            raise
+
+    print("{start} {final} {class_id} {class_id}".format(
+        start=state, final=classes_info[-1].start_state,
+        class_id=class_id),
+        file=out_file)
+    print("{0}".format(classes_info[-1].start_state), file=out_file)
+
+
+def run(args):
+    classes_info, num_classes = read_classes_info(args.classes_info)
+
+    for class_id in range(1, num_classes + 1):
+        print_states_for_class(class_id, classes_info, args.out_file)
+
+
+def main():
+    args = None
+    try:
+        args = get_args()
+        run(args)
+    except Exception:
+        logger.error("Failed to make G.fst")
+        raise
+    finally:
+        if args is not None:
+            for f in [args.classes_info, args.out_file]:
+                if f is not None:
+                    f.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh b/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh
new file mode 100755
index 00000000000..5edb3eb2bb6
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Copyright 2016  Vimal Manohar
+
+# Begin configuration section.
+stage=0
+cmd=run.pl
+iter=final    # use $iter.mdl from $model_dir
+tree=tree
+tscale=1.0    # transition scale.
+loopscale=0.1 # scale for self-loops.
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <lang-dir> <model-dir> <graph-dir>"
+  echo " e.g.: $0 exp/vad_dev/lang exp/vad_dev exp/vad_dev/graph"
+  echo "Makes the graph in \$dir, corresponding to the model in \$model_dir"
+  exit 1;
+fi
+
+lang=$1
+model=$2/$iter.mdl
+tree=$2/$tree
+dir=$3
+
+for f in $lang/G.fst $model $tree; do
+  if [ ! -f $f ]; then
+    echo "$0: expected $f to exist"
+    exit 1;
+  fi
+done
+
+mkdir -p $dir $lang/tmp
+
+clg=$lang/tmp/CLG.fst
+
+if [[ ! -s $clg || $clg -ot $lang/G.fst ]]; then
+  echo "$0: creating CLG."
+
+  fstcomposecontext --context-size=1 --central-position=0 \
+    $lang/tmp/ilabels < $lang/G.fst | \
+    fstarcsort --sort_type=ilabel > $clg
+  fstisstochastic $clg || echo "[info]: CLG not stochastic."
+fi
+
+if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model || $dir/Ha.fst -ot $lang/tmp/ilabels ]]; then
+  make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \
+    --transition-scale=$tscale $lang/tmp/ilabels $tree $model \
+    > $dir/Ha.fst || exit 1;
+fi
+
+if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || $dir/HCLGa.fst -ot $clg ]]; then
+  fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \
+    | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \
+    fstminimizeencoded > $dir/HCLGa.fst || exit 1;
+  fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic"
+fi
+
+if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
+  add-self-loops --self-loop-scale=$loopscale --reorder=true \
+    $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1;
+
+  if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then
+    # No point doing this test if transition-scale not 1, as it is bound to fail.
+    fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic."
+  fi
+fi
+
+# keep a copy of the lexicon and a list of silence phones with HCLG...
+# this means we can decode without reference to the $lang directory.
+
+cp $lang/words.txt $dir/ || exit 1;
+cp $lang/phones.txt $dir/ 2> /dev/null # ignore the error if it's not there.
+
+# to make const fst:
+# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst
+am-info --print-args=false $model | grep pdfs | awk '{print $NF}' > $dir/num_pdfs
diff --git a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh
new file mode 100755
index 00000000000..31f0d09f351
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh
@@ -0,0 +1,104 @@
+#! /bin/bash
+
+# Copyright 2015-16  Vimal Manohar
+# Apache 2.0.
+
+set -e
+set -o pipefail
+set -u
+
+. path.sh
+
+cmd=run.pl
+stage=-10
+
+# General segmentation options
+pad_length=50         # Pad speech segments by this many frames on either side
+max_blend_length=10   # Maximum duration of speech that will be removed as part
+                      # of smoothing process. This is only if there are no other
+                      # speech segments nearby.
+max_intersegment_length=50  # Merge nearby speech segments if the silence
+                            # between them is less than this many frames.
+post_pad_length=50    # Pad speech segments by this many frames on either side
+                      # after the merging process using max_intersegment_length
+max_segment_length=1000   # Segments that are longer than this are split into
+                          # overlapping segments.
+overlap_length=100    # Overlapping frames when segments are split.
+                      # See the above option.
+min_silence_length=30 # Min silence length at which to split very long segments
+min_segment_length=20
+
+frame_shift=0.01
+frame_overlap=0.016
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "This script post-processes a speech activity segmentation to create "
+  echo "a kaldi-style data directory."
+  echo "See the comments for the kind of post-processing options."
+  echo "Usage: $0 <data-dir> <dir> <segmented-data-dir>"
+  echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg"
+  exit 1
+fi
+
+data_dir=$1
+dir=$2
+segmented_data_dir=$3
+
+for f in $dir/orig_segmentation.1.gz; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find $f"
+    exit 1
+  fi
+done
+
+nj=`cat $dir/num_jobs` || exit 1
+
+[ $pad_length -eq -1 ] && pad_length=
+[ $post_pad_length -eq -1 ] && post_pad_length=
+[ $max_blend_length -eq -1 ] && max_blend_length=
+
+if [ $stage -le 2 ]; then
+  # Post-process the original SAD segmentation using the following steps:
+  # 1) Blend short speech segments of less than $max_blend_length frames
+  #    into silence
+  # 2) Remove all silence frames and widen speech segments by padding
+  #    $pad_length frames
+  # 3) Merge adjacent segments that have an intersegment length of less than
+  #    $max_intersegment_length frames
+  # 4) Widen speech segments again after merging
+  # 5) Split segments into segments of $max_segment_length at the point where
+  #    the original segmentation had silence
+  # 6) Split segments into overlapping segments of max length
+  #    $max_segment_length and overlap $overlap_length
+  # 7) Convert segmentation to kaldi segments and utt2spk
+  $cmd JOB=1:$nj $dir/log/post_process_segmentation.JOB.log \
+    gunzip -c $dir/orig_segmentation.JOB.gz \| \
+    segmentation-post-process --merge-adjacent-segments --max-intersegment-length=0 ark:- ark:- \| \
+    segmentation-post-process ${max_blend_length:+--max-blend-length=$max_blend_length --blend-short-segments-class=1} ark:- ark:- \| \
+    segmentation-post-process --remove-labels=0 ${pad_length:+--pad-label=1 --pad-length=$pad_length} ark:- ark:- \| \
+    segmentation-post-process --merge-adjacent-segments --max-intersegment-length=$max_intersegment_length ark:- ark:- \| \
+    segmentation-post-process ${post_pad_length:+--pad-label=1 --pad-length=$post_pad_length} ark:- ark:- \| \
+    segmentation-split-segments --alignments="ark,s,cs:gunzip -c $dir/orig_segmentation.JOB.gz | segmentation-to-ali ark:- ark:- |" \
+    --max-segment-length=$max_segment_length --min-alignment-chunk-length=$min_silence_length --ali-label=0 ark:- ark:- \| \
+    segmentation-post-process --remove-labels=1 --max-remove-length=$min_segment_length ark:- ark:- \| \
+    segmentation-split-segments \
+    --max-segment-length=$max_segment_length --overlap-length=$overlap_length ark:- ark:- \| \
+    segmentation-to-segments --frame-shift=$frame_shift \
+    --frame-overlap=$frame_overlap ark:- \
+    ark,t:$dir/utt2spk.JOB $dir/segments.JOB || exit 1
+fi
+
+for n in `seq $nj`; do
+  cat $dir/utt2spk.$n
+done > $segmented_data_dir/utt2spk
+
+for n in `seq $nj`; do
+  cat $dir/segments.$n
+done > $segmented_data_dir/segments
+
+if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then
+  echo "$0: Segmentation failed to generate segments or utt2spk!"
+  exit 1
+fi
diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py
new file mode 100755
index 00000000000..b539286a85b
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py
@@ -0,0 +1,121 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import sys
+import shlex
+
+sys.path.insert(0, 'steps')
+import libs.common as common_lib
+
+def GetArgs():
+    parser = argparse.ArgumentParser(description="""This script generates a lang
+directory for the purpose of segmentation. It takes as arguments the list of
+phones, the corresponding min durations and end transition probabilities.""")
+
+    parser.add_argument("--phone-transition-parameters",
+                        dest='phone_transition_para_array',
+                        type=str, action='append', required=True,
+                        help="Options to build topology.\n"
+                        "--phone-list=<colon-separated-phones>  # Colon-separated list of phones\n"
+                        "--min-duration=<int>  # Min duration for the phones\n"
+                        "--end-transition-probability=<float>  # Probability of the end transition after the minimum duration\n")
+    parser.add_argument("dir", type=str,
+                        help="Output lang directory")
+    args = parser.parse_args()
+    return args
+
+
+def ParsePhoneTransitionParameters(para_array):
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--phone-list", type=str, required=True,
+                        help="Colon-separated list of phones")
+    parser.add_argument("--min-duration", type=int, default=3,
+                        help="Minimum number of states for the phone")
+    parser.add_argument("--end-transition-probability", type=float, default=0.1,
+                        help="Probability of the end transition after the minimum duration")
+
+    phone_transition_parameters = [parser.parse_args(shlex.split(x))
+                                   for x in para_array]
+
+    for t in phone_transition_parameters:
+        if (t.end_transition_probability > 1.0 or
+                t.end_transition_probability < 0.0):
+            raise ValueError("Expected --end-transition-probability to be "
+                             "between 0 and 1, got {0} for phones {1}".format(
+                                 t.end_transition_probability, t.phone_list))
+        if t.min_duration > 100 or t.min_duration < 1:
+            raise ValueError("Expected --min-duration to be "
+                             "between 1 and 100, got {0} for phones {1}".format(
+                                 t.min_duration, t.phone_list))
+
+        t.phone_list = t.phone_list.split(":")
+
+    return phone_transition_parameters
+
+
+def get_phone_map(phone_transition_parameters):
+    phone2int = {}
+    n = 1
+    for t in phone_transition_parameters:
+        for p in t.phone_list:
+            if p in phone2int:
+                raise Exception("Phone {0} found in multiple topologies".format(p))
+            phone2int[p] = n
+            n += 1
+
+    return phone2int
+
+
+def print_duration_constraint_states(min_duration, topo):
+    for state in range(0, min_duration - 1):
+        print("<State> {state} <PdfClass> 0"
+              " <Transition> {dest_state} 1.0 </State>".format(
+                  state=state, dest_state=state + 1),
+              file=topo)
+
+
+def print_topology(phone_transition_parameters, phone2int, args, topo):
+    for t in phone_transition_parameters:
+        print("<TopologyEntry>", file=topo)
+        print("<ForPhones>", file=topo)
+        print("{0}".format(" ".join([str(phone2int[p])
+                                     for p in t.phone_list])), file=topo)
+        print("</ForPhones>", file=topo)
+
+        print_duration_constraint_states(t.min_duration, topo)
+
+        print("<State> {state} <PdfClass> 0 "
+              "<Transition> {state} {self_prob} "
+              "<Transition> {next_state} {next_prob} </State>".format(
+                  state=t.min_duration - 1, next_state=t.min_duration,
+                  self_prob=1 - t.end_transition_probability,
+                  next_prob=t.end_transition_probability), file=topo)
+
+        print("<State> {state} </State>".format(state=t.min_duration),
+              file=topo)   # Final state
+        print("</TopologyEntry>", file=topo)
+
+
+def main():
+    args = GetArgs()
+    phone_transition_parameters = ParsePhoneTransitionParameters(args.phone_transition_para_array)
+
+    phone2int = get_phone_map(phone_transition_parameters)
+
+    topo = open("{0}/topo".format(args.dir), 'w')
+
+    print("<Topology>", file=topo)
+
+    print_topology(phone_transition_parameters, phone2int, args, topo)
+
+    print("</Topology>", file=topo)
+
+    phones_file = open("{0}/phones.txt".format(args.dir), 'w')
+
+    print("<eps> 0", file=phones_file)
+
+    for p, n in sorted(list(phone2int.items()), key=lambda x: x[1]):
+        print("{0} {1}".format(p, n), file=phones_file)
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py b/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py
new file mode 100755
index 00000000000..eae0f142668
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py
@@ -0,0 +1,202 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import logging
+import os
+import sys
+
+sys.path.insert(0, 'steps')
+import libs.common as common_lib
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script generates a lang directory for decoding with
+        a simple HMM model.
+        It needs as an input classes_info file with the format:
+        <class-id> <initial-prob> <self-loop-prob> <num-states> <list-of-transitions>,
+        where each transition is <destination-class>:<transition-probability>.
+        destination-class -1 is used to represent the final probability.""")
+
+    parser.add_argument("classes_info", type=argparse.FileType('r'),
+                        help="File with classes_info")
+    parser.add_argument("dir", type=str,
+                        help="Output lang directory")
+    args = parser.parse_args()
+    return args
+
+
+class ClassInfo(object):
+    def __init__(self, class_id):
+        self.class_id = class_id
+        self.start_state = -1
+        self.num_states = 0
+        self.initial_prob = 0
+        self.self_loop_prob = 0
+        self.transitions = {}
+
+    def __str__(self):
+        return ("class-id={0},start-state={1},num-states={2},"
+                "initial-prob={3:.2f},transitions={4}".format(
+                    self.class_id, self.start_state, self.num_states,
+                    self.initial_prob, ' '.join(
+                        ['{0}:{1}'.format(x, y)
+                         for x, y in self.transitions.items()])))
+
+
+def read_classes_info(file_handle):
+    classes_info = {}
+
+    num_states = 1
+    num_classes = 0
+
+    for line in file_handle.readlines():
+        try:
+            parts = line.split()
+            class_id = int(parts[0])
+            assert class_id > 0, class_id
+            if class_id in classes_info:
+                raise RuntimeError(
+                    "Duplicate class-id {0} in file {1}".format(
+                        class_id, file_handle.name))
+            classes_info[class_id] = ClassInfo(class_id)
+            class_info = classes_info[class_id]
+            class_info.initial_prob = float(parts[1])
+            class_info.self_loop_prob = float(parts[2])
+            class_info.num_states = int(parts[3])
+            class_info.start_state = num_states
+            num_states += class_info.num_states
+            num_classes += 1
+
+            if len(parts) > 4:
+                for part in parts[4:]:
+                    dest_class, transition_prob = part.split(':')
+                    dest_class = int(dest_class)
+                    if dest_class in class_info.transitions:
+                        logger.error(
+                            "Duplicate transition to class-id {0} "
+                            "in transitions".format(dest_class))
+                        raise RuntimeError
+                    class_info.transitions[dest_class] = float(transition_prob)
+            else:
+                raise RuntimeError(
+                    "No transitions out of class {0}".format(class_id))
+        except Exception:
+            logger.error("Error processing line %s in file %s",
+                         line, file_handle.name)
+            raise
+
+    # Final state
+    classes_info[-1] = ClassInfo(-1)
+    class_info = classes_info[-1]
+    class_info.num_states = 1
+    class_info.start_state = num_states
+
+    for class_id, class_info in classes_info.items():
+        logger.info("For class %d, got class-info %s", class_id, class_info)
+
+    return classes_info, num_classes
+
+
+def print_states_for_class(class_id, classes_info, topo):
+    class_info = classes_info[class_id]
+
+    assert class_info.num_states > 1, class_info
+
+    for state in range(class_info.start_state,
+                       class_info.start_state + class_info.num_states - 1):
+        print("<State> {state} <PdfClass> {pdf}"
+              " <Transition> {dest_state} 1.0 </State>".format(
+                  state=state, dest_state=state + 1,
+                  pdf=class_info.class_id - 1),
+              file=topo)
+
+    state = class_info.start_state + class_info.num_states - 1
+
+    transitions = []
+
+    transitions.append("<Transition> {next_state} {next_prob}".format(
+        next_state=state, next_prob=class_info.self_loop_prob))
+
+    for dest_class, prob in class_info.transitions.items():
+        try:
+            next_state = classes_info[dest_class].start_state
+
+            transitions.append("<Transition> {next_state} {next_prob}".format(
+                next_state=next_state, next_prob=prob))
+        except Exception:
+            logger.error("Failed to add transition (%d->%d).\n"
+                         "classes_info = %s", class_id, dest_class,
+                         class_info)
+            raise
+
+    print("<State> {state} <PdfClass> {pdf} "
+          "{transitions} </State>".format(
+              state=state, pdf=class_id - 1,
+              transitions=' '.join(transitions)), file=topo)
+
+
+def main():
+    try:
+        args = get_args()
+        run(args)
+    except Exception:
+        logger.error("Failed preparing lang directory")
+        raise
+
+
+def run(args):
+    if not os.path.exists(args.dir):
+        os.makedirs(args.dir)
+
+    classes_info, num_classes = read_classes_info(args.classes_info)
+
+    topo = open("{0}/topo".format(args.dir), 'w')
+
+    print("<Topology>", file=topo)
+    print("<TopologyEntry>", file=topo)
+    print("<ForPhones>", file=topo)
+    print("1", file=topo)
+    print("</ForPhones>", file=topo)
+
+    # Print transitions from initial state (initial probs)
+    transitions = []
+    for class_id in range(1, num_classes + 1):
+        class_info = classes_info[class_id]
+        transitions.append("<Transition> {next_state} {next_prob}".format(
+            next_state=class_info.start_state,
+            next_prob=class_info.initial_prob))
+    print("<State> 0 {transitions} </State>".format(
+        transitions=' '.join(transitions)), file=topo)
+
+    for class_id in range(1, num_classes + 1):
+        print_states_for_class(class_id, classes_info, topo)
+
+    print("<State> {state} </State>".format(
+        state=classes_info[-1].start_state), file=topo)
+
+    print("</TopologyEntry>", file=topo)
+    print("</Topology>", file=topo)
+    topo.close()
+
+    with open('{0}/phones.txt'.format(args.dir), 'w') as phones_f:
+        for class_id in range(1, num_classes + 1):
+            print("{0} {1}".format(class_id - 1, class_id), file=phones_f)
+
+    common_lib.force_symlink('{0}/phones.txt'.format(args.dir),
+                             '{0}/words.txt'.format(args.dir))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/wsj/s5/steps/segmentation/invert_vector.pl b/egs/wsj/s5/steps/segmentation/invert_vector.pl
new file mode 100755
index 00000000000..c16243a0b93
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/invert_vector.pl
@@ -0,0 +1,20 @@
+#! /usr/bin/perl
+use strict;
+use warnings;
+
+while (<STDIN>) {
+  chomp;
+  my @F = split;
+  my $utt = shift @F;
+  shift @F;
+
+  print "$utt [ ";
+  for (my $i = 0; $i < $#F; $i++) {
+    if ($F[$i] == 0) {
+      print "1 ";
+    } else {
+      print 1.0/$F[$i] . " ";
+    }
+  }
+  print "]\n";
+}
diff --git a/egs/wsj/s5/steps/segmentation/make_snr_targets.sh b/egs/wsj/s5/steps/segmentation/make_snr_targets.sh
new file mode 100755
index 00000000000..71f603a690e
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/make_snr_targets.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+# Copyright 2015-16  Vimal Manohar
+# Apache 2.0
+set -e
+set -o pipefail
+
+nj=4
+cmd=run.pl
+stage=0
+
+data_id=
+
+compress=true
+target_type=Irm
+apply_exp=false
+
+ali_rspecifier=
+silence_phones_str=0
+
+ignore_noise_dir=false
+
+ceiling=inf
+floor=-inf
+
+length_tolerance=2
+transform_matrix=
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "Usage: $0 [options] --target-type (Irm|Snr) <clean-data-dir> <noise-data-dir> <data-dir> <tmp-dir> <targets-dir>";
+  echo " or : $0 [options] --target-type FbankMask <clean-data-dir> <noisy-data-dir> <data-dir> <tmp-dir> <targets-dir>";
+  echo "e.g.: $0 data/train_clean_fbank data/train_noise_fbank data/train_corrupted_hires exp/make_snr_targets/train snr_targets"
+  echo "options: "
+  echo "  --nj <nj>                                         # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs."
+  exit 1;
+fi
+
+clean_data=$1
+noise_or_noisy_data=$2
+data=$3
+tmpdir=$4
+targets_dir=$5
+
+mkdir -p $targets_dir
+
+[ -z "$data_id" ] && data_id=`basename $data`
+
+utils/split_data.sh $clean_data $nj
+
+for n in `seq $nj`; do
+  utils/subset_data_dir.sh --utt-list $clean_data/split$nj/$n/utt2spk $noise_or_noisy_data $noise_or_noisy_data/subset${nj}/$n
+done
+
+$ignore_noise_dir && utils/split_data.sh $data $nj
+
+targets_dir=`perl -e '($data,$pwd)= @ARGV; if($data!~m:^/:) { $data = "$pwd/$data"; } print $data; ' $targets_dir ${PWD}`
+
+for n in `seq $nj`; do
+  utils/create_data_link.pl $targets_dir/${data_id}.$n.ark
+done
+
+apply_exp_opts=
+if $apply_exp; then
+  apply_exp_opts=" copy-matrix --apply-exp=true ark:- ark:- |"
+fi
+
+copy_feats_opts="copy-feats"
+if [ ! -z "$transform_matrix" ]; then
+  copy_feats_opts="transform-feats $transform_matrix"
+fi
+
+if [ $stage -le 1 ]; then
+  if ! $ignore_noise_dir; then
+    $cmd JOB=1:$nj $tmpdir/make_`basename $targets_dir`_${data_id}.JOB.log \
+      compute-snr-targets --length-tolerance=$length_tolerance --target-type=$target_type \
+      ${ali_rspecifier:+--ali-rspecifier="$ali_rspecifier" --silence-phones=$silence_phones_str} \
+      --floor=$floor --ceiling=$ceiling \
+      "ark:$copy_feats_opts scp:$clean_data/split$nj/JOB/feats.scp ark:- |" \
+      "ark,s,cs:$copy_feats_opts scp:$noise_or_noisy_data/subset$nj/JOB/feats.scp ark:- |" \
+      ark:- \|$apply_exp_opts \
+      copy-feats --compress=$compress ark:- \
+      ark,scp:$targets_dir/${data_id}.JOB.ark,$targets_dir/${data_id}.JOB.scp || exit 1
+  else
+    feat_dim=$(feat-to-dim scp:$data/feats.scp -) || exit 1
+    $cmd JOB=1:$nj $tmpdir/make_`basename $targets_dir`_${data_id}.JOB.log \
+      compute-snr-targets --length-tolerance=$length_tolerance --target-type=$target_type \
+      ${ali_rspecifier:+--ali-rspecifier="$ali_rspecifier" --silence-phones=$silence_phones_str} \
+      --floor=$floor --ceiling=$ceiling --binary-targets --target-dim=$feat_dim \
+      scp:$data/split$nj/JOB/feats.scp \
+      ark:- \|$apply_exp_opts \
+      copy-feats --compress=$compress ark:- \
+      ark,scp:$targets_dir/${data_id}.JOB.ark,$targets_dir/${data_id}.JOB.scp || exit 1
+  fi
+fi
+
+for n in `seq $nj`; do
+  cat $targets_dir/${data_id}.$n.scp
+done > $data/`basename $targets_dir`.scp
diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh
new file mode 100755
index 00000000000..c1006d09678
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh
@@ -0,0 +1,130 @@
+#! /bin/bash
+
+# Copyright 2015  Vimal Manohar
+# Apache 2.0.
+
+set -e -o pipefail -u
+. path.sh
+
+cmd=run.pl
+stage=-10
+
+segmentation_config=conf/segmentation.conf
+nj=18
+
+frame_shift=0.01
+weight_threshold=0.5
+ali_suffix=_acwt0.1
+
+frame_subsampling_factor=1
+
+phone2sad_map=
+
+. utils/parse_options.sh
+
+if [ $# -ne 5 ] && [ $# -ne 4 ]; then
+  echo "This script converts an alignment directory containing per-frame SAD "
+  echo "labels or per-frame speech probabilities into a kaldi-style "
+  echo "segmented data directory."
+  echo "This script first converts the per-frame labels or weights into a "
+  echo "segmentation and then calls "
+  echo "steps/segmentation/internal/post_process_segments.sh, "
+  echo "which does the actual post-processing."
+  echo "Usage: $0 <data-dir> (<lang> <vad-dir>|<weights-scp>) <dir> <segmented-data-dir>"
+  echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg"
+  exit 1
+fi
+
+data_dir=$1
+vad_dir=
+
+if [ $# -eq 5 ]; then
+  lang=$2
+  vad_dir=$3
+  shift; shift; shift
+else
+  weights_scp=$2
+  shift; shift
+fi
+
+dir=$1
+segmented_data_dir=$2
+
+utils/data/get_reco2utt.sh $data_dir
+
+mkdir -p $dir
+
+frame_shift_subsampled=`perl -e "print ($frame_subsampling_factor * $frame_shift)"`
+
+if [ ! -z "$vad_dir" ]; then
+  nj=`cat $vad_dir/num_jobs` || exit 1
+
+  utils/split_data.sh $data_dir $nj
+
+  for n in `seq $nj`; do
+    cat $data_dir/split$nj/$n/segments | awk '{print $1" "$2}' | \
+      utils/utt2spk_to_spk2utt.pl > $data_dir/split$nj/$n/reco2utt
+  done
+
+  if [ -z "$phone2sad_map" ]; then
+    phone2sad_map=$dir/phone2sad_map
+
+    {
+      cat $lang/phones/silence.int | awk '{print $1" 0"}';
+      cat $lang/phones/nonsilence.int | awk '{print $1" 1"}';
+    } | sort -k1,1 -n > $dir/phone2sad_map
+  fi
+
+  if [ $stage -le 0 ]; then
+    # Convert the original SAD into segmentation
+    $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \
+      segmentation-init-from-ali \
+      "ark:gunzip -c $vad_dir/ali${ali_suffix}.JOB.gz |" ark:- \| \
+      segmentation-combine-segments ark:- \
+      "ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift_subsampled $data_dir/split$nj/JOB/segments ark:- |" \
+      "ark,t:$data_dir/split$nj/JOB/reco2utt" ark:- \| \
+      segmentation-copy --label-map=$phone2sad_map \
+      --frame-subsampling-factor=$frame_subsampling_factor ark:- \
+      "ark:| gzip -c > $dir/orig_segmentation.JOB.gz"
+  fi
+else
+  utils/split_data.sh $data_dir $nj
+
+  for n in `seq $nj`; do
+    utils/data/get_reco2utt.sh $data_dir/split$nj/$n
+    utils/filter_scp.pl $data_dir/split$nj/$n/reco2utt $weights_scp > \
+      $dir/weights.$n.scp
+  done
+
+  $cmd JOB=1:$nj $dir/log/weights_to_segments.JOB.log \
+    copy-vector scp:$dir/weights.JOB.scp ark,t:- \| \
+    awk -v t=$weight_threshold '{printf $1; for (i=3; i < NF; i++) { if ($i >= t) printf (" 1"); else printf (" 0"); }; print "";}' \| \
+    segmentation-init-from-ali \
+    ark,t:- ark:- \| segmentation-combine-segments ark:- \
+    "ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift_subsampled $data_dir/split$nj/JOB/segments ark:- |" \
+    "ark,t:$data_dir/split$nj/JOB/reco2utt" ark:- \| \
+    segmentation-copy --frame-subsampling-factor=$frame_subsampling_factor \
+    ark:- "ark:| gzip -c > $dir/orig_segmentation.JOB.gz"
+fi
+
+echo $nj > $dir/num_jobs
+
+if [ $stage -le 1 ]; then
+  rm -r $segmented_data_dir || true
+  utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1
+  rm $segmented_data_dir/text || true
+fi
+
+steps/segmentation/internal/post_process_segments.sh \
+  --stage $stage --cmd "$cmd" \
+  --config $segmentation_config --frame-shift $frame_shift \
+  $data_dir $dir $segmented_data_dir
+
+utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1
+utils/fix_data_dir.sh $segmented_data_dir
+
+if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then
+  echo "$0: Segmentation failed to generate segments or utt2spk!"
+  exit 1
+fi
diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh
new file mode 100755
index 00000000000..d5ad48a492f
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh
@@ -0,0 +1,92 @@
+#! /bin/bash
+
+# Copyright 2015  Vimal Manohar
+# Apache 2.0.
+
+set -e -o pipefail -u
+. path.sh
+
+cmd=run.pl
+stage=-10
+
+segmentation_config=conf/segmentation.conf
+nj=18
+
+frame_subsampling_factor=1
+frame_shift=0.01
+frame_overlap=0.015
+
+. utils/parse_options.sh
+
+if [ $# -ne 5 ]; then
+  echo "Usage: $0 <data-dir> <phone2sad-map> <vad-dir> <dir> <segmented-data-dir>"
+  echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg"
+  exit 1
+fi
+
+data_dir=$1
+phone2sad_map=$2
+vad_dir=$3
+dir=$4
+segmented_data_dir=$5
+
+mkdir -p $dir
+
+nj=`cat $vad_dir/num_jobs` || exit 1
+
+utils/split_data.sh $data_dir $nj
+
+if [ $stage -le 0 ]; then
+  # Convert the original SAD into segmentation
+  $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \
+    segmentation-init-from-ali \
+    "ark:gunzip -c $vad_dir/ali.JOB.gz |" ark:- \| \
+    segmentation-copy --label-map=$phone2sad_map \
+    --frame-subsampling-factor=$frame_subsampling_factor ark:- \
+    "ark:| gzip -c > $dir/orig_segmentation.JOB.gz"
+fi
+
+echo $nj > $dir/num_jobs
+
+# Create a temporary directory into which we can create the new segments
+# file.
+if [ $stage -le 1 ]; then
+  rm -r $segmented_data_dir || true
+  utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1
+  rm $segmented_data_dir/text || true
+fi
+
+if [ $stage -le 2 ]; then
+  # --frame-overlap is set to 0 to not do any additional padding when writing
+  # segments. This padding will be done later by the option
+  # --segment-end-padding to utils/data/subsegment_data_dir.sh.
+  steps/segmentation/internal/post_process_segments.sh \
+    --stage $stage --cmd "$cmd" \
+    --config $segmentation_config --frame-shift $frame_shift \
+    --frame-overlap 0 \
+    $data_dir $dir $segmented_data_dir
+fi
+
+mv $segmented_data_dir/segments $segmented_data_dir/sub_segments
+utils/data/subsegment_data_dir.sh --segment-end-padding `perl -e "print $frame_overlap"` \
+  $data_dir $segmented_data_dir/sub_segments $segmented_data_dir
+utils/fix_data_dir.sh $segmented_data_dir
+
+utils/data/get_reco2num_frames.sh --nj $nj --cmd "$cmd" ${data_dir}
+mv $segmented_data_dir/feats.scp $segmented_data_dir/feats.scp.tmp
+cat $segmented_data_dir/segments | awk '{print $1" "$2}' | \
+  utils/apply_map.pl -f 2 $data_dir/reco2num_frames > \
+  $segmented_data_dir/utt2max_frames
+cat $segmented_data_dir/feats.scp.tmp | \
+  utils/data/fix_subsegmented_feats.pl $segmented_data_dir/utt2max_frames > \
+  $segmented_data_dir/feats.scp
+
+utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > \
+  $segmented_data_dir/spk2utt || exit 1
+utils/fix_data_dir.sh $segmented_data_dir
+
+if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then
+  echo "$0: Segmentation failed to generate segments or utt2spk!"
+  exit 1
+fi
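[Reviewer sketch, not part of the patch: the quantize_vector.pl script that follows thresholds per-frame speech probabilities in copy-vector's text format, "utt [ p1 p2 ... pn ]". As a rough cross-check only, the same filtering in Python might look like this; the 0.5 default matches the perl script.]

    import sys

    # optional threshold argument, defaulting to 0.5 as in quantize_vector.pl
    threshold = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5

    for line in sys.stdin:
        parts = line.split()
        utt = parts[0]
        probs = parts[2:-1]   # strip the "[" and "]" brackets
        labels = ["1" if float(p) >= threshold else "0" for p in probs]
        print("{0} {1}".format(utt, " ".join(labels)))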
diff --git a/egs/wsj/s5/steps/segmentation/quantize_vector.pl b/egs/wsj/s5/steps/segmentation/quantize_vector.pl
new file mode 100755
index 00000000000..0bccebade4c
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/quantize_vector.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+
+# This script converts per-frame speech probabilities into
+# 0-1 labels.
+
+@ARGV <= 1 or die "Usage: quantize_vector.pl [threshold]";
+
+my $t = 0.5;
+
+if (scalar @ARGV == 1) {
+  $t = $ARGV[0];
+}
+
+while (<STDIN>) {
+  chomp;
+  my @F = split;
+
+  my $str = "$F[0]";
+  for (my $i = 2; $i < $#F; $i++) {
+    if ($F[$i] >= $t) {
+      $str = "$str 1";
+    } else {
+      $str = "$str 0";
+    }
+  }
+
+  print ("$str\n");
+}
diff --git a/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh b/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh
new file mode 100755
index 00000000000..4c167d99a1e
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh
@@ -0,0 +1,29 @@
+#! /bin/bash
+
+set -e
+
+if [ $# -ne 3 ]; then
+  echo "Usage: split_data_on_reco.sh <ref-data> <data> <num-jobs>"
+  exit 1
+fi
+
+ref_data=$1
+data=$2
+nj=$3
+
+utils/data/get_reco2utt.sh $ref_data
+utils/data/get_reco2utt.sh $data
+
+utils/split_data.sh --per-reco $ref_data $nj
+
+for n in `seq $nj`; do
+  srn=$ref_data/split${nj}reco/$n
+  dsn=$data/split${nj}reco/$n
+
+  mkdir -p $dsn
+
+  utils/data/get_reco2utt.sh $srn
+  utils/filter_scp.pl $srn/reco2utt $data/reco2utt > $dsn/reco2utt
+  utils/spk2utt_to_utt2spk.pl $dsn/reco2utt > $dsn/utt2reco
+  utils/subset_data_dir.sh --utt-list $dsn/utt2reco $data $dsn
+done
diff --git a/egs/wsj/s5/steps/segmentation/train_simple_hmm.py b/egs/wsj/s5/steps/segmentation/train_simple_hmm.py
new file mode 100755
index 00000000000..9f581b0a520
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/train_simple_hmm.py
@@ -0,0 +1,194 @@
+#! /usr/bin/env python
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+import argparse
+import logging
+import os
+import sys
+
+sys.path.insert(0, 'steps')
+import libs.common as common_lib
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+
+def get_args():
+    """Parse command-line arguments"""
+
+    parser = argparse.ArgumentParser(
+        description="""Train a simple HMM model starting from an HMM
+        topology.""")
+
+    # Alignment options
+    parser.add_argument("--align.transition-scale", dest='transition_scale',
+                        type=float, default=10.0,
+                        help="""Transition-probability scale [relative to
+                        acoustics]""")
+    parser.add_argument("--align.self-loop-scale", dest='self_loop_scale',
+                        type=float, default=1.0,
+                        help="""Scale on self-loop versus non-self-loop log
+                        probs [relative to acoustics]""")
+    parser.add_argument("--align.beam", dest='beam',
+                        type=float, default=6,
+                        help="""Decoding beam used in alignment""")
+
+    # Training options
+    parser.add_argument("--training.num-iters", dest='num_iters',
+                        type=int, default=30,
+                        help="""Number of iterations of training""")
+    parser.add_argument("--training.use-soft-counts", dest='use_soft_counts',
+                        type=str, action=common_lib.StrToBoolAction,
+                        choices=["true", "false"], default=False,
+                        help="""Use soft counts (posteriors) instead of
+                        alignments""")
+
+    # General options
+    parser.add_argument("--scp2ark-cmd", type=str,
+                        default="copy-int-vector scp:- ark:- |",
+                        help="The command used to convert scp from stdin to "
+                        "write archive to stdout")
+    parser.add_argument("--cmd", dest='command', type=str,
+                        default="run.pl",
+                        help="Command used to run jobs")
+    parser.add_argument("--stage", type=int, default=-10,
+                        help="""Stage to run training from""")
+
+    parser.add_argument("--data", type=str, required=True,
+                        help="Data directory; primarily used for splitting")
+
+    labels_group = parser.add_mutually_exclusive_group(required=True)
+    labels_group.add_argument("--labels-scp", type=str,
+                              help="Input labels that must be converted to "
+                              "alignments of class-ids using --scp2ark-cmd")
+    labels_group.add_argument("--labels-rspecifier", type=str,
+                              help="Input labels rspecifier")
+
+    parser.add_argument("--lang", type=str, required=True,
+                        help="The language directory containing the "
+                        "HMM Topology file topo")
+    parser.add_argument("--loglikes-dir", type=str, required=True,
+                        help="Directory containing the log-likelihoods")
+    parser.add_argument("--dir", type=str, required=True,
+                        help="Directory where the intermediate and final "
+                        "models will be written")
+
+    args = parser.parse_args()
+
+    if args.use_soft_counts:
+        raise NotImplementedError("--use-soft-counts not supported yet!")
+
+    return args
+
+
+def check_files(args):
+    """Check files required for this script"""
+
+    files = ("{lang}/topo {data}/utt2spk "
+             "{loglikes_dir}/log_likes.1.gz {loglikes_dir}/num_jobs "
+             "".format(lang=args.lang, data=args.data,
+                       loglikes_dir=args.loglikes_dir).split())
+
+    if args.labels_scp is not None:
+        files.append(args.labels_scp)
+
+    for f in files:
+        if not os.path.exists(f):
+            logger.error("Could not find file %s", f)
+            raise RuntimeError
+
+
+def run(args):
+    """The function that does it all"""
+
+    check_files(args)
+
+    if args.stage <= -2:
+        logger.info("Initializing simple HMM model")
+        common_lib.run_kaldi_command(
+            """{cmd} {dir}/log/init.log simple-hmm-init {lang}/topo """
+            """ {dir}/0.mdl""".format(cmd=args.command, dir=args.dir,
+                                      lang=args.lang))
+
+    num_jobs = common_lib.get_number_of_jobs(args.loglikes_dir)
+    split_data = common_lib.split_data(args.data, num_jobs)
+
+    if args.labels_rspecifier is not None:
+        labels_rspecifier = args.labels_rspecifier
+    else:
+        labels_rspecifier = ("ark:utils/filter_scp.pl {sdata}/JOB/utt2spk "
+                             "{labels_scp} | {scp2ark_cmd}".format(
+                                 sdata=split_data, labels_scp=args.labels_scp,
+                                 scp2ark_cmd=args.scp2ark_cmd))
+
+    if args.stage <= -1:
+        logger.info("Compiling training graphs")
+        common_lib.run_kaldi_command(
+            """{cmd} JOB=1:{nj} {dir}/log/compile_graphs.JOB.log """
+            """ compile-train-simple-hmm-graphs {dir}/0.mdl """
+            """ "{labels_rspecifier}" """
+            """ "ark:| gzip -c > {dir}/fsts.JOB.gz" """.format(
+                cmd=args.command, nj=num_jobs,
+                dir=args.dir, lang=args.lang,
+                labels_rspecifier=labels_rspecifier))
+
+    scale_opts = ("--transition-scale={tscale} --self-loop-scale={loop_scale}"
+                  "".format(tscale=args.transition_scale,
+                            loop_scale=args.self_loop_scale))
+
+    for iter_ in range(0, args.num_iters):
+        if args.stage > iter_:
+            continue
+
+        logger.info("Training iteration %d", iter_)
+
+        common_lib.run_kaldi_command(
+            """{cmd} JOB=1:{nj} {dir}/log/align.{iter}.JOB.log """
+            """ simple-hmm-align-compiled {scale_opts} """
+            """ --beam={beam} --retry-beam={retry_beam} {dir}/{iter}.mdl """
+            """ "ark:gunzip -c {dir}/fsts.JOB.gz |" """
+            """ "ark:gunzip -c {loglikes_dir}/log_likes.JOB.gz |" """
+            """ ark:- \| """
+            """ simple-hmm-acc-stats-ali {dir}/{iter}.mdl ark:- """
+            """ {dir}/{iter}.JOB.acc""".format(
+                cmd=args.command, nj=num_jobs, dir=args.dir, iter=iter_,
+                scale_opts=scale_opts, beam=args.beam,
+                retry_beam=args.beam * 4, loglikes_dir=args.loglikes_dir))
+
+        common_lib.run_kaldi_command(
+            """{cmd} {dir}/log/update.{iter}.log """
+            """ simple-hmm-est {dir}/{iter}.mdl """
+            """ "vector-sum {dir}/{iter}.*.acc - |" """
+            """ {dir}/{new_iter}.mdl""".format(
+                cmd=args.command, dir=args.dir, iter=iter_,
+                new_iter=iter_ + 1))
+
+        common_lib.run_kaldi_command(
+            "rm {dir}/{iter}.*.acc".format(dir=args.dir, iter=iter_))
+    # end train loop
+
+    common_lib.force_symlink("{0}.mdl".format(args.num_iters),
+                             "{0}/final.mdl".format(args.dir))
+
+    logger.info("Done training simple HMM in %s/final.mdl", args.dir)
+
+
+def main():
+    try:
+        args = get_args()
+        run(args)
+    except Exception:
+        logger.error("Failed training models")
+        raise
+
+
+if __name__ == '__main__':
+    main()
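[Reviewer note, not part of the patch: the training loop above runs one EM-style pass per iteration: Viterbi alignment under the current model (simple-hmm-align-compiled, with the retry beam fixed at four times --align.beam), accumulation (simple-hmm-acc-stats-ali), and re-estimation (simple-hmm-est). A hypothetical invocation, with all paths illustrative only, might look like:]

    steps/segmentation/train_simple_hmm.py --cmd run.pl \
      --training.num-iters 30 --data data/train \
      --lang exp/segmentation/lang --loglikes-dir exp/segmentation/log_likes \
      --labels-scp exp/segmentation/labels.scp --dir exp/segmentation/simple_hmm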
diff --git a/egs/wsj/s5/steps/segmentation/vector_get_max.pl b/egs/wsj/s5/steps/segmentation/vector_get_max.pl
new file mode 100644
index 00000000000..abb8ea977a2
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/vector_get_max.pl
@@ -0,0 +1,26 @@
+#! /usr/bin/perl
+
+use warnings;
+use strict;
+
+while (<>) {
+  chomp;
+  if (m/^\S+\s+\[.+\]\s*$/) {
+    my @F = split;
+    my $utt = shift @F;
+    shift @F;
+
+    my $max_id = 0;
+    my $max = $F[0];
+    for (my $i = 1; $i < $#F; $i++) {
+      if ($F[$i] > $max) {
+        $max_id = $i;
+        $max = $F[$i];
+      }
+    }
+
+    print "$utt $max_id\n";
+  } else {
+    die "Invalid line $_\n";
+  }
+}
diff --git a/egs/wsj/s5/steps/select_feats.sh b/egs/wsj/s5/steps/select_feats.sh
index 072dd3194cf..e77c9b53630 100755
--- a/egs/wsj/s5/steps/select_feats.sh
+++ b/egs/wsj/s5/steps/select_feats.sh
@@ -2,6 +2,9 @@
 
 # Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0
+
+# This script is deprecated. Use utils/data/limit_feature_dim.sh.
+
 # This script selects some specified dimensions of the features in the
 # input data directory.
diff --git a/egs/wsj/s5/steps/shift_feats.sh b/egs/wsj/s5/steps/shift_feats.sh
index 9ad85368c3f..ada5716f187 100755
--- a/egs/wsj/s5/steps/shift_feats.sh
+++ b/egs/wsj/s5/steps/shift_feats.sh
@@ -3,11 +3,15 @@
 # Copyright 2016 Vimal Manohar
 # Apache 2.0
 
+# This script is deprecated. The newer script utils/data/shift_feats.sh
+# should be used instead.
+
 # This script shifts the feats in the input data directory and creates a
 # new directory <data-dir>_fs<frame-shift> with shifted feats.
-# If the shift is negative, the initial frames get truncated.
-# If the shift is positive, the first frame is repeated.
-# Usually applicable for sequence training
+# If the shift is negative, the initial frames get truncated and the
+# last frame repeated; if positive, vice versa.
+# Used to prepare data for sequence training of models with
+# frame_subsampling_factor != 1 (e.g. chain models).
 # To be run from .. (one directory up from here)
 # see ../run.sh for example
@@ -24,6 +28,8 @@
 if [ -f path.sh ]; then . ./path.sh; fi
 . parse_options.sh || exit 1;
 
 if [ $# -ne 4 ]; then
+  echo "This script is deprecated. The newer script utils/data/shift_feats.sh"
+  echo "should be used instead."
   echo "usage: $0 [options] <frame-shift> <data-dir> <log-dir> <feat-dir>";
   echo "e.g.: $0 -1 data/train exp/shift-1_train mfcc"
   echo "options: "
@@ -82,4 +88,3 @@ if [ $nf -ne $nu ]; then
 fi
 
 echo "Succeeded shifting features for $name into $data"
-
diff --git a/egs/wsj/s5/steps/tandem/align_sgmm.sh b/egs/wsj/s5/steps/tandem/align_sgmm.sh
deleted file mode 100755
index bb3ba79bd9f..00000000000
--- a/egs/wsj/s5/steps/tandem/align_sgmm.sh
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/bin/bash
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
-#                 Korbinian Riedhammer
-# Apache 2.0
-
-# Computes training alignments and (if needed) speaker-vectors, given an
-# SGMM system.  If the system is built on top of SAT, you should supply
-# transforms with the --transform-dir option.
-
-# If you supply the --use-graphs option, it will use the training
-# graphs from the source directory.
-
-# Begin configuration section.
-stage=0
-nj=4
-cmd=run.pl
-use_graphs=false # use graphs from srcdir
-use_gselect=false # use gselect info from srcdir [regardless, we use
-                  # Gaussian-selection info, we might have to compute it though.]
-gselect=15  # Number of Gaussian-selection indices for SGMMs.
-# Begin configuration.
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
-beam=10
-retry_beam=40
-transform_dir=  # directory to find fMLLR transforms in.
-# End configuration options.
-
-echo "$0 $@"  # Print the command line for logging
-
-[ -f path.sh ] && . ./path.sh # source the path.
-. parse_options.sh || exit 1;
-
-if [ $# != 5 ]; then
-   echo "usage: steps/tandem/align_sgmm.sh <data1-dir> <data2-dir> <lang-dir> <src-dir> <align-dir>"
-   echo "e.g.:  steps/tandem/align_sgmm.sh --transform-dir exp/tri3b data1/train data1/lang \\"
-   echo "           exp/sgmm4a exp/sgmm5a_ali"
-   echo "main options (for others, see top of script file)"
-   echo "  --config <config-file>                           # config containing options"
-   echo "  --nj <nj>                                        # number of parallel jobs"
-   echo "  --use-graphs true                                # use graphs in src-dir"
-   echo "  --transform-dir <transform-dir>                  # directory to find fMLLR transforms"
-   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
-   exit 1;
-fi
-
-data1=$1
-data2=$2
-lang=$3
-srcdir=$4
-dir=$5
-
-oov=`cat $lang/oov.int` || exit 1;
-silphonelist=`cat $lang/phones/silence.csl` || exit 1;
-
-mkdir -p $dir/log
-echo $nj > $dir/num_jobs
-
-utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1;
-cp $lang/phones.txt $dir || exit 1;
-
-## Set up features.
-
-sdata1=$data1/split$nj
-sdata2=$data2/split$nj
-[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1;
-[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1;
-
-cp $srcdir/{tree,final.mdl} $dir || exit 1;
-[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir
-cp $srcdir/final.occs $dir;
-
-## Set up features.
-splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
-normft2=`cat $srcdir/normft2 2>/dev/null`
-
-if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
-
-case $feat_type in
-  delta)
-    echo "$0: feature type is $feat_type"
-    ;;
-  lda)
-    echo "$0: feature type is $feat_type"
-    cp $srcdir/{lda,final}.mat $dir/ || exit 1;
-    ;;
-  *) echo "$0: invalid feature type $feat_type" && exit 1;
-esac
-
-# set up feature stream 1;  this are usually spectral features, so we will add
-# deltas or splice them
-feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |"
-
-if [ "$feat_type" == "delta" ]; then
-  feats1="$feats1 add-deltas ark:- ark:- |"
-elif [ "$feat_type" == "lda" ]; then
-  feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
-fi
-
-# set up feature stream 2;  this are usually bottleneck or posterior features,
-# which may be normalized if desired
-feats2="scp:$sdata2/JOB/feats.scp"
-
-if [ "$normft2" == "true" ]; then
-  feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |"
-fi
-
-# assemble tandem features
-feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |"
-
-# add transformation, if applicable
-if [ "$feat_type" == "lda" ]; then
-  feats="$feats transform-feats $dir/final.mat ark:- ark:- |"
-fi
-
-# splicing/normalization options
-cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null
-
-if [ ! -z "$transform_dir" ]; then
-  echo "$0: using transforms from $transform_dir"
-  [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
-  [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
-    && echo "$0: #jobs mismatch with transform-dir." && exit 1;
-  feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
-elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
-  echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
-  echo "  but you are not providing the --transform-dir option during alignment."
-fi
-##
-
-## Set up model and alignment model.
-mdl=$srcdir/final.mdl
-if [ -f $srcdir/final.alimdl ]; then
-  alimdl=$srcdir/final.alimdl
-else
-  alimdl=$srcdir/final.mdl
-fi
-[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
-
-## Work out where we're getting the graphs from.
-if $use_graphs; then
-  [ "$nj" != "`cat $srcdir/num_jobs`" ] && \
-    echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
-  [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
-  graphdir=$srcdir
-  ln.pl $srcdir/fsts.*.gz $dir
-else
-  graphdir=$dir
-  if [ $stage -le 0 ]; then
-    echo "$0: compiling training graphs"
-    tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata1/JOB/text|";
-    $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
-      compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
-      "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
-  fi
-fi
-
-## Work out where we're getting the Gaussian-selection info from
-if $use_gselect; then
-  [ "$nj" != "`cat $srcdir/num_jobs`" ] && \
-    echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1;
-  [ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1;
-  graphdir=$srcdir
-  gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|"
-  ln.pl $srcdir/gselect.*.gz $dir
-else
-  graphdir=$dir
-  if [ $stage -le 1 ]; then
-    echo "$0: computing Gaussian-selection info"
-    # Note: doesn't matter whether we use $alimdl or $mdl, they will
-    # have the same gselect info.
-    $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
-      sgmm-gselect --full-gmm-nbest=$gselect $alimdl \
-      "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
-  fi
-  gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|"
-fi
-
-
-if [ $alimdl == $mdl ]; then
-  # Speaker-independent decoding-- just one pass.  Not normal.
-  T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1;
-  [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1;
-
-  if [ $stage -le 2 ]; then
-    echo "$0: aligning data in $data using model $mdl (no speaker-vectors)"
-    $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
-      sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \
-      "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
-  fi
-  echo "$0: done aligning data."
-  exit 0;
-fi
-
-# Continue with system with speaker vectors.
-if [ $stage -le 2 ]; then
-  echo "$0: aligning data in $data using model $alimdl"
-  $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
-    sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \
-    "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
-fi
-
-if [ $stage -le 3 ]; then
-  echo "$0: computing speaker vectors (1st pass)"
-  $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \
-    ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
-    weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
-    sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \
-    sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata1/JOB/spk2utt \
-    $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1;
-fi
-
-if [ $stage -le 4 ]; then
-  echo "$0: computing speaker vectors (2nd pass)"
-  $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \
-    ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
-    weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
-    sgmm-est-spkvecs --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" \
-    --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1;
-  rm $dir/pre_vecs.*
-fi
-
-if [ $stage -le 5 ]; then
-  echo "$0: doing final alignment."
-  $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
-    sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \
-    --utt2spk=ark:$sdata1/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
-    $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
-fi
-
-rm $dir/pre_ali.*.gz
-
-echo "$0: done aligning data."
-
-utils/summarize_warnings.pl $dir/log
-
-exit 0;
diff --git a/egs/wsj/s5/steps/tandem/decode_sgmm.sh b/egs/wsj/s5/steps/tandem/decode_sgmm.sh
deleted file mode 100755
index c980bf13f4f..00000000000
--- a/egs/wsj/s5/steps/tandem/decode_sgmm.sh
+++ /dev/null
@@ -1,303 +0,0 @@
-#!/bin/bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
-#                 Korbinian Riedhammer
-
-# This script does decoding with an SGMM system, with speaker vectors.
-# If the SGMM system was
-# built on top of fMLLR transforms from a conventional system, you should
-# provide the --transform-dir option.
-
-# Begin configuration section.
-stage=1
-alignment_model=
-transform_dir=    # dir to find fMLLR transforms.
-nj=4 # number of decoding jobs.
-acwt=0.1  # Just a default value, used for adaptation and beam-pruning..
-cmd=run.pl
-beam=15.0
-gselect=15  # Number of Gaussian-selection indices for SGMMs.  [Note:
-            # the first_pass_gselect variable is used for the 1st pass of
-            # decoding and can be tighter.
-first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
-            # the 1st pass of decoding (lattice generation).
-max_active=7000
-
-#WARNING: This option is renamed lattice_beam (it was renamed to follow the naming
-#         in the other scripts
-lattice_beam=8.0 # Beam we use in lattice generation.
-vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
-              # speaker-vector computation.  Can be quite tight (actually we could
-              # probably just do best-path.
-use_fmllr=false
-fmllr_iters=10
-fmllr_min_count=1000
-skip_scoring=false
-# End configuration section.
-
-echo "$0 $@"  # Print the command line for logging
-
-[ -f ./path.sh ] && . ./path.sh; # source the path.
-. parse_options.sh || exit 1;
parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: steps/tandem/decode_sgmm.sh [options] " - echo " e.g.: steps/tandem/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" - echo " exp/sgmm3a/graph_tgpr {mfcc,bottleneck}/data/test_dev93 exp/sgmm3a/decode_dev93_tgpr" - echo "main options (for others, see top of script file)" - echo " --transform-dir # directory of previous decoding" - echo " # where we can find transforms for SAT systems." - echo " --alignment-model # Model for the first-pass decoding." - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --cmd # Command to run in parallel with" - echo " --beam # Decoding beam; default 13.0" - exit 1; -fi - -graphdir=$1 -data1=$2 -data2=$3 -dir=$4 -srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. - -for f in $graphdir/HCLG.fst $data1/feats.scp $data2/feats.scp $srcdir/final.mdl; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - -silphonelist=`cat $graphdir/phones/silence.csl` || exit 1 -gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|" -gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -sdata1=$data1/split$nj; -sdata2=$data2/split$nj; -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - - -## Set up features. - -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. -normft2=`cat $srcdir/normft2 2>/dev/null` - -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $srcdir/{lda,final}.mat $dir/ - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; this are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; this are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - echo "Using cmvn for feats2" - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." 
&& exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option in test time." -fi -## - - -## Calculate FMLLR pre-transforms if needed. We are doing this here since this -## step is requried by models both with and without speaker vectors -if $use_fmllr; then - if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then - echo "$0: computing pre-transform for fMLLR computation." - sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; - fi -fi - -## Save Gaussian-selection info to disk. -# Note: we can use final.mdl regardless of whether there is an alignment model-- -# they use the same UBM. -if [ $stage -le 1 ]; then - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ - "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; -fi - -## Work out name of alignment model. ## -if [ -z "$alignment_model" ]; then - if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; - else alignment_model=$srcdir/final.mdl; fi -fi -[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; - -# Generate state-level lattice which we can rescore. This is done with the -# alignment model and no speaker-vectors. -if [ $stage -le 2 ]; then - $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ - sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ - --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \ - $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; -fi - -## Check if the model has speaker vectors -spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` - -if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: - -# Estimate speaker vectors (1st pass). Prune before determinizing -# because determinization can take a while on un-pruned lattices. -# Note: the sgmm-post-to-gpost stage is necessary because we have -# a separate alignment-model and final model, otherwise we'd skip it -# and use sgmm-est-spkvecs. - if [ $stage -le 3 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata1/JOB/spk2utt \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; - fi - -# Estimate speaker vectors (2nd pass). Since we already have spk vectors, -# at this point we need to rescore the lattice to get the correct posteriors. 
- if [ $stage -le 4 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; - fi - rm $dir/pre_vecs.* - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system.) - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." - $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ]; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata1/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - fi - rm $dir/pre_lat.*.gz - -else ### For models without speaker vectors: - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system.) - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms."
- $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ] && $use_fmllr; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata1/JOB/utt2spk \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - rm $dir/pre_lat.*.gz - else # Already done with decoding if no adaptation needed. - for n in `seq 1 $nj`; do - mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz - done - fi - -fi - -# The output of this script is the files "lat.*.gz"-- we'll rescore this at -# different acoustic scales to get the final output. - - -if [ $stage -le 7 ]; then - if ! $skip_scoring ; then - [ ! -x local/score.sh ] && \ - echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; - echo "score best paths" - local/score.sh --cmd "$cmd" $data1 $graphdir $dir || - { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } - # echo "score confidence and timing with sclite" - # local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data1 $graphdir $dir - fi -fi -echo "Decoding done." -exit 0; diff --git a/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh b/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh deleted file mode 100755 index 6ee4609fb48..00000000000 --- a/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# Create denominator lattices for MMI/MPE training, with SGMM models. If the -# features have fMLLR transforms you have to supply the --transform-dir option. -# It gets any speaker vectors from the "alignment dir" ($srcdir). Note: this is -# possibly a slight mismatch because the speaker vectors come from supervised -# adaptation. - -# Begin configuration section. -nj=4 -cmd=run.pl -sub_split=1 -beam=13.0 -lattice_beam=7.0 -acwt=0.1 -max_active=5000 -transform_dir= -max_mem=20000000 # This will stop the processes getting too large. -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# != 5 ]; then - echo "Usage: steps/tandem/make_denlats_sgmm.sh [options] <data1-dir> <data2-dir> <lang-dir> <src-dir> <exp-dir>" - echo " e.g.: steps/tandem/make_denlats_sgmm.sh {mfcc,bottleneck}/data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" - echo "Works for (delta|lda) features, and (with --transform-dir option) such features" - echo " plus transforms."
- echo "" - echo "Main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --sub-split # e.g. 40; use this for " - echo " # large databases so your jobs will be smaller and" - echo " # will (individually) finish reasonably soon." - echo " --transform-dir # directory to find fMLLR transforms." - exit 1; -fi - -data1=$1 -data2=$2 -lang=$3 -srcdir=$4 # could also be $srcdir, but only if no vectors supplied. -dir=$5 - -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -normft2=`cat $srcdir/normft2 2>/dev/null` -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; - -sdata1=$data1/split$nj -sdata2=$data2/split$nj -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -echo $nj > $dir/num_jobs - -oov=`cat $lang/oov.int` || exit 1; - -mkdir -p $dir - -cp -r $lang $dir/ - -# Compute grammar FST which corresponds to unigram decoding graph. - -cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ - awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ - utils/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \ - || exit 1; - -# mkgraph.sh expects a whole directory "lang", so put everything in one directory... -# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and -# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. - -if [ -s $dir/dengraph/HCLG.fst ]; then - echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." -else - utils/mkgraph.sh $dir/lang $srcdir $dir/dengraph || exit 1; -fi - -# Set up features -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $srcdir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; this are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; this are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null - - -if [ ! -z "$transform_dir" ]; then # add transforms to features... - echo "$0: using fMLLR transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." 
- [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ - && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; - [ -f $srcdir/final.mat ] && ! cmp $transform_dir/final.mat $srcdir/final.mat && \ - echo "$0: LDA transforms differ between $srcdir and $transform_dir" - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "Assuming you don't have a SAT system, since no --transform-dir option supplied " -fi - -if [ -f $srcdir/gselect.1.gz ]; then - gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|" -else - echo "$0: no such file $srcdir/gselect.1.gz" && exit 1; -fi - -if [ -f $srcdir/vecs.1 ]; then - spkvecs_opt="--spk-vecs=ark:$srcdir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" -else - if [ -f $srcdir/final.alimdl ]; then - echo "You seem to have an SGMM system with speaker vectors," - echo "yet we can't find speaker vectors. Perhaps you supplied" - echo "the model director instead of the alignment directory?" - exit 1; - fi -fi - -if [ $sub_split -eq 1 ]; then - $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \ - --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ - --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; -else - for n in `seq $nj`; do - if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then - echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; - else - ssdata1=$data1/split$nj/$n/split${sub_split}utt; - split_data.sh --per-utt $sdata1/$n $sub_split || exit 1; - ssdata2=$data2/split$nj/$n/split${sub_split}utt; - split_data.sh --per-utt $sdata2/$n $sub_split || exit 1; - mkdir -p $dir/log/$n - mkdir -p $dir/part - feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split${sub_split}utt/JOB/:g` - spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` - gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` - $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \ - --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ - --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; - echo Merging archives for data subset $n - rm $dir/.error 2>/dev/null; - for k in `seq $sub_split`; do - gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; - done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; - [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; - rm $dir/lat.$n.*.gz - touch $dir/.done.$n - fi - done -fi - - -echo "$0: done generating denominator lattices with SGMMs." diff --git a/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh b/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh deleted file mode 100755 index 3077fbceef3..00000000000 --- a/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh +++ /dev/null @@ -1,193 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# MMI training (or optionally boosted MMI, if you give the --boost option), -# for SGMMs. 4 iterations (by default) of Extended Baum-Welch update. -# -# Begin configuration section. -cmd=run.pl -num_iters=4 -boost=0.0 -cancel=true # if true, cancel num and den counts on each frame. 
-acwt=0.1 -stage=0 - -update_opts= -transform_dir= -# End configuration section - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 6 ]; then - echo "Usage: steps/tandem/train_mmi_sgmm.sh <data1-dir> <data2-dir> <lang-dir> <ali-dir> <denlat-dir> <exp-dir>" - echo " e.g.: steps/tandem/train_mmi_sgmm.sh {mfcc,bottleneck}/data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi" - echo "Main options (for others, see top of script file)" - echo " --boost <boost-weight> # (e.g. 0.1), for boosted MMI. (default 0)" - echo " --cancel (true|false) # cancel stats (true by default)" - echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." - echo " --config <config-file> # config containing options" - echo " --stage <stage> # stage to do partial re-run from." - echo " --transform-dir <transform-dir> # directory to find fMLLR transforms." - exit 1; -fi - -data1=$1 -data2=$2 -lang=$3 -alidir=$4 -denlatdir=$5 -dir=$6 -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -for f in $data1/feats.scp $data2/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done -nj=`cat $alidir/num_jobs` || exit 1; -[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \ - echo "$alidir and $denlatdir have different num-jobs" && exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -cp $alidir/{final.mdl,tree} $dir -silphonelist=`cat $lang/phones/silence.csl` || exit 1; - -# Set up features - -sdata1=$data1/split$nj -sdata2=$data2/split$nj -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. -normft2=`cat $alidir/normft2 2>/dev/null` - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $alidir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; these are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; these are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $alidir/{splice_opts,normft2,tandem} $dir 2>/dev/null - - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ !
-f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \ - && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "$0: no fMLLR transforms." -fi - -if [ -f $alidir/vecs.1 ]; then - echo "$0: using speaker vectors from $alidir" - spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk" -else - echo "$0: no speaker vectors." - spkvecs_opt= -fi - -if [ -f $alidir/gselect.1.gz ]; then - echo "$0: using Gaussian-selection info from $alidir" - gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|" -else - echo "$0: error: no Gaussian-selection info found" && exit 1; -fi - -lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" -if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then - lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" -fi - - -cur_mdl=$alidir/final.mdl -x=0 -while [ $x -lt $num_iters ]; do - echo "Iteration $x of MMI training" - # Note: the num and den states are accumulated at the same time, so we - # can cancel them per frame. - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt $cur_mdl "$lats" "$feats" ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - sum-post --merge=$cancel --scale1=-1 \ - ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \ - sgmm-acc-stats2 "$gselect_opt" $spkvecs_opt $cur_mdl "$feats" ark,s,cs:- \ - $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1; - - n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`; - [ "$n" -ne $[$nj*2] ] && \ - echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1; - $cmd $dir/log/den_acc_sum.$x.log \ - sgmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1; - rm $dir/den_acc.$x.*.acc - $cmd $dir/log/num_acc_sum.$x.log \ - sgmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; - rm $dir/num_acc.$x.*.acc - - $cmd $dir/log/update.$x.log \ - sgmm-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; - fi - cur_mdl=$dir/$[$x+1].mdl - - - # Some diagnostics: the objective function progress and auxiliary-function - # improvement. Note: this code is same as in train_mmi.sh - tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<>) { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf - objf=`cat $dir/tmpf | awk '{print $1}'`; - nf=`cat $dir/tmpf | awk '{print $2}'`; - rm $dir/tmpf - impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'` - impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
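To make the diagnostic above concrete: the printed auxiliary-function change is the total improvement summed out of update.$x.log, rescaled to a per-frame value at the acoustic scale. A sketch with made-up numbers:

    # hypothetical values: 2.5e5 total auxf improvement over 1.2e6 frames, acwt 0.1
    acwt=0.1; nf=1200000; impr=250000
    perl -e "print ($impr*$acwt/$nf);"   # prints ~0.0208, the per-frame auxf change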
- echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log - x=$[$x+1] -done - -echo "MMI training finished" - -rm $dir/final.mdl 2>/dev/null -ln -s $x.mdl $dir/final.mdl - -exit 0; diff --git a/egs/wsj/s5/steps/tandem/train_sgmm.sh b/egs/wsj/s5/steps/tandem/train_sgmm.sh deleted file mode 100755 index 48f392141a1..00000000000 --- a/egs/wsj/s5/steps/tandem/train_sgmm.sh +++ /dev/null @@ -1,315 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# SGMM training, with speaker vectors. This script would normally be called on -# top of fMLLR features obtained from a conventional system, but it also works -# on top of any type of speaker-independent features (based on -# deltas+delta-deltas or LDA+MLLT). For more info on SGMMs, see the paper "The -# subspace Gaussian mixture model--A structured model for speech recognition". -# (Computer Speech and Language, 2011). - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=-6 -context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a -# quinphone system. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -num_iters=25 # Total number of iterations -num_iters_alimdl=3 # Number of iterations for estimating alignment model. -max_iter_inc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; # Iters to realign on. -spkvec_iters="5 8 12 17" # Iters to estimate speaker vectors on. -increase_dim_iters="6 8"; # Iters on which to increase phn dim and/or spk dim; - # rarely necessary, and if it is, only the 1st will normally be necessary. -rand_prune=0.1 # Randomized-pruning parameter for posteriors, to speed up training. -phn_dim= # You can use this to set the phonetic subspace dim. [default: feat-dim+1] -spk_dim= # You can use this to set the speaker subspace dim. [default: feat-dim] -power=0.2 # Exponent for number of gaussians according to occurrence counts -beam=8 -retry_beam=40 -cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves -normft2=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - - -if [ $# != 8 ]; then - echo "Usage: steps/tandem/train_sgmm.sh " - echo " e.g.: steps/tandem/train_sgmm.sh 3500 10000 {mfcc,bottleneck},data/train_si84 data/lang \\" - echo " exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a" - echo "main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --silence-weight # weight for silence (e.g. 0.5 or 0.0)" - echo " --num-iters <#iters> # Number of iterations of E-M" - exit 1; -fi - - -num_leaves=$1 -totsubstates=$2 -data1=$3 -data2=$4 -lang=$5 -alidir=$6 -ubm=$7 -dir=$8 - -# Check some files. -for f in $data1/feats.scp $data2/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - - -# Set some variables. -oov=`cat $lang/oov.int` -silphonelist=`cat $lang/phones/silence.csl` -numsubstates=$num_leaves # Initial #-substates. -incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates -feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1; -[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric. 
-[ -z $phn_dim ] && phn_dim=$[$feat_dim+1] -[ -z $spk_dim ] && spk_dim=$feat_dim -nj=`cat $alidir/num_jobs` || exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -sdata1=$data1/split$nj; -sdata2=$data2/split$nj; -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -spkvecs_opt= # Empty option for now, until we estimate the speaker vectors. -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" - -## Set up features. -splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. -normft2=`cat $alidir/normft2 2>/dev/null` - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $alidir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; these are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; these are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $alidir/{splice_opts,normft2,tandem} $dir 2>/dev/null - -if [ -f $alidir/trans.1 ]; then - echo "$0: using transforms from $alidir" - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |" -fi -## - - -if [ $stage -le -6 ]; then - echo "$0: accumulating tree stats" - $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ - acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ - "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; - [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1; - sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1; - rm $dir/*.treeacc -fi - -if [ $stage -le -5 ]; then - echo "$0: Getting questions for tree clustering." - # preparing questions, roots file...
- cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1; - cat $lang/phones/extra_questions.int >> $dir/questions.int - compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; - - echo "$0: Building the tree" - $cmd $dir/log/build_tree.log \ - build-tree --verbose=1 --max-leaves=$num_leaves \ - --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ - $dir/questions.qst $lang/topo $dir/tree || exit 1; -fi - -if [ $stage -le -4 ]; then - echo "$0: Initializing the model" - # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims - # will be truncated on initialization. - $cmd $dir/log/init_sgmm.log \ - sgmm-init --phn-space-dim=$phn_dim --spk-space-dim=$spk_dim $lang/topo \ - $dir/tree $ubm $dir/0.mdl || exit 1; -fi - -if [ $stage -le -3 ]; then - echo "$0: doing Gaussian selection" - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect $dir/0.mdl "$feats" \ - "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1; -fi - -if [ $stage -le -2 ]; then - echo "$0: compiling training graphs" - text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata1/JOB/text|" - $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/0.mdl $lang/L.fst \ - "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; -fi - -if [ $stage -le -1 ]; then - echo "$0: Converting alignments" - $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \ - convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \ - "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; -fi - -x=0 -while [ $x -lt $num_iters ]; do - echo "$0: training pass $x ... " - if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then - echo "$0: re-aligning data" - $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ - sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \ - --utt2spk=ark:$sdata1/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \ - $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ - "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; - fi - if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata1/JOB/spk2utt \ - $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \ - ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1; - fi - spkvecs_opt[$n]="--spk-vecs=ark:$dir/vecs.JOB" - fi - if [ $x -eq 0 ]; then - flags=vwcSt # on the first iteration, don't update projections M or N - elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then - # Update N if we have speaker-vector space and x is odd, - # and we've already updated the speaker vectors... - flags=vNwcSt - else - # otherwise update M. - flags=vMwcSt - fi - - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - sgmm-acc-stats $spkvecs_opt --utt2spk=ark:$sdata1/JOB/utt2spk \ - --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \ - $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \ - $dir/$x.JOB.acc || exit 1; - fi - - # The next option is needed if the user specifies a phone or speaker sub-space - # dimension that's higher than the "normal" one. 
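On the iterations named in $increase_dim_iters, the same accumulators are also used to grow the subspaces, which the code just below implements by passing extra flags to sgmm-est. An illustrative call with made-up iteration number and dimensions:

    # hypothetical: on iteration 6, grow the phonetic/speaker subspaces to 41/60
    sgmm-est --update-flags=vMwcSt --split-substates=12000 \
      --increase-phn-dim=41 --increase-spk-dim=60 --power=0.2 \
      --write-occs=exp/sgmm4a/7.occs exp/sgmm4a/6.mdl \
      "sgmm-sum-accs - exp/sgmm4a/6.*.acc|" exp/sgmm4a/7.mdl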
- increase_dim_opts= - if echo $increase_dim_iters | grep -w $x >/dev/null; then - increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim" - # Note: the command below might have a null effect on some iterations. - if [ $spk_dim -gt $feat_dim ]; then - $cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \ - copy-vector --print-args=false --change-dim=$spk_dim \ - ark:$dir/vecs.JOB ark:$dir/vecs_tmp.JOB '&&' \ - mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1; - fi - fi - - if [ $stage -le $x ]; then - $cmd $dir/log/update.$x.log \ - sgmm-est --update-flags=$flags --split-substates=$numsubstates $increase_dim_opts \ - --power=$power --write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.*.acc|" \ - $dir/$[$x+1].mdl || exit 1; - rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null - fi - - if [ $x -lt $max_iter_inc ]; then - numsubstates=$[$numsubstates+$incsubstates] - fi - x=$[$x+1]; -done - -rm $dir/final.mdl $dir/final.occs 2>/dev/null -ln -s $x.mdl $dir/final.mdl -ln -s $x.occs $dir/final.occs - -if [ $spk_dim -gt 0 ]; then - # We need to create an "alignment model" that's been trained - # without the speaker vectors, to do the first-pass decoding with - # in test time. - - # We do this for a few iters, in this recipe. - final_mdl=$dir/$x.mdl - cur_alimdl=$dir/$x.mdl - while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do - echo "$0: building alignment model (pass $x)" - if [ $x -eq $num_iters ]; then # 1st pass of building alimdl. - flags=MwcS # don't update v the first time. Note-- we never update transitions. - # they wouldn't change anyway as we use the same alignment as previously. - else - flags=vMwcS - fi - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$sdata1/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \ - $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1; - $cmd $dir/log/update_ali.$x.log \ - sgmm-est --update-flags=$flags --remove-speaker-space=true --power=$power $cur_alimdl \ - "sgmm-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1; - rm $dir/$x.*.aliacc || exit 1; - [ $x -gt $num_iters ] && rm $dir/$x.alimdl - fi - cur_alimdl=$dir/$[$x+1].alimdl - x=$[$x+1] - done - rm $dir/final.alimdl 2>/dev/null - ln -s $x.alimdl $dir/final.alimdl -fi - -utils/summarize_warnings.pl $dir/log - -echo Done diff --git a/egs/wsj/s5/steps/train_lda_mllt.sh b/egs/wsj/s5/steps/train_lda_mllt.sh index 8b5e19ec8d1..363df34a3cd 100755 --- a/egs/wsj/s5/steps/train_lda_mllt.sh +++ b/egs/wsj/s5/steps/train_lda_mllt.sh @@ -95,7 +95,7 @@ feats="$splicedfeats transform-feats $dir/0.mat ark:- ark:- |" if [ $stage -le -5 ]; then if [ -z "$use_lda_mat" ]; then - echo "Accumulating LDA statistics." + echo "$0: Accumulating LDA statistics." rm $dir/lda.*.acc 2>/dev/null $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \ ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ @@ -106,11 +106,11 @@ if [ $stage -le -5 ]; then 2>$dir/log/lda_est.log || exit 1; rm $dir/lda.*.acc else - echo "Using supplied LDA matrix $use_lda_mat" + echo "$0: Using supplied LDA matrix $use_lda_mat" cp $use_lda_mat $dir/0.mat || exit 1; [ !
-z "$mllt_iters" ] && \ - echo "Warning: using supplied LDA matrix $use_lda_mat but we will do MLLT," && \ - echo "which you might not want; to disable MLLT, specify --mllt-iters ''" && \ + echo "$0: Warning: using supplied LDA matrix $use_lda_mat but we will do MLLT," && \ + echo " which you might not want; to disable MLLT, specify --mllt-iters ''" && \ sleep 5 fi fi @@ -118,12 +118,12 @@ fi cur_lda_iter=0 if [ $stage -le -4 ] && $train_tree; then - echo "Accumulating tree stats" + echo "$0: Accumulating tree stats" $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ acc-tree-stats $context_opts \ --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; - [ `ls $dir/*.treeacc | wc -w` -ne "$nj" ] && echo "Wrong #tree-accs" && exit 1; + [ `ls $dir/*.treeacc | wc -w` -ne "$nj" ] && echo "$0: Wrong #tree-accs" && exit 1; $cmd $dir/log/sum_tree_acc.log \ sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; rm $dir/*.treeacc @@ -131,7 +131,7 @@ fi if [ $stage -le -3 ] && $train_tree; then - echo "Getting questions for tree clustering." + echo "$0: Getting questions for tree clustering." # preparing questions, roots file... cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int \ $dir/questions.int 2> $dir/log/questions.log || exit 1; @@ -139,7 +139,7 @@ if [ $stage -le -3 ] && $train_tree; then compile-questions $context_opts $lang/topo $dir/questions.int \ $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; - echo "Building the tree" + echo "$0: Building the tree" $cmd $dir/log/build_tree.log \ build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ @@ -164,14 +164,14 @@ fi if [ $stage -le -1 ]; then # Convert the alignments. - echo "Converting alignments from $alidir to use current tree" + echo "$0: Converting alignments from $alidir to use current tree" $cmd JOB=1:$nj $dir/log/convert.JOB.log \ convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; fi if [ $stage -le 0 ] && [ "$realign_iters" != "" ]; then - echo "Compiling graphs of transcripts" + echo "$0: Compiling graphs of transcripts" $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/1.mdl $lang/L.fst \ "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \ @@ -192,7 +192,7 @@ while [ $x -lt $num_iters ]; do fi if echo $mllt_iters | grep -w $x >/dev/null; then if [ $stage -le $x ]; then - echo "Estimating MLLT" + echo "$0: Estimating MLLT" $cmd JOB=1:$nj $dir/log/macc.$x.JOB.log \ ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- \| \ @@ -233,6 +233,6 @@ utils/summarize_warnings.pl $dir/log steps/info/gmm_dir_info.pl $dir -echo "Done training system with LDA+MLLT features in $dir" +echo "$0: Done training system with LDA+MLLT features in $dir" exit 0 diff --git a/egs/wsj/s5/steps/train_mmi_sgmm.sh b/egs/wsj/s5/steps/train_mmi_sgmm.sh deleted file mode 100755 index cb0700e92fc..00000000000 --- a/egs/wsj/s5/steps/train_mmi_sgmm.sh +++ /dev/null @@ -1,156 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. - -# MMI training (or optionally boosted MMI, if you give the --boost option), -# for SGMMs. 4 iterations (by default) of Extended Baum-Welch update. 
-# -# Begin configuration section. -cmd=run.pl -num_iters=4 -boost=0.0 -cancel=true # if true, cancel num and den counts on each frame. -acwt=0.1 -stage=0 - -update_opts= -transform_dir= -# End configuration section - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 5 ]; then - echo "Usage: steps/train_mmi_sgmm.sh <data-dir> <lang-dir> <ali-dir> <denlat-dir> <exp-dir>" - echo " e.g.: steps/train_mmi_sgmm.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi" - echo "Main options (for others, see top of script file)" - echo " --boost <boost-weight> # (e.g. 0.1), for boosted MMI. (default 0)" - echo " --cancel (true|false) # cancel stats (true by default)" - echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." - echo " --config <config-file> # config containing options" - echo " --stage <stage> # stage to do partial re-run from." - echo " --transform-dir <transform-dir> # directory to find fMLLR transforms." - exit 1; -fi - -data=$1 -lang=$2 -alidir=$3 -denlatdir=$4 -dir=$5 -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done -nj=`cat $alidir/num_jobs` || exit 1; -[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \ - echo "$alidir and $denlatdir have different num-jobs" && exit 1; - -sdata=$data/split$nj -splice_opts=`cat $alidir/splice_opts 2>/dev/null` -cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -cp $alidir/splice_opts $dir 2>/dev/null -cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. -echo $nj > $dir/num_jobs - -cp $alidir/tree $dir -cp $alidir/final.mdl $dir/0.mdl -cp $alidir/final.alimdl $dir - -silphonelist=`cat $lang/phones/silence.csl` || exit 1; - -# Set up features - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir - ;; - *) echo "Invalid feature type $feat_type" && exit 1; -esac - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \ - && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "$0: no fMLLR transforms." -fi - -if [ -f $alidir/vecs.1 ]; then - echo "$0: using speaker vectors from $alidir" - spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" -else - echo "$0: no speaker vectors."
- spkvecs_opt= -fi - -if [ -f $alidir/gselect.1.gz ]; then - echo "$0: using Gaussian-selection info from $alidir" - gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|" -else - echo "$0: error: no Gaussian-selection info found" && exit 1; -fi - -lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" -if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then - lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" -fi - -x=0 -while [ $x -lt $num_iters ]; do - echo "Iteration $x of MMI training" - # Note: the num and den states are accumulated at the same time, so we - # can cancel them per frame. - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - sgmm-rescore-lattice --speedup=true "$gselect_opt" $spkvecs_opt $dir/$x.mdl "$lats" "$feats" ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - sum-post --merge=$cancel --scale1=-1 \ - ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \ - sgmm-acc-stats2 "$gselect_opt" $spkvecs_opt $dir/$x.mdl "$feats" ark,s,cs:- \ - $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1; - - n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`; - [ "$n" -ne $[$nj*2] ] && \ - echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1; - $cmd $dir/log/den_acc_sum.$x.log \ - sgmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1; - rm $dir/den_acc.$x.*.acc - $cmd $dir/log/num_acc_sum.$x.log \ - sgmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; - rm $dir/num_acc.$x.*.acc - - $cmd $dir/log/update.$x.log \ - sgmm-est-ebw $update_opts $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; - fi - - # Some diagnostics: the objective function progress and auxiliary-function - # improvement. Note: this code is same as in train_mmi.sh - tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<>) { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf - objf=`cat $dir/tmpf | awk '{print $1}'`; - nf=`cat $dir/tmpf | awk '{print $2}'`; - rm $dir/tmpf - impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'` - impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames. - echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log - x=$[$x+1] -done - -echo "MMI training finished" - -rm $dir/final.mdl 2>/dev/null -ln -s $x.mdl $dir/final.mdl - -exit 0; diff --git a/egs/wsj/s5/steps/train_sgmm.sh b/egs/wsj/s5/steps/train_sgmm.sh deleted file mode 100755 index 0d372be2d84..00000000000 --- a/egs/wsj/s5/steps/train_sgmm.sh +++ /dev/null @@ -1,280 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
- -# SGMM training, with speaker vectors. This script would normally be called on -# top of fMLLR features obtained from a conventional system, but it also works -# on top of any type of speaker-independent features (based on -# deltas+delta-deltas or LDA+MLLT). For more info on SGMMs, see the paper "The -# subspace Gaussian mixture model--A structured model for speech recognition". -# (Computer Speech and Language, 2011). - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=-6 -context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a -# quinphone system. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -num_iters=25 # Total number of iterations -num_iters_alimdl=3 # Number of iterations for estimating alignment model. -max_iter_inc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; # Iters to realign on. -spkvec_iters="5 8 12 17" # Iters to estimate speaker vectors on. -increase_dim_iters="6 8"; # Iters on which to increase phn dim and/or spk dim; - # rarely necessary, and if it is, only the 1st will normally be necessary. -rand_prune=0.1 # Randomized-pruning parameter for posteriors, to speed up training. -phn_dim= # You can use this to set the phonetic subspace dim. [default: feat-dim+1] -spk_dim= # You can use this to set the speaker subspace dim. [default: feat-dim] -power=0.25 # Exponent for number of gaussians according to occurrence counts -beam=8 -retry_beam=40 -cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - - -if [ $# != 7 ]; then - echo "Usage: steps/train_sgmm.sh <num-leaves> <tot-substates> <data-dir> <lang-dir> <ali-dir> <ubm> <exp-dir>" - echo " e.g.: steps/train_sgmm.sh 3500 10000 data/train_si84 data/lang \\" - echo " exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a" - echo "main options (for others, see top of script file)" - echo " --config <config-file> # config containing options" - echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." - echo " --silence-weight <sil-weight> # weight for silence (e.g. 0.5 or 0.0)" - echo " --num-iters <#iters> # Number of iterations of E-M" - exit 1; -fi - - -num_leaves=$1 -totsubstates=$2 -data=$3 -lang=$4 -alidir=$5 -ubm=$6 -dir=$7 - -# Check some files. -for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - - -# Set some variables. -oov=`cat $lang/oov.int` -silphonelist=`cat $lang/phones/silence.csl` -numsubstates=$num_leaves # Initial #-substates. -incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates -feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1; -[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric. -[ -z $phn_dim ] && phn_dim=$[$feat_dim+1] -[ -z $spk_dim ] && spk_dim=$feat_dim -nj=`cat $alidir/num_jobs` || exit 1; -ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs -sdata=$data/split$nj; -splice_opts=`cat $alidir/splice_opts 2>/dev/null` -cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` -cp $alidir/splice_opts $dir 2>/dev/null -cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
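As elsewhere in these scripts, splice_opts and cmvn_opts are copied into the experiment directory so that downstream jobs can rebuild the training-time feature pipeline exactly. The consuming side of that convention looks like this sketch (directory names illustrative; JOB is the usual per-split placeholder):

    splice_opts=`cat exp/sgmm4a/splice_opts 2>/dev/null`  # e.g. --left-context=3 --right-context=3
    cmvn_opts=`cat exp/sgmm4a/cmvn_opts 2>/dev/null`
    feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk \
      scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |"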
- -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; - -spkvecs_opt= # Empty option for now, until we estimate the speaker vectors. -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" - -## Set up features. -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac -if [ -f $alidir/trans.1 ]; then - echo "$0: using transforms from $alidir" - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |" -fi -## - - -if [ $stage -le -6 ]; then - echo "$0: accumulating tree stats" - $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ - acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ - "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; - [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1; - sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1; - rm $dir/*.treeacc -fi - -if [ $stage -le -5 ]; then - echo "$0: Getting questions for tree clustering." - # preparing questions, roots file... - cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1; - cat $lang/phones/extra_questions.int >> $dir/questions.int - compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; - - echo "$0: Building the tree" - $cmd $dir/log/build_tree.log \ - build-tree --verbose=1 --max-leaves=$num_leaves \ - --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ - $dir/questions.qst $lang/topo $dir/tree || exit 1; -fi - -if [ $stage -le -4 ]; then - echo "$0: Initializing the model" - # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims - # will be truncated on initialization. - $cmd $dir/log/init_sgmm.log \ - sgmm-init --phn-space-dim=$phn_dim --spk-space-dim=$spk_dim $lang/topo \ - $dir/tree $ubm $dir/0.mdl || exit 1; -fi - -if [ $stage -le -3 ]; then - echo "$0: doing Gaussian selection" - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect $dir/0.mdl "$feats" \ - "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1; -fi - -if [ $stage -le -2 ]; then - echo "$0: compiling training graphs" - text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text|" - $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/0.mdl $lang/L.fst \ - "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; -fi - -if [ $stage -le -1 ]; then - echo "$0: Converting alignments" - $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \ - convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \ - "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; -fi - -x=0 -while [ $x -lt $num_iters ]; do - echo "$0: training pass $x ... 
" - if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then - echo "$0: re-aligning data" - $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ - sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \ - --utt2spk=ark:$sdata/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \ - $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ - "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; - fi - if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata/JOB/spk2utt \ - $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \ - ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1; - fi - spkvecs_opt[$n]="--spk-vecs=ark:$dir/vecs.JOB" - fi - if [ $x -eq 0 ]; then - flags=vwcSt # on the first iteration, don't update projections M or N - elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then - # Update N if we have speaker-vector space and x is odd, - # and we've already updated the speaker vectors... - flags=vNwcSt - else - # otherwise update M. - flags=vMwcSt - fi - - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - sgmm-acc-stats $spkvecs_opt --utt2spk=ark:$sdata/JOB/utt2spk \ - --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \ - $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \ - $dir/$x.JOB.acc || exit 1; - fi - - # The next option is needed if the user specifies a phone or speaker sub-space - # dimension that's higher than the "normal" one. - increase_dim_opts= - if echo $increase_dim_iters | grep -w $x >/dev/null; then - increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim" - # Note: the command below might have a null effect on some iterations. - if [ $spk_dim -gt $feat_dim ]; then - cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \ - copy-vector --print-args=false --change-dim=$spk_dim \ - ark:$dir/vecs.JOB ark:$dir/vecs_tmp.$JOB '&&' \ - mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1; - fi - fi - - if [ $stage -le $x ]; then - $cmd $dir/log/update.$x.log \ - sgmm-est --update-flags=$flags --split-substates=$numsubstates $increase_dim_opts \ - --power=$power --write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.*.acc|" \ - $dir/$[$x+1].mdl || exit 1; - rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null - fi - - if [ $x -lt $max_iter_inc ]; then - numsubstates=$[$numsubstates+$incsubstates] - fi - x=$[$x+1]; -done - -rm $dir/final.mdl $dir/final.occs 2>/dev/null -ln -s $x.mdl $dir/final.mdl -ln -s $x.occs $dir/final.occs - -if [ $spk_dim -gt 0 ]; then - # We need to create an "alignment model" that's been trained - # without the speaker vectors, to do the first-pass decoding with. - # in test time. - - # We do this for a few iters, in this recipe. - final_mdl=$dir/$x.mdl - cur_alimdl=$dir/$x.mdl - while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do - echo "$0: building alignment model (pass $x)" - if [ $x -eq $num_iters ]; then # 1st pass of building alimdl. - flags=MwcS # don't update v the first time. Note-- we never update transitions. - # they wouldn't change anyway as we use the same alignment as previously. 
- else - flags=vMwcS - fi - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$sdata/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \ - $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1; - $cmd $dir/log/update_ali.$x.log \ - sgmm-est --update-flags=$flags --remove-speaker-space=true --power=$power $cur_alimdl \ - "sgmm-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1; - rm $dir/$x.*.aliacc || exit 1; - [ $x -gt $num_iters ] && rm $dir/$x.alimdl - fi - cur_alimdl=$dir/$[$x+1].alimdl - x=$[$x+1] - done - rm $dir/final.alimdl 2>/dev/null - ln -s $x.alimdl $dir/final.alimdl -fi - -utils/summarize_warnings.pl $dir/log - -echo Done diff --git a/egs/wsj/s5/utils/build_const_arpa_lm.sh b/egs/wsj/s5/utils/build_const_arpa_lm.sh index 375ffd79eb4..ec067df0d39 100755 --- a/egs/wsj/s5/utils/build_const_arpa_lm.sh +++ b/egs/wsj/s5/utils/build_const_arpa_lm.sh @@ -34,7 +34,7 @@ mkdir -p $new_lang cp -r $old_lang/* $new_lang unk=`cat $new_lang/oov.int` -bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'` +bos=`grep -w "<s>" $new_lang/words.txt | awk '{print $2}'` eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'` if [[ -z $bos || -z $eos ]]; then echo "$0: <s> and </s> symbols are not in $new_lang/words.txt" diff --git a/egs/wsj/s5/utils/convert_slf_parallel.sh b/egs/wsj/s5/utils/convert_slf_parallel.sh index 4e4ce41d236..1b242ed2c38 100755 --- a/egs/wsj/s5/utils/convert_slf_parallel.sh +++ b/egs/wsj/s5/utils/convert_slf_parallel.sh @@ -7,7 +7,7 @@ # begin configuration section. cmd=run.pl dirname=lats-in-htk-slf -parallel_opts="-tc 50" # We should limit disk stress +parallel_opts="--max-jobs-run 50" # We should limit disk stress word_to_node=false # Words in arcs or nodes? [default:arcs] #end configuration section. @@ -21,7 +21,7 @@ if [ $# -ne 3 ]; then echo " Options:" echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." echo " --word-to-link (true|false) # put word symbols on links or nodes." - echo " --parallel-opts STR # parallelization options (def.: '-tc 50')." + echo " --parallel-opts STR # parallelization options (def.: '--max-jobs-run 50')." echo "e.g.:" echo "$0 data/dev data/lang exp/tri4a/decode_dev" exit 1; diff --git a/egs/wsj/s5/utils/copy_data_dir.sh b/egs/wsj/s5/utils/copy_data_dir.sh index 008233daf62..222bc708527 100755 --- a/egs/wsj/s5/utils/copy_data_dir.sh +++ b/egs/wsj/s5/utils/copy_data_dir.sh @@ -83,15 +83,16 @@ fi if [ -f $srcdir/segments ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments cp $srcdir/wav.scp $destdir - if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ - fi else # no segments->wav indexed by utt. if [ -f $srcdir/wav.scp ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp fi fi +if [ -f $srcdir/reco2file_and_channel ]; then + cp $srcdir/reco2file_and_channel $destdir/ +fi + if [ -f $srcdir/text ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text fi diff --git a/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh b/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh new file mode 100755 index 00000000000..f55f60c4774 --- /dev/null +++ b/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh @@ -0,0 +1,108 @@ +#!
/bin/bash
+
+# This script converts a data directory into a "whole" data directory
+# by removing the segments and using the recordings themselves as
+# utterances
+
+set -o pipefail
+
+. path.sh
+
+cmd=run.pl
+stage=-1
+
+. parse_options.sh
+
+if [ $# -ne 2 ]; then
+  echo "Usage: convert_data_dir_to_whole.sh <in-data-dir> <out-data-dir>"
+  echo " e.g.: convert_data_dir_to_whole.sh data/dev data/dev_whole"
+  exit 1
+fi
+
+data=$1
+dir=$2
+
+if [ ! -f $data/segments ]; then
+  # Data directory already does not contain segments. So just copy it.
+  utils/copy_data_dir.sh $data $dir
+  exit 0
+fi
+
+mkdir -p $dir
+cp $data/wav.scp $dir
+cp $data/reco2file_and_channel $dir
+rm -f $dir/{utt2spk,text} || true
+
+[ -f $data/stm ] && cp $data/stm $dir
+[ -f $data/glm ] && cp $data/glm $dir
+
+text_files=
+[ -f $data/text ] && text_files="$data/text $dir/text"
+
+# Combine utt2spk and text from the segments into utt2spk and text for the whole
+# recording.
+cat $data/segments | perl -e '
+if (scalar @ARGV == 4) {
+  ($utt2spk_in, $utt2spk_out, $text_in, $text_out) = @ARGV;
+} elsif (scalar @ARGV == 2) {
+  ($utt2spk_in, $utt2spk_out) = @ARGV;
+} else {
+  die "Unexpected number of arguments";
+}
+
+if (defined $text_in) {
+  open(TI, "<$text_in") || die "Error: fail to open $text_in\n";
+  open(TO, ">$text_out") || die "Error: fail to open $text_out\n";
+}
+open(UI, "<$utt2spk_in") || die "Error: fail to open $utt2spk_in\n";
+open(UO, ">$utt2spk_out") || die "Error: fail to open $utt2spk_out\n";
+
+my %file2utt = ();
+while (<STDIN>) {
+  chomp;
+  my @col = split;
+  @col >= 4 or die "bad line $_\n";
+
+  if (! defined $file2utt{$col[1]}) {
+    $file2utt{$col[1]} = [];
+  }
+  push @{$file2utt{$col[1]}}, $col[0];
+}
+
+my %text = ();
+my %utt2spk = ();
+
+while (<UI>) {
+  chomp;
+  my @col = split;
+  $utt2spk{$col[0]} = $col[1];
+}
+
+if (defined $text_in) {
+  while (<TI>) {
+    chomp;
+    my @col = split;
+    @col >= 1 or die "bad line $_\n";
+
+    my $utt = shift @col;
+    $text{$utt} = join(" ", @col);
+  }
+}
+
+foreach $file (keys %file2utt) {
+  my @utts = @{$file2utt{$file}};
+  #print STDERR $file . " " . join(" ", @utts) . "\n";
+  print UO "$file $file\n";
+
+  if (defined $text_in) {
+    $text_line = join(" ", map { $text{$_} } @utts);  # concatenate this recording's utterance texts
+    print TO "$file $text_line\n";
+  }
+}
+' $data/utt2spk $dir/utt2spk $text_files
+
+sort -u $dir/utt2spk > $dir/utt2spk.tmp
+mv $dir/utt2spk.tmp $dir/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt
+
+utils/fix_data_dir.sh $dir
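To make the conversion concrete, here is a small illustration with made-up recording and utterance names (not taken from any recipe; assumes the text-joining behavior of the perl block above):

# Input segments:          utt1 rec1 0.0 1.5
#                          utt2 rec1 1.5 3.0
# Input text:              utt1 hello there
#                          utt2 how are you
# Output utt2spk (whole):  rec1 rec1
# Output text (whole):     rec1 hello there how are you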
"\n"; + print UO "$file $file\n"; + + if (defined $text_in) { + $text_line = ""; + print TO "$file $text_line\n"; + } +} +' $data/utt2spk $dir/utt2spk $text_files + +sort -u $dir/utt2spk > $dir/utt2spk.tmp +mv $dir/utt2spk.tmp $dir/utt2spk +utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt + +utils/fix_data_dir.sh $dir diff --git a/egs/wsj/s5/utils/data/data_lib.py b/egs/wsj/s5/utils/data/data_lib.py new file mode 100644 index 00000000000..5e58fcac3d5 --- /dev/null +++ b/egs/wsj/s5/utils/data/data_lib.py @@ -0,0 +1,57 @@ +import os + +import libs.common as common_lib + +def get_frame_shift(data_dir): + frame_shift = common_lib.run_kaldi_command("utils/data/get_frame_shift.sh {0}".format(data_dir))[0] + return float(frame_shift.strip()) + +def generate_utt2dur(data_dir): + common_lib.run_kaldi_command("utils/data/get_utt2dur.sh {0}".format(data_dir)) + +def get_utt2dur(data_dir): + GenerateUtt2Dur(data_dir) + utt2dur = {} + for line in open('{0}/utt2dur'.format(data_dir), 'r').readlines(): + parts = line.split() + utt2dur[parts[0]] = float(parts[1]) + return utt2dur + +def get_utt2uniq(data_dir): + utt2uniq_file = '{0}/utt2uniq'.format(data_dir) + if not os.path.exists(utt2uniq_file): + return None, None + utt2uniq = {} + uniq2utt = {} + for line in open(utt2uniq_file, 'r').readlines(): + parts = line.split() + utt2uniq[parts[0]] = parts[1] + if uniq2utt.has_key(parts[1]): + uniq2utt[parts[1]].append(parts[0]) + else: + uniq2utt[parts[1]] = [parts[0]] + return utt2uniq, uniq2utt + +def get_num_frames(data_dir, utts = None): + GenerateUtt2Dur(data_dir) + frame_shift = GetFrameShift(data_dir) + total_duration = 0 + utt2dur = GetUtt2Dur(data_dir) + if utts is None: + utts = utt2dur.keys() + for utt in utts: + total_duration = total_duration + utt2dur[utt] + return int(float(total_duration)/frame_shift) + +def create_data_links(file_names): + # if file_names already exist create_data_link.pl returns with code 1 + # so we just delete them before calling create_data_link.pl + for file_name in file_names: + TryToDelete(file_name) + common_lib.run_kaldi_command(" utils/create_data_link.pl {0}".format(" ".join(file_names))) + +def try_to_delete(file_name): + try: + os.remove(file_name) + except OSError: + pass diff --git a/egs/wsj/s5/utils/data/downsample_data_dir.sh b/egs/wsj/s5/utils/data/downsample_data_dir.sh new file mode 100755 index 00000000000..022af67d265 --- /dev/null +++ b/egs/wsj/s5/utils/data/downsample_data_dir.sh @@ -0,0 +1,34 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +if [ $# -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +freq=$1 +dir=$2 + +sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; } + +if [ -f $dir/feats.scp ]; then + mkdir -p $dir/.backup + mv $dir/feats.scp $dir/.backup/ + if [ -f $dir/cmvn.scp ]; then + mv $dir/cmvn.scp $dir/.backup/ + fi + echo "$0: feats.scp already exists. 
diff --git a/egs/wsj/s5/utils/data/downsample_data_dir.sh b/egs/wsj/s5/utils/data/downsample_data_dir.sh
new file mode 100755
index 00000000000..022af67d265
--- /dev/null
+++ b/egs/wsj/s5/utils/data/downsample_data_dir.sh
@@ -0,0 +1,34 @@
+#! /bin/bash
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <sampling-frequency> <data-dir>"
+  exit 1
+fi
+
+freq=$1
+dir=$2
+
+sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }
+
+if [ -f $dir/feats.scp ]; then
+  mkdir -p $dir/.backup
+  mv $dir/feats.scp $dir/.backup/
+  if [ -f $dir/cmvn.scp ]; then
+    mv $dir/cmvn.scp $dir/.backup/
+  fi
+  echo "$0: feats.scp already exists. Moving it to $dir/.backup"
+fi
+
+mv $dir/wav.scp $dir/wav.scp.tmp
+cat $dir/wav.scp.tmp | python -c "import sys
+for line in sys.stdin.readlines():
+  splits = line.strip().split()
+  if splits[-1] == '|':
+    out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'
+  else:
+    out_line = '{0} cat {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:]))
+  print (out_line)" > ${dir}/wav.scp
+rm $dir/wav.scp.tmp
diff --git a/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl
new file mode 100755
index 00000000000..b0cece46ca8
--- /dev/null
+++ b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+use warnings;
+
+# This script modifies the feats ranges and ensures that they don't
+# exceed the max number of frames supplied in utt2max_frames.
+# utt2max_frames can be computed by using
+# steps/segmentation/get_reco2num_frames.sh
+# cut -d ' ' -f 1,2 <data-dir>/segments | utils/apply_map.pl -f 2 <data-dir>/reco2num_frames > <data-dir>/utt2max_frames
+
+(scalar @ARGV == 1) or die "Usage: fix_subsegmented_feats.pl <utt2max_frames>";
+
+my $utt2max_frames_file = $ARGV[0];
+
+open MAX_FRAMES, $utt2max_frames_file or die "fix_subsegmented_feats.pl: Could not open file $utt2max_frames_file";
+
+my %utt2max_frames;
+
+while (<MAX_FRAMES>) {
+  chomp;
+  my @F = split;
+
+  (scalar @F == 2) or die "fix_subsegmented_feats.pl: Invalid line $_ in $utt2max_frames_file";
+
+  $utt2max_frames{$F[0]} = $F[1];
+}
+
+while (<STDIN>) {
+  my $line = $_;
+
+  if (m/\[([^][]*)\]\[([^][]*)\]\s*$/) {
+    print STDERR ("fix_subsegmented_feats.pl: this script only supports single indices");
+    exit(1);
+  }
+
+  my $before_range = "";
+  my $range = "";
+
+  if (m/^(.*)\[([^][]*)\]\s*$/) {
+    $before_range = $1;
+    $range = $2;
+  } else {
+    print;
+    next;
+  }
+
+  my @F = split(/ /, $before_range);
+  my $utt = shift @F;
+  defined $utt2max_frames{$utt} or die "fix_subsegmented_feats.pl: Could not find key $utt in $utt2max_frames_file.\nError with line $line";
+
+  if ($range !~ m/^(\d*):(\d*)([,]?.*)$/) {
+    print STDERR "fix_subsegmented_feats.pl: could not make sense of input line $_";
+    exit(1);
+  }
+
+  my $row_start = $1;
+  my $row_end = $2;
+  my $col_range = $3;
+
+  if ($row_end >= $utt2max_frames{$utt}) {
+    print STDERR "Fixed row_end for $utt from $row_end to $utt2max_frames{$utt}-1\n";
+    $row_end = $utt2max_frames{$utt} - 1;
+  }
+
+  if ($row_start ne "") {
+    $range = "$row_start:$row_end";
+  } else {
+    $range = "";
+  }
+
+  if ($col_range ne "") {
+    $range .= ",$col_range";
+  }
+
+  print ("$utt " . join(" ", @F) . "[" . $range . 
"]\n"); +} diff --git a/egs/wsj/s5/utils/data/get_dct_matrix.py b/egs/wsj/s5/utils/data/get_dct_matrix.py new file mode 100755 index 00000000000..88b28b5dd5c --- /dev/null +++ b/egs/wsj/s5/utils/data/get_dct_matrix.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import os, argparse, sys, math, warnings + +import numpy as np + +def ComputeLifterCoeffs(Q, dim): + coeffs = np.zeros((dim)) + for i in range(0, dim): + coeffs[i] = 1.0 + 0.5 * Q * math.sin(math.pi * i / Q); + + return coeffs + +def ComputeIDctMatrix(K, N, cepstral_lifter=0): + matrix = np.zeros((K, N)) + # normalizer for X_0 + normalizer = math.sqrt(1.0 / N); + for j in range(0, N): + matrix[0, j] = normalizer; + # normalizer for other elements + normalizer = math.sqrt(2.0 / N); + for k in range(1, K): + for n in range(0, N): + matrix[k, n] = normalizer * math.cos(math.pi/N * (n + 0.5) * k); + + if cepstral_lifter != 0: + lifter_coeffs = ComputeLifterCoeffs(cepstral_lifter, K) + for k in range(0, K): + matrix[k, :] = matrix[k, :] / lifter_coeffs[k]; + + return matrix.T + +def ComputeDctMatrix(K, N, cepstral_lifter=0): + matrix = np.zeros((K, N)) + # normalizer for X_0 + normalizer = math.sqrt(1.0 / N); + for j in range(0, N): + matrix[0, j] = normalizer; + # normalizer for other elements + normalizer = math.sqrt(2.0 / N); + for k in range(1, K): + for n in range(0, N): + matrix[k, n] = normalizer * math.cos(math.pi/N * (n + 0.5) * k); + + if cepstral_lifter != 0: + lifter_coeffs = ComputeLifterCoeffs(cepstral_lifter, K) + for k in range(0, K): + matrix[k, :] = matrix[k, :] * lifter_coeffs[k]; + + return matrix + +def GetArgs(): + parser = argparse.ArgumentParser(description="Write DCT/IDCT matrix") + parser.add_argument("--cepstral-lifter", type=float, + help="Here we need the scaling factor on cepstra in the production of MFCC" + "to cancel out the effect of lifter, e.g. 
22.0", default=22.0)
+    parser.add_argument("--num-ceps", type=int,
+                        default=13,
+                        help="Number of cepstral dimensions")
+    parser.add_argument("--num-filters", type=int,
+                        default=23,
+                        help="Number of mel filters")
+    parser.add_argument("--get-idct-matrix", type=str, default="false",
+                        choices=["true","false"],
+                        help="Get IDCT matrix instead of DCT matrix")
+    parser.add_argument("--add-zero-column", type=str, default="true",
+                        choices=["true","false"],
+                        help="Add a column to convert the matrix from a linear transform to affine transform")
+    parser.add_argument("out_file", type=str,
+                        help="Output file")
+
+    args = parser.parse_args()
+
+    return args
+
+def CheckArgs(args):
+    if args.num_ceps > args.num_filters:
+        raise Exception("num-ceps must not be larger than num-filters")
+
+    args.out_file_handle = open(args.out_file, 'w')
+
+    return args
+
+def Main():
+    args = GetArgs()
+    args = CheckArgs(args)
+
+    if args.get_idct_matrix == "false":
+        matrix = ComputeDctMatrix(args.num_ceps, args.num_filters,
+                                  args.cepstral_lifter)
+        if args.add_zero_column == "true":
+            matrix = np.append(matrix, np.zeros((args.num_ceps,1)), 1)
+    else:
+        matrix = ComputeIDctMatrix(args.num_ceps, args.num_filters,
+                                   args.cepstral_lifter)
+
+        if args.add_zero_column == "true":
+            matrix = np.append(matrix, np.zeros((args.num_filters,1)), 1)
+
+    print('[ ', file=args.out_file_handle)
+    np.savetxt(args.out_file_handle, matrix, fmt='%.6e')
+    print(' ]', file=args.out_file_handle)
+
+if __name__ == "__main__":
+    Main()
+
diff --git a/egs/wsj/s5/utils/data/get_frame_shift.sh b/egs/wsj/s5/utils/data/get_frame_shift.sh
index d032c9c17fa..f5a3bac9009 100755
--- a/egs/wsj/s5/utils/data/get_frame_shift.sh
+++ b/egs/wsj/s5/utils/data/get_frame_shift.sh
@@ -38,23 +38,27 @@ if [ ! -s $dir/utt2dur ]; then
   utils/data/get_utt2dur.sh $dir 1>&2
 fi
 
-if [ ! -f $dir/feats.scp ]; then
-  echo "$0: $dir/feats.scp does not exist" 1>&2
-  exit 1
-fi
+if [ ! -f $dir/frame_shift ]; then
+  if [ ! -f $dir/feats.scp ]; then
+    echo "$0: $dir/feats.scp does not exist" 1>&2
+    exit 1
+  fi
 
-temp=$(mktemp /tmp/tmp.XXXX)
+  temp=$(mktemp /tmp/tmp.XXXX)
 
-feat-to-len "scp:head -n 10 $dir/feats.scp|" ark,t:- > $temp
+  feat-to-len "scp:head -n 10 $dir/feats.scp|" ark,t:- > $temp
 
-if [ -z $temp ]; then
-  echo "$0: error running feat-to-len" 1>&2
-  exit 1
-fi
+  if [ ! -s $temp ]; then
+    echo "$0: error running feat-to-len" 1>&2
+    exit 1
+  fi
 
-head -n 10 $dir/utt2dur | paste - $temp | \
-  awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1;
+  frame_shift=$(head -n 10 $dir/utt2dur | paste - $temp | awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }') || exit 1;
+
+  echo $frame_shift > $dir/frame_shift
+  rm $temp
+fi
 
-rm $temp
+cat $dir/frame_shift
 
 exit 0
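The frame-shift estimate above is just total duration divided by total frame count over the first ten utterances, snapped to exactly 0.01 when it lands within rounding noise of 10 ms. A small Python sketch of the same arithmetic, with made-up numbers:

# Same computation as the awk END block above (hypothetical inputs).
durations = [4.97, 6.12, 3.50]     # seconds, from utt2dur
num_frames = [495, 610, 348]       # from feat-to-len on the same utterances

shift = sum(durations) / sum(num_frames)   # 14.59 / 1453 = 0.010041...
if 0.01 < shift < 0.0102:                  # snap near-10ms estimates to exactly 0.01
    shift = 0.01
print(shift)                               # 0.01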
This script works it +# out from the 'segments' file, or, if not present, from the wav.scp file (it +# first tries interrogating the headers, and if this fails, it reads the wave +# files in entirely.) + +frame_shift=0.01 +cmd=run.pl +nj=4 + +. utils/parse_options.sh +. ./path.sh + +if [ $# != 1 ]; then + echo "Usage: $0 [options] " + echo "e.g.:" + echo " $0 data/train" + echo " Options:" + echo " --frame-shift # frame shift in seconds. Only relevant when we are" + echo " # getting duration from feats.scp (default: 0.01). " + exit 1 +fi + +export LC_ALL=C + +data=$1 + +if [ -s $data/reco2dur ] && \ + [ $(cat $data/wav.scp | wc -l) -eq $(cat $data/reco2dur | wc -l) ]; then + echo "$0: $data/reco2dur already exists with the expected length. We won't recompute it." + exit 0; +fi + +# if the wav.scp contains only lines of the form +# utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph | +if cat $data/wav.scp | perl -e ' + while (<>) { s/\|\s*$/ |/; # make sure final | is preceded by space. + @A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ && + $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); } + $utt = $A[0]; $sphere_file = $A[4]; + + if (!open(F, "<$sphere_file")) { die "Error opening sphere file $sphere_file"; } + $sample_rate = -1; $sample_count = -1; + for ($n = 0; $n <= 30; $n++) { + $line = ; + if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; } + if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; } + if ($line =~ m/end_head/) { break; } + } + close(F); + if ($sample_rate == -1 || $sample_count == -1) { + die "could not parse sphere header from $sphere_file"; + } + $duration = $sample_count * 1.0 / $sample_rate; + print "$utt $duration\n"; + } ' > $data/reco2dur; then + echo "$0: successfully obtained utterance lengths from sphere-file headers" +else + echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration" + if ! command -v wav-to-duration >/dev/null; then + echo "$0: wav-to-duration is not on your path" + exit 1; + fi + + read_entire_file=false + if cat $data/wav.scp | grep -q 'sox.*speed'; then + read_entire_file=true + echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow." + echo "... It is much faster if you call get_reco2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or " + echo "... perturb_data_dir_speed_3way.sh." + fi + + utils/split_data.sh $data $nj + if ! $cmd JOB=1:$nj $data/log/get_wav_duration.JOB.log wav-to-duration --read-entire-file=$read_entire_file scp:$data/split$nj/JOB/wav.scp ark,t:$data/split$nj/JOB/reco2dur 2>&1; then + echo "$0: there was a problem getting the durations; moving $data/reco2dur to $data/.backup/" + mkdir -p $data/.backup/ + mv $data/reco2dur $data/.backup/ + exit 1 + fi + + for n in `seq $nj`; do + cat $data/split$nj/$n/reco2dur + done > $data/reco2dur +fi + +echo "$0: computed $data/reco2dur" + +exit 0 + diff --git a/egs/wsj/s5/utils/data/get_reco2num_frames.sh b/egs/wsj/s5/utils/data/get_reco2num_frames.sh new file mode 100755 index 00000000000..edb16609703 --- /dev/null +++ b/egs/wsj/s5/utils/data/get_reco2num_frames.sh @@ -0,0 +1,28 @@ +#! /bin/bash + +cmd=run.pl +nj=4 + +frame_shift=0.01 +frame_overlap=0.015 + +. utils/parse_options.sh + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +data=$1 + +if [ -s $data/reco2num_frames ]; then + echo "$0: $data/reco2num_frames already present!" 
diff --git a/egs/wsj/s5/utils/data/get_reco2utt.sh b/egs/wsj/s5/utils/data/get_reco2utt.sh
new file mode 100755
index 00000000000..6c30f812cfe
--- /dev/null
+++ b/egs/wsj/s5/utils/data/get_reco2utt.sh
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0
+
+if [ $# -ne 1 ]; then
+  echo "This script creates a reco2utt file in the data directory, "
+  echo "which is analogous to spk2utt file but with the first column "
+  echo "as recording instead of speaker."
+  echo "Usage: get_reco2utt.sh <data-dir>"
+  echo " e.g.: get_reco2utt.sh data/train"
+  exit 1
+fi
+
+data=$1
+
+if [ ! -s $data/segments ]; then
+  utils/data/get_segments_for_data.sh $data > $data/segments
+fi
+
+cut -d ' ' -f 1,2 $data/segments | utils/utt2spk_to_spk2utt.pl > $data/reco2utt
diff --git a/egs/wsj/s5/utils/data/get_segments_for_data.sh b/egs/wsj/s5/utils/data/get_segments_for_data.sh
index 694acc6a256..7adc4c465d3 100755
--- a/egs/wsj/s5/utils/data/get_segments_for_data.sh
+++ b/egs/wsj/s5/utils/data/get_segments_for_data.sh
@@ -19,7 +19,7 @@ fi
 
 data=$1
 
-if [ ! -f $data/utt2dur ]; then
+if [ ! -s $data/utt2dur ]; then
   utils/data/get_utt2dur.sh $data 1>&2 || exit 1;
 fi
diff --git a/egs/wsj/s5/utils/data/get_subsegmented_feats.sh b/egs/wsj/s5/utils/data/get_subsegmented_feats.sh
new file mode 100755
index 00000000000..6baba68eedd
--- /dev/null
+++ b/egs/wsj/s5/utils/data/get_subsegmented_feats.sh
@@ -0,0 +1,46 @@
+#! /bin/bash
+
+# Copyright 2016  Johns Hopkins University (Author: Dan Povey)
+#           2016  Vimal Manohar
+# Apache 2.0.
+
+if [ $# -ne 4 ]; then
+  echo "This script gets subsegmented_feats (by adding ranges to data/feats.scp) "
+  echo "for the subsegments file. This does one part of the "
+  echo "functionality in subsegment_data_dir.sh, which additionally "
+  echo "creates a new subsegmented data directory."
+  echo "Usage: $0 <feats-scp> <frame-shift> <frame-overlap> <subsegments-file>"
+  echo " e.g.: $0 data/train/feats.scp 0.01 0.015 subsegments"
+  exit 1
+fi
+
+feats=$1
+frame_shift=$2
+frame_overlap=$3
+subsegments=$4
+
+# The subsegments format is <new-utt-id> <old-utt-id> <start-time> <end-time>.
+# e.g. 'utt_foo-1 utt_foo 7.21 8.93'
+# The first awk command replaces this with the format:
+# <new-utt-id> <old-utt-id> <first-frame> <last-frame>
+# e.g. 'utt_foo-1 utt_foo 721 892'
+# and the apply_map.pl command replaces 'utt_foo' (the 2nd field) with its corresponding entry
+# from the original feats.scp, so we get a line like:
+# e.g. 'utt_foo-1 foo-bar.ark:514231 721 892'
+# Note: the reason we subtract one from the last time is that it's going to
+# represent the 'last' frame, not the 'end' frame [i.e. not one past the last],
+# in the matlab-like, but zero-indexed [first:last] notion. For instance, a segment with 1 frame
+# would have start-time 0.00 and end-time 0.01, which would become the frame range
+# [0:0]
+# The second awk command turns this into something like
+# utt_foo-1 foo-bar.ark:514231[721:892]
+# It has to be a bit careful because the format actually allows for more general things
+# like pipes that might contain spaces, so it has to be able to produce output like the
+# following:
+# utt_foo-1 some command|[721:892]
+# Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if
+# the original data-dir already had data-ranges in square brackets.
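+# As a concrete check of the arithmetic (illustrative, exact-arithmetic values):
+# with frame_shift=0.01 and frame_overlap=0.015, 'utt_foo-1 utt_foo 7.21 8.93'
+# gives first = int(7.21/0.01 + 0.5) = 721 and
+# last = int((8.93 - 0.015)/0.01 + 0.5) = int(892.0) = 892,
+# i.e. the frame range [721:892] used in the examples above.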
+awk -v s=$frame_shift -v fovlp=$frame_overlap '{print $1, $2, int(($3/s)+0.5), int(($4-fovlp)/s+0.5);}' <$subsegments| \
+  utils/apply_map.pl -f 2 $feats | \
+  awk '{p=NF-1; for (n=1;n $data/utt2dur elif [ -f $data/wav.scp ]; then
diff --git a/egs/wsj/s5/utils/data/get_utt2num_frames.sh b/egs/wsj/s5/utils/data/get_utt2num_frames.sh
new file mode 100755
index 00000000000..ec80e771c83
--- /dev/null
+++ b/egs/wsj/s5/utils/data/get_utt2num_frames.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+cmd=run.pl
+nj=4
+
+frame_shift=0.01
+frame_overlap=0.015
+
+. utils/parse_options.sh
+
+if [ $# -ne 1 ]; then
+  echo "This script writes a file utt2num_frames with the "
+  echo "number of frames in each utterance as measured based on the "
+  echo "duration of the utterances (in utt2dur) and the specified "
+  echo "frame_shift and frame_overlap."
+  echo "Usage: $0 <data-dir>"
+  exit 1
+fi
+
+data=$1
+
+if [ -s $data/utt2num_frames ]; then
+  echo "$0: $data/utt2num_frames already present!"
+  exit 0;
+fi
+
+if [ ! -f $data/feats.scp ]; then
+  utils/data/get_utt2dur.sh $data
+  awk -v fs=$frame_shift -v fovlp=$frame_overlap \
+    '{print $1" "int( ($2 - fovlp) / fs)}' $data/utt2dur > $data/utt2num_frames
+  exit 0
+fi
+
+utils/split_data.sh --per-utt $data $nj || exit 1
+$cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \
+  feat-to-len scp:$data/split${nj}utt/JOB/feats.scp ark,t:$data/split${nj}utt/JOB/utt2num_frames || exit 1
+
+for n in `seq $nj`; do
+  cat $data/split${nj}utt/$n/utt2num_frames
+done > $data/utt2num_frames
+
+echo "$0: Computed and wrote $data/utt2num_frames"
diff --git a/egs/wsj/s5/utils/data/limit_feature_dim.sh b/egs/wsj/s5/utils/data/limit_feature_dim.sh
new file mode 100755
index 00000000000..2d969ee569b
--- /dev/null
+++ b/egs/wsj/s5/utils/data/limit_feature_dim.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright 2016  Alibaba Robotics Corp. (author: Xingyu Na)
+# Apache 2.0
+
+# The script creates a new data directory by selecting a specified
+# dimension range of the features in the source directory.
+
+. utils/parse_options.sh
+
+if [ $# != 3 ]; then
+  echo "Usage: "
+  echo "  $0 <feat-dim-range> <src-data-dir> <dest-data-dir>"
+  echo "The script creates a new data directory by selecting a specified"
+  echo "dimension range of the features in the source directory."
+  echo "e.g.:"
+  echo " $0 0:39 data/train_hires_pitch data/train_hires"
+  exit 1;
+fi
+
+feat_dim_range=$1
+srcdir=$2
+destdir=$3
+
+if [ "$destdir" == "$srcdir" ]; then
+  echo "$0: this script requires <src-data-dir> and <dest-data-dir> to be different."
+  exit 1
+fi
+
+if [ ! -f $srcdir/feats.scp ]; then
+  echo "$0: no such file $srcdir/feats.scp"
+  exit 1;
+fi
+
+mkdir -p $destdir
+utils/copy_data_dir.sh $srcdir $destdir
+
+if [ -f $destdir/cmvn.scp ]; then
+  rm $destdir/cmvn.scp
+  echo "$0: warning: removing $destdir/cmvn.scp, you will have to regenerate it from the features."
+fi
+
+rm $destdir/feats.scp
+sed 's/$/\[:,'${feat_dim_range}'\]/' $srcdir/feats.scp | \
+  utils/data/normalize_data_range.pl > $destdir/feats.scp
+
+[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"
+utils/validate_data_dir.sh $validate_opts $destdir
diff --git a/egs/wsj/s5/utils/data/modify_speaker_info.sh b/egs/wsj/s5/utils/data/modify_speaker_info.sh
index f75e9be5f67..e42f0df551d 100755
--- a/egs/wsj/s5/utils/data/modify_speaker_info.sh
+++ b/egs/wsj/s5/utils/data/modify_speaker_info.sh
@@ -37,6 +37,7 @@ utts_per_spk_max=-1
 seconds_per_spk_max=-1
 
 respect_speaker_info=true
+respect_recording_info=true
 
 # end configuration section
 
 . 
utils/parse_options.sh @@ -93,10 +94,26 @@ else utt2dur_opt= fi -utils/data/internal/modify_speaker_info.py \ - $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ - --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ - <$srcdir/utt2spk >$destdir/utt2spk +if ! $respect_speaker_info && $respect_recording_info; then + if [ -f $srcdir/segments ]; then + cat $srcdir/segments | awk '{print $1" "$2}' | \ + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=true \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + >$destdir/utt2spk + else + cat $srcdir/wav.scp | awk '{print $1" "$2}' | \ + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=true \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + >$destdir/utt2spk + fi +else + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + <$srcdir/utt2spk >$destdir/utt2spk +fi utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt diff --git a/egs/wsj/s5/utils/data/normalize_data_range.pl b/egs/wsj/s5/utils/data/normalize_data_range.pl index f7936d98a31..61ccfd593f7 100755 --- a/egs/wsj/s5/utils/data/normalize_data_range.pl +++ b/egs/wsj/s5/utils/data/normalize_data_range.pl @@ -45,14 +45,13 @@ sub combine_ranges { # though they are supported at the C++ level. if ($start1 eq "" || $start2 eq "" || $end1 eq "" || $end2 == "") { chop $line; - print("normalize_data_range.pl: could not make sense of line $line\n"); + print STDERR ("normalize_data_range.pl: could not make sense of line $line\n"); exit(1) } if ($start1 + $end2 > $end1) { chop $line; - print("normalize_data_range.pl: could not make sense of line $line " . - "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]\n"); - exit(1); + print STDERR ("normalize_data_range.pl: could not make sense of line $line " . + "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]; adjusting end.\n"); } return ($start2+$start1, $end2+$start1); } @@ -72,11 +71,11 @@ sub combine_ranges { # sometimes in scp files, we use the command concat-feats to splice together # two feature matrices. Handling this correctly is complicated and we don't # anticipate needing it, so we just refuse to process this type of data. - print "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " . + print STDERR "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " . 
"if concat-feats was in the input data\n"; exit(1); } - print STDERR "matched: $before_range $first_range $second_range\n"; + # print STDERR "matched: $before_range $first_range $second_range\n"; if ($first_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) { print STDERR "normalize_data_range.pl: could not make sense of input line $_"; exit(1); diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh index c575166534e..4b12a94eee9 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh @@ -43,5 +43,9 @@ utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_spe rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" -utils/validate_data_dir.sh --no-feats $destdir +if [ -f $srcdir/text ]; then + utils/validate_data_dir.sh --no-feats $destdir +else + utils/validate_data_dir.sh --no-feats --no-text $destdir +fi diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh b/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh new file mode 100755 index 00000000000..1eb7ebb874c --- /dev/null +++ b/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar + +# Apache 2.0 + +speeds="0.9 1.0 1.1" + +. utils/parse_options.sh + +if [ $# != 2 ]; then + echo "Usage: perturb_data_dir_speed_random.sh " + echo "Applies 3-way speed perturbation using factors of 0.9, 1.0 and 1.1 on random subsets." + echo "e.g.:" + echo " $0 data/train data/train_spr" + echo "Note: if /feats.scp already exists, this will refuse to run." + exit 1 +fi + +srcdir=$1 +destdir=$2 + +if [ ! -f $srcdir/wav.scp ]; then + echo "$0: expected $srcdir/wav.scp to exist" + exit 1 +fi + +if [ -f $destdir/feats.scp ]; then + echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" + exit 1 +fi + +echo "$0: making sure the utt2dur file is present in ${srcdir}, because " +echo "... obtaining it after speed-perturbing would be very slow, and" +echo "... you might need it." +utils/data/get_utt2dur.sh ${srcdir} + +num_speeds=`echo $speeds | awk '{print NF}'` +utils/split_data.sh --per-reco $srcdir $num_speeds + +speed_dirs= +i=1 +for speed in $speeds; do + if [ $speed != 1.0 ]; then + utils/data/perturb_data_dir_speed.sh $speed ${srcdir}/split${num_speeds}reco/$i ${destdir}_speed$speed || exit 1 + speed_dirs="${speed_dirs} ${destdir}_speed$speed" + else + speed_dirs="$speed_dirs ${srcdir}/split${num_speeds}reco/$i" + fi +done + +utils/data/combine_data.sh $destdir ${speed_dirs} || exit 1 + +rm -r $speed_dirs ${srcdir}/split${num_speeds}reco + +echo "$0: generated $num_speeds-way speed-perturbed version of random subsets of data in $srcdir, in $destdir" +if [ -f $srcdir/text ]; then + utils/validate_data_dir.sh --no-feats $destdir +else + utils/validate_data_dir.sh --no-feats --no-text $destdir +fi + + diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh index bc76939643c..ee3c281bdbb 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh @@ -7,6 +7,11 @@ # the wav.scp to perturb the volume (typically useful for training data when # using systems that don't have cepstral mean normalization). +reco2vol= +force=false +scale_low=0.125 +scale_high=2 + . 
utils/parse_options.sh if [ $# != 1 ]; then @@ -25,29 +30,86 @@ if [ ! -f $data/wav.scp ]; then exit 1 fi -if grep -q "sox --vol" $data/wav.scp; then +volume_perturb_done=`head -n100 $data/wav.scp | python -c " +import sys, re +for line in sys.stdin.readlines(): + if len(line.strip()) == 0: + continue + # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + parts = line.strip().split() + if line.strip()[-1] == '|': + if re.search('sox --vol', ' '.join(parts[-11:])): + print 'true' + sys.exit(0) + elif re.search(':[0-9]+$', line.strip()) is not None: + continue + else: + if ' '.join(parts[1:3]) == 'sox --vol': + print 'true' + sys.exit(0) +print 'false' +"` || exit 1 + +if $volume_perturb_done; then echo "$0: It looks like the data was already volume perturbed. Not doing anything." exit 0 fi -cat $data/wav.scp | python -c " +if [ -z "$reco2vol" ]; then + cat $data/wav.scp | python -c " import sys, os, subprocess, re, random random.seed(0) -scale_low = 1.0/8 -scale_high = 2.0 +scale_low = $scale_low +scale_high = $scale_high +volume_writer = open('$data/reco2vol', 'w') for line in sys.stdin.readlines(): if len(line.strip()) == 0: continue # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + vol = random.uniform(scale_low, scale_high) + + parts = line.strip().split() if line.strip()[-1] == '|': - print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) + print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), vol) elif re.search(':[0-9]+$', line.strip()) is not None: - parts = line.split() - print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) + print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) else: - parts = line.split() - print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) + print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) + volume_writer.write('{id} {vol}\n'.format(id = parts[0], vol = vol)) " > $data/wav.scp_scaled || exit 1; +else + cat $data/wav.scp | python -c " +import sys, os, subprocess, re +volumes = {} +for line in open('$reco2vol'): + if len(line.strip()) == 0: + continue + parts = line.strip().split() + volumes[parts[0]] = float(parts[1]) + +for line in sys.stdin.readlines(): + if len(line.strip()) == 0: + continue + # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + + parts = line.strip().split() + id = parts[0] + + if id not in volumes: + raise Exception('Could not find volume for id {id}'.format(id = id)) + + vol = volumes[id] + + if line.strip()[-1] == '|': + print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), vol) + elif re.search(':[0-9]+$', line.strip()) is not None: + print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) + else: + print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) +" > $data/wav.scp_scaled || exit 1; + + cp $reco2vol $data/reco2vol +fi len1=$(cat $data/wav.scp | wc -l) len2=$(cat $data/wav.scp_scaled | wc -l) diff --git a/egs/wsj/s5/utils/data/resample_data_dir.sh 
b/egs/wsj/s5/utils/data/resample_data_dir.sh
new file mode 100755
index 00000000000..8781ee4c503
--- /dev/null
+++ b/egs/wsj/s5/utils/data/resample_data_dir.sh
@@ -0,0 +1,35 @@
+#! /bin/bash
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <sampling-frequency> <data-dir>"
+  exit 1
+fi
+
+freq=$1
+dir=$2
+
+sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }
+
+if [ -f $dir/feats.scp ]; then
+  mkdir -p $dir/.backup
+  mv $dir/feats.scp $dir/.backup/
+  if [ -f $dir/cmvn.scp ]; then
+    mv $dir/cmvn.scp $dir/.backup/
+  fi
+  echo "$0: feats.scp already exists. Moving it to $dir/.backup"
+fi
+
+mv $dir/wav.scp $dir/wav.scp.tmp
+cat $dir/wav.scp.tmp | python -c "import sys
+for line in sys.stdin.readlines():
+  splits = line.strip().split()
+  if splits[-1] == '|':
+    out_line = line.strip() + ' $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'
+  else:
+    out_line = '{0} cat {1} | $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'.format(splits[0], ' '.join(splits[1:]))
+  print (out_line)" > ${dir}/wav.scp
+rm $dir/wav.scp.tmp
diff --git a/egs/wsj/s5/utils/data/shift_and_combine_feats.sh b/egs/wsj/s5/utils/data/shift_and_combine_feats.sh
new file mode 100755
index 00000000000..217b7768078
--- /dev/null
+++ b/egs/wsj/s5/utils/data/shift_and_combine_feats.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Copyright 2017  Hossein Hadian
+# Apache 2.0
+
+write_utt2orig=  # if provided, this script will write
+                 # a mapping of shifted utterance ids
+                 # to the original ones into the file
+                 # specified by this option
+
+echo "$0 $@"  # Print the command line for logging
+if [ -f path.sh ]; then . ./path.sh; fi
+. utils/parse_options.sh
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 <frame-subsampling-factor> <srcdir> <destdir>"
+  echo "e.g.: $0 3 data/train data/train_fs3"
+  echo "For use in perturbing data for discriminative training and alignment of"
+  echo "frame-subsampled systems, this script uses utils/data/shift_feats.sh"
+  echo "and utils/data/combine_data.sh to shift the features"
+  echo "<frame-subsampling-factor> different ways and combine them."
+  echo "E.g. if <frame-subsampling-factor> is 3, this script will combine"
+  echo "the data frame-shifted by -1, 0 and 1 (c.f. shift-feats)."
+  exit 1
+fi
+
+frame_subsampling_factor=$1
+srcdir=$2
+destdir=$3
+
+if [ ! -f $srcdir/feats.scp ]; then
+  echo "$0: expected $srcdir/feats.scp to exist"
+  exit 1
+fi
+
+if [ -f $destdir/feats.scp ]; then
+  echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
+  exit 1
+fi
+
+if [ ! -z $write_utt2orig ]; then
+  awk '{print $1 " " $1}' $srcdir/feats.scp >$write_utt2orig
+fi
+
+tmp_shift_destdirs=()
+for frame_shift in `seq $[-(frame_subsampling_factor/2)] $[-(frame_subsampling_factor/2) + frame_subsampling_factor - 1]`; do
+  if [ "$frame_shift" == 0 ]; then continue; fi
+  utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1
+  tmp_shift_destdirs+=("${destdir}_fs$frame_shift")
+  if [ ! -z $write_utt2orig ]; then
+    awk -v prefix="fs$frame_shift-" '{printf("%s%s %s\n", prefix, $1, $1);}' $srcdir/feats.scp >>$write_utt2orig
+  fi
+done
+utils/data/combine_data.sh $destdir $srcdir ${tmp_shift_destdirs[@]} || exit 1
+rm -r ${tmp_shift_destdirs[@]}
+
+utils/validate_data_dir.sh $destdir
+
+src_nf=`cat $srcdir/feats.scp | wc -l`
+dest_nf=`cat $destdir/feats.scp | wc -l`
+if [ $[src_nf*frame_subsampling_factor] -ne $dest_nf ]; then
+  echo "There was a problem. Expected the number of feature lines in the destination dir to be $[src_nf*frame_subsampling_factor]."
+  exit 1;
+fi
+
+echo "$0: Successfully generated $frame_subsampling_factor-way shifted version of data in $srcdir, in $destdir"
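A sketch of the frame-shift range computed by the seq expression above (illustrative only): for a frame-subsampling factor of 3, seq runs from -(3/2) = -1 to -1 + 3 - 1 = 1, i.e. shifts -1, 0, 1; the 0 shift is skipped because the unshifted copy comes from the source directory itself.

# Same range arithmetic in Python, with factor 3 as a hypothetical input.
factor = 3
shifts = [s for s in range(-(factor // 2), -(factor // 2) + factor) if s != 0]
print(shifts)  # [-1, 1]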
diff --git a/egs/wsj/s5/utils/data/shift_feats.sh b/egs/wsj/s5/utils/data/shift_feats.sh
new file mode 100755
index 00000000000..2ae7b2435d3
--- /dev/null
+++ b/egs/wsj/s5/utils/data/shift_feats.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright 2016  Vimal Manohar
+#           2017  Hossein Hadian
+# Apache 2.0
+
+echo "$0 $@"  # Print the command line for logging
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo " Usage: $0 <frame-shift> <srcdir> <destdir>"
+  echo "e.g.: $0 -1 data/train data/train_fs-1"
+  echo "The script creates a new data directory with the features modified"
+  echo "using the program shift-feats with the specified frame-shift."
+  echo "This program automatically adds the prefix 'fs<frame-shift>-' to the"
+  echo "utterance and speaker names. See also utils/data/shift_and_combine_feats.sh"
+  exit 1
+fi
+
+frame_shift=$1
+srcdir=$2
+destdir=$3
+
+
+if [ "$destdir" == "$srcdir" ]; then
+  echo "$0: this script requires <srcdir> and <destdir> to be different."
+  exit 1
+fi
+
+if [ ! -f $srcdir/feats.scp ]; then
+  echo "$0: no such file $srcdir/feats.scp"
+  exit 1;
+fi
+
+utt_prefix="fs$frame_shift-"
+spk_prefix="fs$frame_shift-"
+
+mkdir -p $destdir
+utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \
+  $srcdir $destdir
+
+if grep --quiet "'" $srcdir/feats.scp; then
+  echo "$0: the input features already use single quotes. Can't proceed."
+  exit 1;
+fi
+
+awk -v shift=$frame_shift 'NF == 2 {uttid=$1; feat=$2; qt="";} \
+NF > 2 {idx=index($0, " "); uttid=$1; feat=substr($0, idx + 1); qt="\x27";} \
+NF {print uttid " shift-feats --print-args=false --shift=" shift, qt feat qt " - |";}' \
+  $destdir/feats.scp >$destdir/feats_shifted.scp
+mv -f $destdir/feats_shifted.scp $destdir/feats.scp
+
+echo "$0: Done"
diff --git a/egs/wsj/s5/utils/data/subsegment_data_dir.sh b/egs/wsj/s5/utils/data/subsegment_data_dir.sh
index 18a00c3df7d..4c664f16441 100755
--- a/egs/wsj/s5/utils/data/subsegment_data_dir.sh
+++ b/egs/wsj/s5/utils/data/subsegment_data_dir.sh
@@ -24,14 +24,15 @@ segment_end_padding=0.0
 
 . utils/parse_options.sh
 
-if [ $# != 4 ]; then
+if [ $# != 4 ] && [ $# != 3 ]; then
   echo "Usage: "
-  echo "  $0 [options] <srcdir> <subsegments-file> <text-file> <destdir>"
+  echo "  $0 [options] <srcdir> <subsegments-file> [<text-file>] <destdir>"
   echo "This script sub-segments a data directory. <subsegments-file> is to"
   echo "have lines of the form <new-utt-id> <old-utt-id> <start-time> <end-time>"
   echo "and <text-file> is of the form <new-utt-id> <word1> <word2> ... <wordN>."
   echo "This script appropriately combines the <subsegments-file> with the original"
   echo "segments file, if necessary, and if not, creates a segments file."
+  echo "<text-file> is an optional argument."
   echo "e.g.:"
   echo " $0 data/train [options] exp/tri3b_resegment/segments exp/tri3b_resegment/text data/train_resegmented"
   echo " Options:"
@@ -50,11 +51,23 @@ export LC_ALL=C
 
 srcdir=$1
 subsegments=$2
-new_text=$3
-dir=$4
+add_subsegment_text=false
+if [ $# -eq 4 ]; then
+  new_text=$3
+  dir=$4
+  add_subsegment_text=true
 
-for f in "$subsegments" "$new_text" "$srcdir/utt2spk"; do
+  if [ ! -f "$new_text" ]; then
+    echo "$0: no such file $new_text"
+    exit 1
+  fi
+
+else
+  dir=$3
+fi
+
+for f in "$subsegments" "$srcdir/utt2spk"; do
   if [ ! -f "$f" ]; then
     echo "$0: no such file $f"
     exit 1;
@@ -65,9 +78,11 @@ if ! mkdir -p $dir; then
   echo "$0: failed to create directory $dir"
 fi
 
-if ! 
cmp <(awk '{print $1}' <$subsegments) <(awk '{print $1}' <$new_text); then - echo "$0: expected the first fields of the files $subsegments and $new_text to be identical" - exit 1 +if $add_subsegment_text; then + if ! cmp <(awk '{print $1}' <$subsegments) <(awk '{print $1}' <$new_text); then + echo "$0: expected the first fields of the files $subsegments and $new_text to be identical" + exit 1 + fi fi # create the utt2spk in $dir @@ -86,8 +101,11 @@ awk '{print $1, $2}' < $subsegments > $dir/new2old_utt utils/apply_map.pl -f 2 $srcdir/utt2spk < $dir/new2old_utt >$dir/utt2spk # .. and the new spk2utt file. utils/utt2spk_to_spk2utt.pl <$dir/utt2spk >$dir/spk2utt -# the new text file is just what the user provides. -cp $new_text $dir/text + +if $add_subsegment_text; then + # the new text file is just what the user provides. + cp $new_text $dir/text +fi # copy the source wav.scp cp $srcdir/wav.scp $dir @@ -125,6 +143,10 @@ if [ -f $srcdir/feats.scp ]; then frame_shift=$(utils/data/get_frame_shift.sh $srcdir) echo "$0: note: frame shift is $frame_shift [affects feats.scp]" + utils/data/get_utt2num_frames.sh --cmd "run.pl" --nj 1 $srcdir + awk '{print $1" "$2}' $subsegments | \ + utils/apply_map.pl -f 2 $srcdir/utt2num_frames > \ + $dir/utt2max_frames # The subsegments format is . # e.g. 'utt_foo-1 utt_foo 7.21 8.93' @@ -147,10 +169,22 @@ if [ -f $srcdir/feats.scp ]; then # utt_foo-1 some command|[721:892] # Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if # the original data-dir already had data-ranges in square brackets. - awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' <$subsegments| \ + cat $subsegments | awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' | \ utils/apply_map.pl -f 2 $srcdir/feats.scp | \ awk '{p=NF-1; for (n=1;n$dir/feats.scp + utils/data/normalize_data_range.pl | \ + utils/data/fix_subsegmented_feats.pl $dir/utt2max_frames >$dir/feats.scp + + cat $dir/feats.scp | perl -ne 'm/^(\S+) .+\[(\d+):(\d+)\]$/; print "$1 " . ($3-$2+1) . "\n"' > \ + $dir/utt2num_frames + + if [ -f $srcdir/vad.scp ]; then + cat $subsegments | awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' | \ + utils/apply_map.pl -f 2 $srcdir/vad.scp | \ + awk '{p=NF-1; for (n=1;n$dir/vad.scp + fi fi @@ -184,6 +218,7 @@ utils/data/fix_data_dir.sh $dir validate_opts= [ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" [ ! -f $srcdir/wav.scp ] && validate_opts="$validate_opts --no-wav" +! $add_subsegment_text && validate_opts="$validate_opts --no-text" utils/data/validate_data_dir.sh $validate_opts $dir diff --git a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh index f9d2890ea39..6cb14068769 100755 --- a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh +++ b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh @@ -58,14 +58,14 @@ utils/validate_dict_dir.pl $srcdir; if [ -f $srcdir/lexicon.txt ]; then src_lex=$srcdir/lexicon.txt perl -ane 'print join(" ", split(" ", $_)) . "\n";' < $src_lex |\ - sort > $dir/lexicon.txt + sort -u > $dir/lexicon.txt elif [ -f $srcdir/lexiconp.txt ]; then echo "$0: removing the pron-probs from $srcdir/lexiconp.txt to create $dir/lexicon.txt" # the Perl command below normalizes the spaces (avoid double space). src_lex=$srcdir/lexiconp.txt awk '{$2 = ""; print $0;}' <$srcdir/lexiconp.txt |\ perl -ane 'print join(" ", split(" " ,$_)) . 
"\n";' |\ - sort > $dir/lexicon.txt || exit 1; + sort -u > $dir/lexicon.txt || exit 1; fi diff --git a/egs/wsj/s5/utils/filter_scps.pl b/egs/wsj/s5/utils/filter_scps.pl index 0d9e0fe4837..418f8f73e1b 100755 --- a/egs/wsj/s5/utils/filter_scps.pl +++ b/egs/wsj/s5/utils/filter_scps.pl @@ -165,6 +165,6 @@ print STDERR "filter_scps.pl: warning: some input lines did not get output\n"; } if ($warn_multiply_covered && $print_warnings) { - print STDERR "filter_scps.pl: warning: some input lines were output to multiple files\n"; + print STDERR "filter_scps.pl: warning: some input lines were output to multiple files [OK if splitting per utt] " . + join(" ", @ARGV) . "\n"; } - diff --git a/egs/wsj/s5/utils/fix_data_dir.sh b/egs/wsj/s5/utils/fix_data_dir.sh index 0333d628544..8ebfc8d49fe 100755 --- a/egs/wsj/s5/utils/fix_data_dir.sh +++ b/egs/wsj/s5/utils/fix_data_dir.sh @@ -6,6 +6,11 @@ # It puts the original contents of data-dir into # data-dir/.backup +utt_extra_files= +spk_extra_files= + +. utils/parse_options.sh + if [ $# != 1 ]; then echo "Usage: utils/data/fix_data_dir.sh " echo "e.g.: utils/data/fix_data_dir.sh data/train" @@ -22,12 +27,13 @@ mkdir -p $data/.backup [ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; +set -e -o pipefail -u + tmpdir=$(mktemp -d /tmp/kaldi.XXXX); trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM export LC_ALL=C - function check_sorted { file=$1 sort -k1,1 -u <$file >$file.tmp @@ -54,8 +60,8 @@ function filter_file { cp $file_to_filter ${file_to_filter}.tmp utils/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=`cat ${file_to_filter}.tmp | wc -l` - length2=`cat ${file_to_filter} | wc -l` + length1=$(cat ${file_to_filter}.tmp | wc -l) + length2=$(cat ${file_to_filter} | wc -l) if [ $length1 -ne $length2 ]; then echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." fi @@ -77,7 +83,7 @@ function filter_recordings { exit 1; fi awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=`cat $tmpdir/recordings | wc -l` + n1=$(cat $tmpdir/recordings | wc -l) [ ! -s $tmpdir/recordings ] && \ echo "Empty list of recordings (bad file $data/segments)?" && exit 1; utils/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp @@ -91,7 +97,7 @@ function filter_recordings { filter_file $tmpdir/recordings $data/wav.scp [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - + true fi } @@ -110,7 +116,7 @@ function filter_speakers { filter_file $tmpdir/speakers $data/spk2utt utils/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - for s in cmvn.scp spk2gender; do + for s in cmvn.scp spk2gender $spk_extra_files; do f=$data/$s if [ -f $f ]; then filter_file $tmpdir/speakers $f @@ -158,7 +164,7 @@ function filter_utts { fi fi - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2dur utt2num_frames $maybe_wav; do + for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2dur utt2num_frames $maybe_wav $utt_extra_files; do if [ -f $data/$x ]; then cp $data/$x $data/.backup/$x if ! 
cmp -s $data/$x <( utils/filter_scp.pl $tmpdir/utts $data/$x ) ; then diff --git a/egs/wsj/s5/utils/format_lm_sri.sh b/egs/wsj/s5/utils/format_lm_sri.sh index 1acacf7ae89..4ef31d925ca 100755 --- a/egs/wsj/s5/utils/format_lm_sri.sh +++ b/egs/wsj/s5/utils/format_lm_sri.sh @@ -61,20 +61,9 @@ done loc=`which change-lm-vocab` if [ -z $loc ]; then - if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 - else - sdir=`pwd`/../../../tools/srilm/bin/i686 - fi - if [ -f $sdir/../change-lm-vocab ]; then - echo Using SRILM tools from $sdir - export PATH=$PATH:$sdir:$sdir/.. - else - echo You appear to not have SRILM tools installed, either on your path, - echo or installed in $sdir. cd to ../../../tools and run - echo extras/install_srilm.sh. - exit 1 - fi + echo You appear to not have SRILM tools installed. + echo cd to $KALDI_ROOT/tools and run extras/install_srilm.sh. + exit 1 fi echo "Converting '$lm' to FST" diff --git a/egs/wsj/s5/utils/lang/make_unk_lm.sh b/egs/wsj/s5/utils/lang/make_unk_lm.sh index b46ab128b93..2564c53ad4d 100755 --- a/egs/wsj/s5/utils/lang/make_unk_lm.sh +++ b/egs/wsj/s5/utils/lang/make_unk_lm.sh @@ -141,6 +141,7 @@ awk -v dir=$dir -v ff=$first_phone_field \ { ok=1; for (n=ff; n<=NF; n++) { if ($n in sil) ok=0; } if (ok && NF>=ff) { for (n=ff;n<=NF;n++) printf("%s ",$n); print ""; } else { print("make_unk_lm.sh: info: not including dict line: ", $0) >"/dev/stderr" }}' <$src_dict >$dir/training.txt +cat $dir/training.txt | awk '{for(n=1;n<=NF;n++) seen[$n]=1; } END{for (k in seen) print k;}' > $dir/all_nonsil_phones num_dict_lines=$(wc -l <$src_dict) num_train_lines=$(wc -l < $dir/training.txt) @@ -180,7 +181,6 @@ if $use_pocolm; then cat $dir/training.txt | awk -v h=$heldout_ratio '{if(NR%h == 0) print; }' > $dir/pocolm/text/dev.txt cat $dir/training.txt | awk -v h=$heldout_ratio '{if(NR%h != 0) print; }' > $dir/pocolm/text/train.txt - cat $dir/training.txt | awk '{for(n=1;n<=NF;n++) seen[$n]=1; } END{for (k in seen) print k;}' > $dir/all_nonsil_phones # the following options are because we expect the amount of data to be small, # all the data subsampling isn't really needed and will increase the chance of diff --git a/egs/wsj/s5/utils/make_lexicon_fst.pl b/egs/wsj/s5/utils/make_lexicon_fst.pl index bcf0f4df13a..f97129c05cb 100755 --- a/egs/wsj/s5/utils/make_lexicon_fst.pl +++ b/egs/wsj/s5/utils/make_lexicon_fst.pl @@ -21,21 +21,24 @@ $pron_probs = 0; -if ($ARGV[0] eq "--pron-probs") { +if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { $pron_probs = 1; shift @ARGV; } if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR - "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt -Creates a lexicon FST that transduces phones to words, and may allow optional silence. -Note: ordinarily, each line of lexicon.txt is: word phone1 phone2 ... phoneN; if the --pron-probs option is -used, each line is: word pronunciation-probability phone1 phone2 ... phoneN. The probability 'prob' will -typically be between zero and one, and note that it's generally helpful to normalize so the largest one -for each word is 1.0, but this is your responsibility. The silence disambiguation symbol, e.g. something -like #5, is used only when creating a lexicon with disambiguation symbols, e.g. 
L_disambig.fst, and was -introduced to fix a particular case of non-determinism of decoding graphs.\n"; + print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; + print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; + print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; + print STDERR " word phone1 phone2 ... phoneN;\n"; + print STDERR "if the --pron-probs option is used, each line is:\n"; + print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; + print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; + print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; + print STDERR "this is your responsibility.\n\n"; + print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; + print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; + print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; exit(1); } diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh index c62f0ccb14f..65ff3c3c79d 100755 --- a/egs/wsj/s5/utils/mkgraph.sh +++ b/egs/wsj/s5/utils/mkgraph.sh @@ -21,7 +21,7 @@ loopscale=0.1 remove_oov=false for x in `seq 4`; do - [ "$1" == "--mono" -o "$1" == "left-biphone" -o "$1" == "--quinphone" ] && shift && \ + [ "$1" == "--mono" -o "$1" == "--left-biphone" -o "$1" == "--quinphone" ] && shift && \ echo "WARNING: the --mono, --left-biphone and --quinphone options are now deprecated and ignored." [ "$1" == "--remove-oov" ] && remove_oov=true && shift; [ "$1" == "--transition-scale" ] && tscale=$2 && shift 2; @@ -75,39 +75,49 @@ fi N=$(tree-info $tree | grep "context-width" | cut -d' ' -f2) || { echo "Error when getting context-width"; exit 1; } P=$(tree-info $tree | grep "central-position" | cut -d' ' -f2) || { echo "Error when getting central-position"; exit 1; } -[[ -f $2/frame_subsampling_factor && $loopscale != 1.0 ]] && \ +[[ -f $2/frame_subsampling_factor && "$loopscale" == "0.1" ]] && \ echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"; mkdir -p $lang/tmp +trap "rm -f $lang/tmp/LG.fst.$$" EXIT HUP INT PIPE TERM # Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in # place of -o if [[ ! -s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \ $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \ fstminimizeencoded | fstpushspecial | \ - fstarcsort --sort_type=ilabel > $lang/tmp/LG.fst || exit 1; + fstarcsort --sort_type=ilabel > $lang/tmp/LG.fst.$$ || exit 1; + mv $lang/tmp/LG.fst.$$ $lang/tmp/LG.fst fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic." fi - clg=$lang/tmp/CLG_${N}_${P}.fst - -if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst ]]; then +clg_tmp=$clg.$$ +ilabels=$lang/tmp/ilabels_${N}_${P} +ilabels_tmp=$ilabels.$$ +trap "rm -f $clg_tmp $ilabels_tmp" EXIT HUP INT PIPE TERM +if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst \ + || ! 
-s $ilabels || $ilabels -ot $lang/tmp/LG.fst ]]; then fstcomposecontext --context-size=$N --central-position=$P \ --read-disambig-syms=$lang/phones/disambig.int \ --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.int \ - $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst |\ - fstarcsort --sort_type=ilabel > $clg - fstisstochastic $clg || echo "[info]: CLG not stochastic." + $ilabels_tmp < $lang/tmp/LG.fst |\ + fstarcsort --sort_type=ilabel > $clg_tmp + mv $clg_tmp $clg + mv $ilabels_tmp $ilabels + fstisstochastic $clg || echo "[info]: CLG not stochastic." fi +trap "rm -f $dir/Ha.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model \ || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \ --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ - > $dir/Ha.fst || exit 1; + > $dir/Ha.fst.$$ || exit 1; + mv $dir/Ha.fst.$$ $dir/Ha.fst fi +trap "rm -f $dir/HCLGa.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ $dir/HCLGa.fst -ot $clg ]]; then if $remove_oov; then @@ -117,14 +127,16 @@ if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ fi fsttablecompose $dir/Ha.fst "$clg" | fstdeterminizestar --use-log=true \ | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \ - fstminimizeencoded > $dir/HCLGa.fst || exit 1; + fstminimizeencoded > $dir/HCLGa.fst.$$ || exit 1; + mv $dir/HCLGa.fst.$$ $dir/HCLGa.fst fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic" fi +trap "rm -f $dir/HCLG.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then add-self-loops --self-loop-scale=$loopscale --reorder=true \ - $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1; - + $model < $dir/HCLGa.fst > $dir/HCLG.fst.$$ || exit 1; + mv $dir/HCLG.fst.$$ $dir/HCLG.fst if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then # No point doing this test if transition-scale not 1, as it is bound to fail. fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." diff --git a/egs/wsj/s5/utils/perturb_data_dir_speed.sh b/egs/wsj/s5/utils/perturb_data_dir_speed.sh index 20ff86755eb..e3d56d58b9c 100755 --- a/egs/wsj/s5/utils/perturb_data_dir_speed.sh +++ b/egs/wsj/s5/utils/perturb_data_dir_speed.sh @@ -112,4 +112,9 @@ cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \ rm $destdir/spk_map $destdir/utt_map 2>/dev/null echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir" -utils/validate_data_dir.sh --no-feats $destdir + +if [ -f $srcdir/text ]; then + utils/validate_data_dir.sh --no-feats $destdir +else + utils/validate_data_dir.sh --no-feats --no-text $destdir +fi diff --git a/egs/wsj/s5/utils/prepare_lang.sh b/egs/wsj/s5/utils/prepare_lang.sh index 054210cdd23..47670a2065a 100755 --- a/egs/wsj/s5/utils/prepare_lang.sh +++ b/egs/wsj/s5/utils/prepare_lang.sh @@ -64,6 +64,9 @@ unk_fst= # if you want to model the unknown-word () phone_symbol_table= # if set, use a specified phones.txt file. extra_word_disambig_syms= # if set, add disambiguation symbols from this file (one per line) # to phones/disambig.txt, phones/wdisambig.txt and words.txt +num_extra_phone_disambig_syms=1 # Standard one phone disambiguation symbol is used for optional silence. 
+                                # Increasing this number does no harm, but it is only useful if you later
+                                # want to introduce these labels into L_disambig.fst
 # end configuration sections
 
 echo "$0 $@"  # Print the command line for logging
@@ -284,7 +287,7 @@ if "$silprob"; then
 else
   ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt)
 fi
-ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
+ndisambig=$[$ndisambig+$num_extra_phone_disambig_syms]; # add (at least) one disambig symbol for silence in lexicon FST.
 echo $ndisambig > $tmpdir/lex_ndisambig
 
 # Format of lexiconp_disambig.txt:
@@ -473,6 +476,12 @@ fi
 
 silphonelist=`cat $dir/phones/silence.csl`
 nonsilphonelist=`cat $dir/phones/nonsilence.csl`
+
+# Note: it's OK, after generating the 'lang' directory, to overwrite the topo file
+# with another one of your choice if the 'topo' file you want can't be generated by
+# utils/gen_topo.pl. We do this in the 'chain' recipes. Of course, the 'topo' file
+# should cover all the phones. Try running utils/validate_lang.pl to check that
+# everything is OK after modifying the topo file.
 utils/gen_topo.pl $num_nonsil_states $num_sil_states $nonsilphonelist $silphonelist >$dir/topo
diff --git a/egs/wsj/s5/utils/queue.pl b/egs/wsj/s5/utils/queue.pl
index 424b07ff612..10fd3b1a885 100755
--- a/egs/wsj/s5/utils/queue.pl
+++ b/egs/wsj/s5/utils/queue.pl
@@ -65,8 +65,8 @@ my $jobname;
 my $jobstart;
 my $jobend;
-
 my $array_job = 0;
+my $sge_job_id;
 
 sub print_usage() {
   print STDERR
@@ -90,6 +90,14 @@ ()
   exit 1;
 }
 
+sub caught_signal {
+  if ( defined $sge_job_id ) { # Signal trapped after submitting jobs
+    my $signal = $!;
+    system ("qdel $sge_job_id");
+    die "Caught a signal: $signal , deleting SGE task: $sge_job_id and exiting\n";
+  }
+}
+
 if (@ARGV < 2) {
   print_usage();
 }
@@ -179,6 +187,9 @@ ()
 # A more detailed description of the ways the options would be handled is at
 # the top of this file.
 
+$SIG{INT} = \&caught_signal;
+$SIG{TERM} = \&caught_signal;
+
 my $opened_config_file = 1;
 
 open CONFIG, "<$config" or $opened_config_file = 0;
@@ -418,7 +429,6 @@ ()
   }
 }
 
-my $sge_job_id;
 if (! $sync) { # We're not submitting with -sync y, so we
   # need to wait for the jobs to finish. We wait for the
   # sync-files we "touched" in the script to exist.
diff --git a/egs/wsj/s5/utils/slurm.pl b/egs/wsj/s5/utils/slurm.pl
index a332e19cb1d..27e5fce9c01 100755
--- a/egs/wsj/s5/utils/slurm.pl
+++ b/egs/wsj/s5/utils/slurm.pl
@@ -397,9 +397,12 @@ sub exec_command {
 print Q " unset CUDA_VISIBLE_DEVICES.\n";
 print Q "fi\n";
 print Q "time1=\`date +\"%s\"\`\n";
-print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
+print Q " ( $cmd ) &>>$logfile\n";
 print Q "ret=\$?\n";
+print Q "sync || true\n";
 print Q "time2=\`date +\"%s\"\`\n";
+print Q "echo '#' Accounting: begin_time=\$time1 >>$logfile\n";
+print Q "echo '#' Accounting: end_time=\$time2 >>$logfile\n";
 print Q "echo '#' Accounting: time=\$((\$time2-\$time1)) threads=$num_threads >>$logfile\n";
 print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
 print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;
diff --git a/egs/wsj/s5/utils/split_data.sh b/egs/wsj/s5/utils/split_data.sh
index e44a4ab6359..94ba4f555ce 100755
--- a/egs/wsj/s5/utils/split_data.sh
+++ b/egs/wsj/s5/utils/split_data.sh
@@ -16,20 +16,28 @@
 # limitations under the License.
split_per_spk=true +split_per_reco=false if [ "$1" == "--per-utt" ]; then split_per_spk=false shift +elif [ "$1" == "--per-reco" ]; then + split_per_spk=false + split_per_reco=true + shift fi if [ $# != 2 ]; then - echo "Usage: $0 [--per-utt] " + echo "Usage: $0 [--per-utt|--per-reco] " echo "E.g.: $0 data/train 50" echo "It creates its output in e.g. data/train/split50/{1,2,3,...50}, or if the " echo "--per-utt option was given, in e.g. data/train/split50utt/{1,2,3,...50}." + echo "If the --per-reco option was given, in e.g. data/train/split50reco/{1,2,3,...50}." echo "" echo "This script will not split the data-dir if it detects that the output is newer than the input." echo "By default it splits per speaker (so each speaker is in only one split dir)," echo "but with the --per-utt option it will ignore the speaker information while splitting." + echo "But if the --per-reco option is given, it splits per recording " + echo "(so each recording is in only one split dir)." exit 1 fi @@ -41,6 +49,14 @@ if ! [ "$numsplit" -gt 0 ]; then exit 1; fi +if $split_per_spk; then + warning_opt= +else + # suppress warnings from filter_scps.pl about 'some input lines were output + # to multiple files'. + warning_opt="--no-warn" +fi + n=0; feats="" wavs="" @@ -59,10 +75,14 @@ if [ -f $data/text ] && [ $nu -ne $nt ]; then echo "** use utils/fix_data_dir.sh to fix this." fi - if $split_per_spk; then utt2spk_opt="--utt2spk=$data/utt2spk" utt="" +elif $split_per_reco; then + utils/data/get_reco2utt.sh $data + utils/spk2utt_to_utt2spk.pl $data/reco2utt > $data/utt2reco + utt2spk_opt="--utt2spk=$data/utt2reco" + utt="reco" else utt2spk_opt= utt="utt" @@ -86,6 +106,7 @@ if ! $need_to_split; then fi utt2spks=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2spk; done) +utt2recos=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2reco; done) directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n; done) @@ -100,11 +121,20 @@ fi which lockfile >&/dev/null && lockfile -l 60 $data/.split_lock trap 'rm -f $data/.split_lock' EXIT HUP INT PIPE TERM -utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 +if $split_per_reco; then + utils/split_scp.pl $utt2spk_opt $data/utt2reco $utt2recos || exit 1 +else + utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 +fi for n in `seq $numsplit`; do dsn=$data/split${numsplit}${utt}/$n + + if $split_per_reco; then + utils/filter_scp.pl $dsn/utt2reco $data/utt2spk > $dsn/utt2spk + fi + - utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1; + utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1 done maybe_wav_scp= @@ -114,7 +144,7 @@ if [ ! -f $data/segments ]; then fi # split some things that are indexed by utterance. -for f in feats.scp text vad.scp utt2lang $maybe_wav_scp; do +for f in feats.scp text vad.scp utt2lang $maybe_wav_scp utt2dur utt2num_frames; do if [ -f $data/$f ]; then utils/filter_scps.pl JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/utt2spk $data/$f $data/split${numsplit}${utt}/JOB/$f || exit 1; @@ -124,9 +154,6 @@ done # split some things that are indexed by speaker for f in spk2gender spk2warp cmvn.scp; do if [ -f $data/$f ]; then - ! $split_per_spk && warning_opt="--no-warn" - # suppress warnings from filter_scps.pl about 'some input lines were output - # to multiple files', which is expected in this case.
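To make the new --per-reco mode concrete, here is a hypothetical walk-through (editor's sketch with invented toy data) of the flow the lines above set up:

    # data/toy/segments: utt1 and utt2 live on recording recA, utt3 on recB.
    utils/data/get_reco2utt.sh data/toy           # writes reco2utt: "recA utt1 utt2" / "recB utt3"
    utils/spk2utt_to_utt2spk.pl data/toy/reco2utt \
      > data/toy/utt2reco                         # inverts it: "utt1 recA", "utt2 recA", "utt3 recB"
    # split_scp.pl is then pointed at utt2reco instead of utt2spk, so a
    # two-way split keeps each recording intact:
    utils/split_data.sh --per-reco data/toy 2     # recA -> split2reco/1, recB -> split2reco/2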
utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/spk2utt $data/$f $data/split${numsplit}${utt}/JOB/$f || exit 1; fi @@ -140,15 +167,21 @@ if [ -f $data/segments ]; then awk '{print $2;}' $dsn/segments | sort | uniq > $dsn/tmp.reco # recording-ids. done if [ -f $data/reco2file_and_channel ]; then - utils/filter_scps.pl JOB=1:$numsplit \ + utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/tmp.reco $data/reco2file_and_channel \ $data/split${numsplit}${utt}/JOB/reco2file_and_channel || exit 1 fi if [ -f $data/wav.scp ]; then - utils/filter_scps.pl JOB=1:$numsplit \ + utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/tmp.reco $data/wav.scp \ $data/split${numsplit}${utt}/JOB/wav.scp || exit 1 fi + if [ -f $data/reco2utt ]; then + utils/filter_scps.pl JOB=1:$numsplit \ + $data/split${numsplit}${utt}/JOB/tmp.reco $data/reco2utt \ + $data/split${numsplit}${utt}/JOB/reco2utt || exit 1 + fi + for f in $data/split${numsplit}${utt}/*/tmp.reco; do rm $f; done fi diff --git a/egs/wsj/s5/utils/ssh.pl b/egs/wsj/s5/utils/ssh.pl index 8f2755a5ccb..5d3e3e44d71 100755 --- a/egs/wsj/s5/utils/ssh.pl +++ b/egs/wsj/s5/utils/ssh.pl @@ -161,6 +161,7 @@ # bash commands. print S "set -e\n"; # if any of the later commands fails, we want it to exit. print S "cd $cwd\n"; + print S ". ./path.sh\n"; print S "mkdir -p $logdir\n"; print S "time1=\`date +\"%s\"\`\n"; print S "( echo '#' Running on \`hostname\`\n"; diff --git a/egs/wsj/s5/utils/subset_data_dir.sh b/egs/wsj/s5/utils/subset_data_dir.sh index 5fe3217ddad..9533d0216c9 100755 --- a/egs/wsj/s5/utils/subset_data_dir.sh +++ b/egs/wsj/s5/utils/subset_data_dir.sh @@ -108,6 +108,7 @@ function do_filtering { [ -f $srcdir/vad.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp [ -f $srcdir/utt2lang ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang [ -f $srcdir/utt2dur ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur + [ -f $srcdir/utt2uniq ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp [ -f $srcdir/spk2warp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp [ -f $srcdir/utt2warp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp @@ -126,6 +127,10 @@ function do_filtering { [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm rm $destdir/reco + else + awk '{print $1;}' $destdir/wav.scp | sort | uniq > $destdir/reco + [ -f $srcdir/reco2file_and_channel ] && \ + utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel fi srcutts=`cat $srcdir/utt2spk | wc -l` destutts=`cat $destdir/utt2spk | wc -l` diff --git a/egs/wsj/s5/utils/validate_data_dir.sh b/egs/wsj/s5/utils/validate_data_dir.sh index 49c929207b9..7e93b0f8400 100755 --- a/egs/wsj/s5/utils/validate_data_dir.sh +++ b/egs/wsj/s5/utils/validate_data_dir.sh @@ -22,6 +22,8 @@ done if [ $# -ne 1 ]; then echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] " + echo "The --no-xxx options mean that the script does not require " + echo "xxx.scp to be present, but it will check it if it is present." 
echo "e.g.: $0 data/train" exit 1; fi @@ -132,7 +134,7 @@ if [ -f $data/wav.scp ]; then check_sorted_and_uniq $data/segments # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. ! cat $data/segments | \ - awk '{if (NF != 4 || ($4 <= $3 && $4 != -1)) { print "Bad line in segments file", $0; exit(1); }}' && \ + awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ echo "$0: badly formatted segments file" && exit 1; segments_len=`cat $data/segments | wc -l` diff --git a/egs/wsj/s5/utils/validate_lang.pl b/egs/wsj/s5/utils/validate_lang.pl index 008c54ac752..2e8125b1dd7 100755 --- a/egs/wsj/s5/utils/validate_lang.pl +++ b/egs/wsj/s5/utils/validate_lang.pl @@ -758,8 +758,10 @@ sub check_summation { # prepare_lang.sh), the regular L.fst may contain some disambiguation # symbols. if (! defined $is_disambig{$phone}) { - if ($phone == "<>") { + if ($phone eq "<>") { $state = "eos"; + } elsif ($phone == 0) { + $exit = 1; print "--> ERROR: unexpected phone sequence=$phoneseq, wordseq=$wordseq\n"; last; } else { $state = $wbtype{$phone}; } diff --git a/egs/wsj/s5/utils/write_kwslist.pl b/egs/wsj/s5/utils/write_kwslist.pl index b2f67815df9..18071fa7671 100755 --- a/egs/wsj/s5/utils/write_kwslist.pl +++ b/egs/wsj/s5/utils/write_kwslist.pl @@ -32,8 +32,9 @@ --remove-NO : Remove the "NO" decision instances (boolean, default = false) --segments : Segments file from Kaldi (string, default = "") --system-id : System ID (string, default = "") - --verbose : Verbose level (higher --> more kws section) (integer, default 0) - --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --verbose : Verbose level (higher --> more kws section) (integer, default = 0) + --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --nbest | Output upto nbest hits into the kwlist (int, default = -1) EOU @@ -55,6 +56,7 @@ my $remove_dup = "false"; my $remove_NO = "false"; my $YES_cutoff = -1; +my $nbest_max = -1; GetOptions('segments=s' => \$segment, 'flen=f' => \$flen, 'beta=f' => \$beta, @@ -72,7 +74,8 @@ 'duptime=f' => \$duptime, 'remove-dup=s' => \$remove_dup, 'YES-cutoff=i' => \$YES_cutoff, - 'remove-NO=s' => \$remove_NO); + 'remove-NO=s' => \$remove_NO, + 'nbest=i' => \$nbest_max) or die "Cannot continue\n"; ($normalize eq "true" || $normalize eq "false") || die "$0: Bad value for option --normalize\n"; ($remove_dup eq "true" || $remove_dup eq "false") || die "$0: Bad value for option --remove-dup\n"; @@ -134,12 +137,18 @@ sub PrintKwslist { # Start printing $kwslist .= "[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n"; my $prev_kw = ""; + my $nbest = $nbest_max; foreach my $kwentry (@{$KWS}) { + if (($prev_kw eq $kwentry->[0]) && ($nbest le 0) && ($nbest_max gt 0)) { + next; + } if ($prev_kw ne $kwentry->[0]) { if ($prev_kw ne "") {$kwslist .= " \n";} $kwslist .= " [0]\" search_time=\"1\" oov_count=\"0\">\n"; $prev_kw = $kwentry->[0]; + $nbest = $nbest_max; } + $nbest -= 1 if $nbest_max gt 0; my $score = sprintf("%g", $kwentry->[5]); $kwslist .= " [1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$score\" decision=\"$kwentry->[6]\""; if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";} diff --git a/src/.version b/src/.version new file mode 100644 index 00000000000..a75b92f1ed7 --- /dev/null +++ b/src/.version @@ -0,0 +1 @@ +5.1 diff --git a/src/Doxyfile b/src/Doxyfile index f5e874be3ad..a6c0b434ff2 100644 --- a/src/Doxyfile +++ b/src/Doxyfile 
@@ -453,9 +453,9 @@ WARN_LOGFILE = # the lines after "doc itf" are copied from SUBDIRS in the Makefile. INPUT = doc itf \ - base matrix util feat tree thread gmm transform sgmm \ + base matrix util feat tree thread gmm transform \ fstext hmm lm decoder lat cudamatrix nnet \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet2bin nnet3 nnet3bin \ kwsbin ivector ivectorbin @@ -503,7 +503,7 @@ EXCLUDE_PATTERNS = # directories that contain example code fragments that are included (see # the \include command). -EXAMPLE_PATH = +EXAMPLE_PATH = doc # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp diff --git a/src/INSTALL b/src/INSTALL index 3f7a01928ba..f40a514c4b6 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -6,14 +6,24 @@ compilation, see ../windows/INSTALL. You must first have completed the installation steps in ../tools/INSTALL (compiling OpenFst; getting ATLAS and CLAPACK headers). -The installation instructions are: -./configure --shared -make depend -make - -Note that "make" takes a long time; you can speed it up by running make -in parallel if you have multiple CPUs, for instance - make depend -j 8 - make -j 8 +The installation instructions are + + ./configure --shared + make depend + make + +Note that "make" takes a long time. You can speed it up by running make +in parallel if you have multiple CPUs, e.g. to use 8 CPUs + + make depend -j 8 + make -j 8 + +Kaldi requires a relatively recent C++ compiler with C++11 support, +e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system +default compiler does not support C++11, you can specify a C++11 compliant +compiler by setting the CXX environment variable, e.g. + + CXX=g++-4.8 ./configure --shared + For more information, see documentation at http://kaldi-asr.org/doc/ and click on "The build process (how Kaldi is compiled)". 
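A quick way to confirm the compiler requirement stated in the INSTALL text above before running ./configure (editor's sketch, not part of the official instructions; CXX is whatever compiler you plan to use):

    echo 'int main() { auto x = 0; return x; }' > /tmp/cxx11_check.cc
    ${CXX:-g++} -std=c++11 /tmp/cxx11_check.cc -o /tmp/cxx11_check \
      && echo "C++11 support looks OK"

If the compile fails, point CXX at a newer compiler as shown above, e.g. CXX=g++-4.8 ./configure --shared.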
diff --git a/src/Makefile b/src/Makefile index 9905be869a0..b7ac6f60bd4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,17 +5,17 @@ SHELL := /bin/bash -SUBDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat kws cudamatrix nnet \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ +SUBDIRS = base matrix util feat tree thread gmm transform \ + fstext hmm simplehmm lm decoder lat kws cudamatrix nnet segmenter \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 chain nnet3bin nnet2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin chainbin + ivector ivectorbin online2 online2bin lmbin chainbin segmenterbin simplehmmbin -MEMTESTDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat nnet kws chain \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ +MEMTESTDIRS = base matrix util feat tree thread gmm transform \ + fstext hmm simplehmm lm decoder lat nnet kws chain segmenter \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 nnet2 nnet3 nnet2bin nnet3bin sgmm2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin + ivector ivectorbin online2 online2bin lmbin segmenterbin simplehmmbin CUDAMEMTESTDIR = cudamatrix @@ -31,9 +31,15 @@ include kaldi.mk # Reset the default goal, so that the all target will become default .DEFAULT_GOAL := -all: checkversion test_dependencies kaldi.mk mklibdir $(SUBDIRS) +all: + $(MAKE) checkversion + $(MAKE) kaldi.mk + $(MAKE) mklibdir + $(MAKE) subdirs -echo Done +subdirs: $(SUBDIRS) + mklibdir: test -d $(KALDILIBDIR) || mkdir $(KALDILIBDIR) @@ -51,8 +57,10 @@ checkversion: ifeq ($(shell ./configure --version),$(CONFIGURE_VERSION)) @echo "The version of configure script matches kaldi.mk version. Good." else + @echo "" @echo "The kaldi.mk file was generated using a different version of configure script. Please rerun the configure again" @test -f ./kaldi.mk && echo "Hint: Previous configure command line: " && head -n 2 ./kaldi.mk | grep configure | sed 's/^# *//g' + @echo "" @false endif @@ -88,23 +96,12 @@ kaldi.mk: @[ -f kaldi.mk ] || { echo "kaldi.mk does not exist; you have to run ./configure"; exit 1; } # Compile optional stuff -ext: test_dependencies ext_depend $(SUBDIRS) $(EXT_SUBDIRS) +ext: ext_depend $(SUBDIRS) $(EXT_SUBDIRS) -echo Done -ifndef OPENFST_VER -$(error Please rerun configure: OPENFST_VER is not defined, likely kaldi.mk was produced by older configure script.) -endif -# Note: OPENFST_VER is determined by configure and added to kaldi.mk -OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") -test_dependencies: -ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10302)","1") - $(error OpenFst $(OPENFST_VER) is not supported. You now need OpenFst >= 1.3.2.) 
-endif - check_portaudio: @[ -d ../tools/portaudio ] || ( cd ../tools; ./install_portaudio.sh ) - clean: rmlibdir -for x in $(SUBDIRS) $(EXT_SUBDIRS); do $(MAKE) -C $$x clean; done @@ -145,7 +142,7 @@ $(SUBDIRS) : mklibdir $(MAKE) -C $@ .PHONY: $(EXT_SUBDIRS) -$(EXT_SUBDIRS) : mklibdir +$(EXT_SUBDIRS) : mklibdir ext_depend $(MAKE) -C $@ @@ -153,9 +150,9 @@ $(EXT_SUBDIRS) : mklibdir # this is necessary for correct parallel compilation #1)The tools depend on all the libraries -bin fstbin gmmbin fgmmbin sgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin: \ - base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm \ - lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 +bin fstbin gmmbin fgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin segmenterbin simplehmmbin: \ + base matrix util feat tree thread gmm transform sgmm2 fstext hmm simplehmm \ + lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 segmenter #2)The libraries have inter-dependencies base: base/.depend.mk @@ -166,12 +163,11 @@ feat: base matrix util gmm transform tree thread tree: base util thread matrix gmm: base util matrix tree thread transform: base util matrix gmm tree thread -sgmm: base util matrix gmm tree transform thread hmm sgmm2: base util matrix gmm tree transform thread hmm fstext: base util thread matrix tree hmm: base tree matrix util thread lm: base util thread matrix fstext -decoder: base util thread matrix gmm sgmm hmm tree transform lat +decoder: base util thread matrix gmm hmm tree transform lat lat: base util thread hmm tree matrix cudamatrix: base util thread matrix nnet: base util hmm tree thread matrix cudamatrix @@ -179,10 +175,11 @@ nnet2: base util matrix thread lat gmm hmm tree transform cudamatrix nnet3: base util matrix thread lat gmm hmm tree transform cudamatrix chain fstext chain: lat hmm tree fstext matrix cudamatrix util thread base ivector: base util matrix thread transform tree gmm +segmenter: base matrix util gmm thread tree +simplehmm: base tree matrix util thread hmm #3)Dependencies for optional parts of Kaldi -onlinebin: base matrix util feat tree gmm transform sgmm sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread -# python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm decoder lat online +onlinebin: base matrix util feat tree gmm transform sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread +# python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm2 fstext hmm decoder lat online online: decoder gmm transform feat matrix util base lat hmm thread tree online2: decoder gmm transform feat matrix util base lat hmm thread tree ivector cudamatrix nnet2 nnet3 chain kws: base util thread hmm tree matrix lat - diff --git a/src/base/Makefile b/src/base/Makefile index 8db3b86d021..583c6badcf2 100644 --- a/src/base/Makefile +++ b/src/base/Makefile @@ -1,3 +1,16 @@ +# Whenever make is run in this directory, call ./get_version.sh as the +# first thing. This script regenerates ./version.h if necessary, e.g. +# if it does not already exist or if the version number has changed.
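The Makefile fragment that follows uses a compact GNU-make idiom: it runs get_version.sh inside $(shell ...), appends the script's exit status as a trailing word, and then splits status and output back apart with $(lastword)/$(wordlist). Roughly the same logic in plain shell (editor's illustration, not from the patch):

    out=$(./get_version.sh 2>&1); rc=$?         # capture output and exit status
    [ -n "$out" ] && echo "$out"                # replay the script's messages
    [ "$rc" -eq 0 ] || exit 1                   # abort the build if versioning failed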
+LOG := $(shell ./get_version.sh; echo " $$?") +ifneq ($(strip $(LOG)), 0) + RC := $(lastword $(LOG)) + OUT := $(wordlist 1,$(shell echo $$(($(words $(LOG))-1))),$(LOG)) + ifeq ($(RC),0) + $(info $(OUT)) + else + $(error $(OUT)) + endif +endif all: @@ -9,7 +22,6 @@ OBJFILES = kaldi-math.o kaldi-error.o io-funcs.o kaldi-utils.o LIBNAME = kaldi-base -ADDLIBS = +ADDLIBS = include ../makefiles/default_rules.mk - diff --git a/src/base/get_version.sh b/src/base/get_version.sh new file mode 100755 index 00000000000..d6c6c975a4d --- /dev/null +++ b/src/base/get_version.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# Copyright 2017 University of Southern California (Author: Dogan Can) + +# See ../../COPYING for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Kaldi versioning is loosely based on the semantic versioning scheme. This +# script tries to work out the version string from the partial version number +# specified in src/.version along with the recent git history. By convention +# src/.version specifies the first two components (MAJOR.MINOR) of the version +# number. The third component (PATCH) is determined by counting how many +# commits there are that are newer than the last commit modifying +# src/.version. If there are uncommitted changes in the src/ directory, then +# the version string is extended with a suffix (~N) specifying the number of +# files with uncommitted changes. The last component of the version string is +# the abbreviated hash of the HEAD commit. If git history is not available or +# if the file src/.short_version exists, then the version string defaults to +# the number specified in src/.version. + +set -e + +# Change working directory to the directory where this script is located. +cd "$(dirname ${BASH_SOURCE[0]})" + +# Read the partial version number specified in the first line of src/.version. +version=$(head -1 ../.version) + +if [ -e ../.short_version ]; then + echo "$0: File src/.short_version exists." + echo "$0: Stopping the construction of full version number from git history." +elif ! [[ $version =~ ^[0-9][0-9]*\.[0-9][0-9]*$ ]]; then + echo "$0: The version number \"$version\" specified in src/.version is not" \ "in MAJOR.MINOR format." + echo "$0: Stopping the construction of full version number from git history." +elif ! which git >&/dev/null; then + echo "$0: Git is not installed." + echo "$0: Using the version number \"$version\" specified in src/.version." +elif [ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != true ]; then + echo "$0: Git history is not available." + echo "$0: Using the version number \"$version\" specified in src/.version." +else + # Figure out patch number. + version_commit=$(git log -1 --pretty=oneline ../.version | awk '{print $1}') + patch_number=$(git rev-list ${version_commit}..HEAD | wc -l | awk '{print $1}') + version="$version.$patch_number" + + # Check for uncommitted changes in src/.
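# Editor's worked example of the scheme described in the header comment
# above (all numbers hypothetical): if src/.version contains "5.1", there
# are 42 commits since .version last changed, 3 files in src/ carry
# uncommitted changes, and HEAD abbreviates to a1b2c, the version string
# grows in stages:  5.1  ->  5.1.42  ->  5.1.42~3  ->  5.1.42~3-a1b2c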
+ uncommitted_changes=$(git diff-index HEAD -- .. | wc -l | awk '{print $1}') + if [ $uncommitted_changes -gt 0 ]; then + # Add suffix ~N if there are N files in src/ with uncommitted changes + version="$version~$uncommitted_changes" + fi + + # Figure out HEAD commit SHA-1. + head_commit=$(git log -1 --pretty=oneline | awk '{print $1}') + head_commit_short=$(git log -1 --oneline --abbrev=4 | awk '{print $1}') + version="$version-${head_commit_short}" +fi + +# Empty version number is not allowed. +if [ -z "$version" ]; then + version="?" +fi + +# Write version info to a temporary file. +temp=$(mktemp /tmp/temp.XXXXXX) +trap 'rm -f "$temp"' EXIT +echo "// This file was automatically created by ./get_version.sh." > $temp +echo "// It is only included by ./kaldi-error.cc." >> $temp +echo "#define KALDI_VERSION \"$version\"" >> $temp +if [ -n "$head_commit" ]; then + echo "#define KALDI_GIT_HEAD \"$head_commit\"" >> $temp +fi + +# Overwrite ./version.h with the temporary file if they are different. +if ! cmp -s $temp version.h; then + cp $temp version.h + chmod 644 version.h +fi diff --git a/src/base/kaldi-error.cc b/src/base/kaldi-error.cc index 62f26df4c98..f2ce1edf37d 100644 --- a/src/base/kaldi-error.cc +++ b/src/base/kaldi-error.cc @@ -31,6 +31,7 @@ #include "base/kaldi-common.h" #include "base/kaldi-error.h" +#include "base/version.h" namespace kaldi { @@ -40,14 +41,13 @@ int32 g_kaldi_verbose_level = 0; const char *g_program_name = NULL; static LogHandler g_log_handler = NULL; -// If the program name was set (g_program_name != ""), the function -// GetProgramName returns the program name (without the path) followed by a -// colon, e.g. "gmm-align:". Otherwise it returns the empty string "". +// If the program name was set (g_program_name != ""), GetProgramName +// returns the program name (without the path), e.g. "gmm-align". +// Otherwise it returns the empty string "". const char *GetProgramName() { return g_program_name == NULL ? "" : g_program_name; } - /***** HELPER FUNCTIONS *****/ // Given a filename like "/a/b/c/d/e/f.cc", GetShortFileName @@ -184,12 +184,13 @@ void MessageLogger::HandleMessage(const LogMessageEnvelope &envelope, header << "ASSERTION_FAILED ("; break; default: - abort(); // coding errror (unknown 'severity'), + abort(); // coding error (unknown 'severity'), } } // fill the other info from the envelope, - header << GetProgramName() << envelope.func << "():" - << envelope.file << ':' << envelope.line << ")"; + header << GetProgramName() << "[" KALDI_VERSION "]" << ':' + << envelope.func << "():" << envelope.file << ':' << envelope.line + << ")"; // Printing the message, if (envelope.severity >= LogMessageEnvelope::kWarning) { diff --git a/src/base/kaldi-utils.h b/src/base/kaldi-utils.h index 47c60b4b01d..bd2da25dce7 100644 --- a/src/base/kaldi-utils.h +++ b/src/base/kaldi-utils.h @@ -113,8 +113,7 @@ void Sleep(float seconds); (reinterpret_cast(&a))[1]=t;} -// Makes copy constructor and operator= private. Same as in compat.h of OpenFst -// toolkit. +// Makes copy constructor and operator= private. 
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ type(const type&); \ void operator = (const type&) @@ -146,14 +145,4 @@ template<> class KaldiCompileTimeAssert { # define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); #endif -#define KALDI_STRTOD(cur_cstr, end_cstr) strtod(cur_cstr, end_cstr) - -#ifdef _MSC_VER -# define KALDI_STRTOF(cur_cstr, end_cstr) \ - static_cast(strtod(cur_cstr, end_cstr)); -#else -# define KALDI_STRTOF(cur_cstr, end_cstr) strtof(cur_cstr, end_cstr); -#endif - #endif // KALDI_BASE_KALDI_UTILS_H_ - diff --git a/src/bin/Makefile b/src/bin/Makefile index 687040889b3..1948ba2d681 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -24,7 +24,8 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \ matrix-logprob matrix-sum \ build-pfile-from-ali get-post-on-ali tree-info am-info \ vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \ - transform-vec align-text matrix-dim + transform-vec align-text matrix-dim weight-pdf-post weight-matrix \ + matrix-add-offset matrix-dot-product compute-fscore OBJFILES = diff --git a/src/bin/ali-to-phones.cc b/src/bin/ali-to-phones.cc index b370dbc7f18..2a76000cfae 100644 --- a/src/bin/ali-to-phones.cc +++ b/src/bin/ali-to-phones.cc @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) { "Usage: ali-to-phones [options] " "\n" "e.g.: \n" - " ali-to-phones 1.mdl ark:1.ali ark:phones.tra\n" + " ali-to-phones 1.mdl ark:1.ali ark:-\n" "or:\n" " ali-to-phones --ctm-output 1.mdl ark:1.ali 1.ctm\n" "See also: show-alignments lattice-align-phones\n"; diff --git a/src/bin/align-equal.cc b/src/bin/align-equal.cc index 3d35ee33daa..a3bc40dc236 100644 --- a/src/bin/align-equal.cc +++ b/src/bin/align-equal.cc @@ -36,10 +36,13 @@ int main(int argc, char *argv[]) { using fst::VectorFst; using fst::StdArc; - const char *usage = "Write equally spaced alignments of utterances (to get training started)\n" - "Usage: align-equal \n" + const char *usage = "Write equally spaced alignments of utterances " + "(to get training started)\n" + "Usage: align-equal " + " \n" "e.g.: \n" - " align-equal 1.tree 1.mdl lex.fst scp:train.scp ark:train.tra ark:equal.ali\n"; + " align-equal 1.tree 1.mdl lex.fst scp:train.scp " + "'ark:sym2int.pl -f 2- words.txt text|' ark:equal.ali\n"; ParseOptions po(usage); std::string disambig_rxfilename; diff --git a/src/bin/compile-train-graphs.cc b/src/bin/compile-train-graphs.cc index 6636ef88878..874d079376e 100644 --- a/src/bin/compile-train-graphs.cc +++ b/src/bin/compile-train-graphs.cc @@ -37,9 +37,11 @@ int main(int argc, char *argv[]) { const char *usage = "Creates training graphs (without transition-probabilities, by default)\n" "\n" - "Usage: compile-train-graphs [options] \n" + "Usage: compile-train-graphs [options] " + " \n" "e.g.: \n" - " compile-train-graphs tree 1.mdl lex.fst ark:train.tra ark:graphs.fsts\n"; + " compile-train-graphs tree 1.mdl lex.fst " + "'ark:sym2int.pl -f 2- words.txt text|' ark:graphs.fsts\n"; ParseOptions po(usage); TrainingGraphCompilerOptions gopts; diff --git a/src/bin/compute-fscore.cc b/src/bin/compute-fscore.cc new file mode 100644 index 00000000000..eb231fe361e --- /dev/null +++ b/src/bin/compute-fscore.cc @@ -0,0 +1,153 @@ +// bin/compute-fscore.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" + +int main(int argc, char *argv[]) { + using namespace kaldi; + typedef kaldi::int32 int32; + + try { + const char *usage = + "Compute F1-score, precision, recall etc.\n" + "Takes two alignment files and computes statistics\n" + "\n" + "Usage: compute-fscore [options] \n" + " e.g.: compute-fscore ark:data/train/text ark:hyp_text\n"; + + ParseOptions po(usage); + + std::string mode = "strict"; + std::string mask_rspecifier; + + po.Register("mode", &mode, + "Scoring mode: \"present\"|\"all\"|\"strict\":\n" + " \"present\" means score those we have transcriptions for\n" + " \"all\" means treat absent transcriptions as empty\n" + " \"strict\" means die if all in ref not also in hyp"); + po.Register("mask", &mask_rspecifier, + "Only score on frames where mask is 1"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string ref_rspecifier = po.GetArg(1); + std::string hyp_rspecifier = po.GetArg(2); + + if (mode != "strict" && mode != "present" && mode != "all") { + KALDI_ERR << "--mode option invalid: expected \"present\"|\"all\"|\"strict\", got " + << mode; + } + + int64 num_tp = 0, num_fp = 0, num_tn = 0, num_fn = 0, num_frames = 0; + int32 num_absent_sents = 0; + + // Alignments are loaded as vectors of integers, + SequentialInt32VectorReader ref_reader(ref_rspecifier); + RandomAccessInt32VectorReader hyp_reader(hyp_rspecifier); + RandomAccessInt32VectorReader mask_reader(mask_rspecifier); + + // Main loop: accumulate per-frame classification stats, + for (; !ref_reader.Done(); ref_reader.Next()) { + const std::string &key = ref_reader.Key(); + const std::vector &ref_ali = ref_reader.Value(); + std::vector hyp_ali; + if (!hyp_reader.HasKey(key)) { + if (mode == "strict") + KALDI_ERR << "No hypothesis for key " << key << " and strict " + "mode specified."; + num_absent_sents++; + if (mode == "present") // do not score this one. + continue; + } else { + hyp_ali = hyp_reader.Value(key); + } + + std::vector mask_ali; + if (!mask_rspecifier.empty()) { + if (!mask_reader.HasKey(key)) { + if (mode == "strict") + KALDI_ERR << "No mask for key " << key << " and strict " + "mode specified."; + num_absent_sents++; + if (mode == "present") // do not score this one.
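// Editor's aside (not part of the patch) -- a worked example, with
// hypothetical counts, of the statistics this tool prints below: given
// TP=8, FP=2, FN=4, TN=6 over 20 scored frames,
//   precision   = TP/(TP+FP)     = 8/10  = 0.80
//   recall      = TP/(TP+FN)     = 8/12  = 0.67
//   F1          = 2*P*R/(P+R)    = 0.73
//   specificity = TN/(TN+FP)     = 6/8   = 0.75
//   accuracy    = (TP+TN)/frames = 14/20 = 0.70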
+ continue; + } else { + mask_ali = mask_reader.Value(key); + } + } + + for (int32 i = 0; i < ref_ali.size(); i++) { + if ( (i < hyp_ali.size() && hyp_ali[i] != 0 && hyp_ali[i] != 1) || + (i < ref_ali.size() && ref_ali[i] != 0 && ref_ali[i] != 1) || + (i < mask_ali.size() && mask_ali[i] != 0 && mask_ali[i] != 1) ) { + KALDI_ERR << "Expecting alignment to be 0s or 1s"; + } + + if (!mask_rspecifier.empty() && (std::abs(static_cast(ref_ali.size()) - static_cast(mask_ali.size())) > 2) ) + KALDI_ERR << "Length mismatch: mask vs ref"; + + if (!mask_rspecifier.empty() && (i >= mask_ali.size() || mask_ali[i] == 0)) continue; + num_frames++; + + if (ref_ali[i] == 1 && i >= hyp_ali.size()) { num_fn++; continue; } + if (ref_ali[i] == 0 && i >= hyp_ali.size()) { num_tn++; continue; } + + if (ref_ali[i] == 1 && hyp_ali[i] == 1) num_tp++; + else if (ref_ali[i] == 0 && hyp_ali[i] == 1) num_fp++; + else if (ref_ali[i] == 1 && hyp_ali[i] == 0) num_fn++; + else if (ref_ali[i] == 0 && hyp_ali[i] == 0) num_tn++; + else + KALDI_ERR << "Unknown condition"; + } + } + + // Print the output, + std::cout.precision(2); + std::cerr.precision(2); + + BaseFloat precision = static_cast(num_tp) / (num_tp + num_fp); + BaseFloat recall = static_cast(num_tp) / (num_tp + num_fn); + + std::cout << "F1 " << 2 * precision * recall / (precision + recall) << "\n"; + std::cout << "Precision " << precision << "\n"; + std::cout << "Recall " << recall << "\n"; + std::cout << "Specificity " + << static_cast(num_tn) / (num_tn + num_fp) << "\n"; + std::cout << "Accuracy " + << static_cast(num_tp + num_tn) / num_frames << "\n"; + + std::cerr << "TP " << num_tp << "\n"; + std::cerr << "FP " << num_fp << "\n"; + std::cerr << "TN " << num_tn << "\n"; + std::cerr << "FN " << num_fn << "\n"; + std::cerr << "Length " << num_frames << "\n"; + + return 0; + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/bin/convert-ali.cc b/src/bin/convert-ali.cc index 3a52b7904a0..89fe838638c 100644 --- a/src/bin/convert-ali.cc +++ b/src/bin/convert-ali.cc @@ -39,6 +39,7 @@ int main(int argc, char *argv[]) { int32 frame_subsampling_factor = 1; bool reorder = true; + bool repeat_frames = false; std::string phone_map_rxfilename; ParseOptions po(usage); @@ -48,6 +49,11 @@ po.Register("reorder", &reorder, "True if you want the converted alignments to be 'reordered' " "versus the way they appear in the HmmTopology object"); + po.Register("repeat-frames", &repeat_frames, + "Only relevant when frame-subsampling-factor != 1. If true, " + "repeat frames of alignment by 'frame-subsampling-factor' " + "after alignment conversion, to keep the alignment the same " + "length as the input alignment."); po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Can be used in converting alignments to reduced frame rates."); @@ -98,6 +104,7 @@ new_ctx_dep, old_alignment, frame_subsampling_factor, + repeat_frames, reorder, (phone_map_rxfilename != "" ?
&phone_map : NULL), &new_alignment)) { diff --git a/src/bin/copy-matrix.cc b/src/bin/copy-matrix.cc index d7b8181c64c..56f2e51d90f 100644 --- a/src/bin/copy-matrix.cc +++ b/src/bin/copy-matrix.cc @@ -36,16 +36,30 @@ int main(int argc, char *argv[]) { " e.g.: copy-matrix --binary=false 1.mat -\n" " copy-matrix ark:2.trans ark,t:-\n" "See also: copy-feats\n"; - + bool binary = true; + bool apply_log = false; + bool apply_exp = false; + bool apply_softmax_per_row = false; + BaseFloat apply_power = 1.0; BaseFloat scale = 1.0; + ParseOptions po(usage); po.Register("binary", &binary, "Write in binary mode (only relevant if output is a wxfilename)"); po.Register("scale", &scale, "This option can be used to scale the matrices being copied."); - + po.Register("apply-log", &apply_log, + "This option can be used to apply log to the matrices. " + "Must be avoided if the matrix has negative entries."); + po.Register("apply-exp", &apply_exp, + "This option can be used to apply exp to the matrices"); + po.Register("apply-power", &apply_power, + "This option can be used to apply a power to the matrices"); + po.Register("apply-softmax-per-row", &apply_softmax_per_row, + "This option can be used to apply softmax per row of the matrices"); + po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -53,6 +67,10 @@ exit(1); } + if ( (apply_log && apply_exp) || (apply_softmax_per_row && apply_exp) || + (apply_softmax_per_row && apply_log) ) + KALDI_ERR << "Only one of apply-log, apply-exp and " + << "apply-softmax-per-row can be given"; std::string matrix_in_fn = po.GetArg(1), matrix_out_fn = po.GetArg(2); @@ -68,11 +86,15 @@ if (in_is_rspecifier != out_is_wspecifier) KALDI_ERR << "Cannot mix archives with regular files (copying matrices)"; - + if (!in_is_rspecifier) { Matrix mat; ReadKaldiObject(matrix_in_fn, &mat); if (scale != 1.0) mat.Scale(scale); + if (apply_log) mat.ApplyLog(); + if (apply_exp) mat.ApplyExp(); + if (apply_softmax_per_row) mat.ApplySoftMaxPerRow(); + if (apply_power != 1.0) mat.ApplyPow(apply_power); Output ko(matrix_out_fn, binary); mat.Write(ko.Stream(), binary); KALDI_LOG << "Copied matrix to " << matrix_out_fn; @@ -82,9 +104,14 @@ BaseFloatMatrixWriter writer(matrix_out_fn); SequentialBaseFloatMatrixReader reader(matrix_in_fn); for (; !reader.Done(); reader.Next(), num_done++) { - if (scale != 1.0) { + if (scale != 1.0 || apply_log || apply_exp || + apply_power != 1.0 || apply_softmax_per_row) { Matrix mat(reader.Value()); - mat.Scale(scale); + if (scale != 1.0) mat.Scale(scale); + if (apply_log) mat.ApplyLog(); + if (apply_exp) mat.ApplyExp(); + if (apply_softmax_per_row) mat.ApplySoftMaxPerRow(); + if (apply_power != 1.0) mat.ApplyPow(apply_power); writer.Write(reader.Key(), mat); } else { writer.Write(reader.Key(), reader.Value()); diff --git a/src/bin/matrix-add-offset.cc b/src/bin/matrix-add-offset.cc new file mode 100644 index 00000000000..90f72ba3254 --- /dev/null +++ b/src/bin/matrix-add-offset.cc @@ -0,0 +1,84 @@ +// bin/matrix-add-offset.cc + +// Copyright 2015 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
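The options added to copy-matrix above turn it into a small element-wise calculator. A hedged usage sketch (editor's illustration; archive names are hypothetical, the flags are the ones registered above, and apply-log/apply-exp/apply-softmax-per-row are mutually exclusive):

    copy-matrix --apply-log=true ark:post.ark ark:log_post.ark       # to log domain
    copy-matrix --apply-exp=true ark:log_post.ark ark:post.ark       # back to probabilities
    copy-matrix --scale=0.5 --apply-power=2.0 ark:m.ark ark:out.ark  # scale first, then square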
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Add an offset vector to the rows of matrices in a table.\n" + "\n" + "Usage: matrix-add-offset [options] " + " \n" + "e.g.: matrix-add-offset log_post.mat neg_priors.vec log_like.mat\n" + "See also: matrix-sum-rows, matrix-sum, vector-sum\n"; + + + ParseOptions po(usage); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + std::string rspecifier = po.GetArg(1); + std::string vector_rxfilename = po.GetArg(2); + std::string wspecifier = po.GetArg(3); + + SequentialBaseFloatMatrixReader mat_reader(rspecifier); + BaseFloatMatrixWriter mat_writer(wspecifier); + + int32 num_done = 0; + + Vector vec; + { + bool binary_in; + Input ki(vector_rxfilename, &binary_in); + vec.Read(ki.Stream(), binary_in); + } + + for (; !mat_reader.Done(); mat_reader.Next()) { + std::string key = mat_reader.Key(); + Matrix mat(mat_reader.Value()); + if (vec.Dim() != mat.NumCols()) { + KALDI_ERR << "Mismatch in vector dimension and " + << "number of columns in matrix; " + << vec.Dim() << " vs " << mat.NumCols(); + } + mat.AddVecToRows(1.0, vec); + mat_writer.Write(key, mat); + num_done++; + } + + KALDI_LOG << "Added offset to " << num_done << " matrices."; + + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/bin/matrix-dot-product.cc b/src/bin/matrix-dot-product.cc new file mode 100644 index 00000000000..a292cab9a40 --- /dev/null +++ b/src/bin/matrix-dot-product.cc @@ -0,0 +1,183 @@ +// bin/matrix-dot-product.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Get element-wise dot product of matrices. Always returns a matrix " + "that is the same size as the first matrix.\n" + "If there is a mismatch in number of rows, the utterance is skipped, " + "unless the mismatch is within a tolerance. 
If the second matrix has " + "number of rows that is larger than the first matrix by less than the " + "specified tolerance, then a submatrix of the second matrix is " + "multiplied element-wise with the first matrix.\n" + "\n" + "Usage: matrix-dot-product [options] " + "[ ...] " + "\n" + " e.g.: matrix-dot-product ark:1.weights ark:2.weights " + "ark:combine.weights\n" + "or \n" + "Usage: matrix-dot-product [options] " + "[ ...] " + "\n" + " e.g.: matrix-dot-product --binary=false 1.mat 2.mat product.mat\n" + "See also: matrix-sum, matrix-sum-rows\n"; + + bool binary = true; + int32 length_tolerance = 0; + + ParseOptions po(usage); + + po.Register("binary", &binary, "If true, write output as binary (only " + "relevant for the second, single-file usage)"); + po.Register("length-tolerance", &length_tolerance, + "Tolerate a length mismatch of up to this many frames"); + + po.Read(argc, argv); + + if (po.NumArgs() < 2) { + po.PrintUsage(); + exit(1); + } + + int32 N = po.NumArgs(); + std::string matrix_in_fn1 = po.GetArg(1), + matrix_out_fn = po.GetArg(N); + + if (ClassifyWspecifier(matrix_out_fn, NULL, NULL, NULL) != kNoWspecifier) { + // output to table. + + // Output matrix + BaseFloatMatrixWriter matrix_writer(matrix_out_fn); + + // Input matrices + SequentialBaseFloatMatrixReader matrix_reader1(matrix_in_fn1); + std::vector + matrix_readers(N-2, + static_cast(NULL)); + std::vector matrix_in_fns(N-2); + for (int32 i = 2; i < N; ++i) { + matrix_readers[i-2] = new RandomAccessBaseFloatMatrixReader( + po.GetArg(i)); + matrix_in_fns[i-2] = po.GetArg(i); + } + int32 n_utts = 0, n_total_matrices = 0, + n_success = 0, n_missing = 0, n_other_errors = 0; + + for (; !matrix_reader1.Done(); matrix_reader1.Next()) { + std::string key = matrix_reader1.Key(); + Matrix matrix1 = matrix_reader1.Value(); + matrix_reader1.FreeCurrent(); + n_utts++; + n_total_matrices++; + + Matrix matrix_out(matrix1); + + int32 i = 0; + for (i = 0; i < N-2; ++i) { + bool failed = false; // Indicates failure for this key.
+ if (matrix_readers[i]->HasKey(key)) { + const Matrix &matrix2 = matrix_readers[i]->Value(key); + n_total_matrices++; + if (SameDim(matrix2, matrix_out)) { + matrix_out.MulElements(matrix2); + } else { + KALDI_WARN << "Dimension mismatch for utterance " << key + << " : " << matrix2.NumRows() << " by " + << matrix2.NumCols() << " for " + << "system " << (i + 2) << ", rspecifier: " + << matrix_in_fns[i] << " vs " << matrix_out.NumRows() + << " by " << matrix_out.NumCols() + << " primary matrix, rspecifier: " << matrix_in_fn1; + if (matrix2.NumRows() - matrix_out.NumRows() <= + length_tolerance) { + KALDI_WARN << "Tolerated length mismatch for key " << key; + matrix_out.MulElements(matrix2.Range(0, matrix_out.NumRows(), + 0, matrix2.NumCols())); + } else { + KALDI_WARN << "Skipping key " << key; + failed = true; + n_other_errors++; + } + } + } else { + KALDI_WARN << "No matrix found for utterance " << key << " for " + << "system " << (i + 2) << ", rspecifier: " + << matrix_in_fns[i]; + failed = true; + n_missing++; + } + + if (failed) break; + } + + if (i != N-2) // Skipping utterance + continue; + + matrix_writer.Write(key, matrix_out); + n_success++; + } + + KALDI_LOG << "Processed " << n_utts << " utterances, with a total of " + << n_total_matrices << " matrices across " << (N-1) + << " different systems."; + KALDI_LOG << "Produced output for " << n_success << " utterances; " + << n_missing << " total missing matrices and skipped " + << n_other_errors << " matrices."; + + DeletePointers(&matrix_readers); + + return (n_success != 0 && n_missing < (n_success - n_missing)) ? 0 : 1; + } else { + for (int32 i = 1; i < N; i++) { + if (ClassifyRspecifier(po.GetArg(i), NULL, NULL) != kNoRspecifier) { + KALDI_ERR << "Wrong usage: if last argument is not " + << "table, the other arguments must not be tables."; + } + } + + Matrix mat1; + ReadKaldiObject(po.GetArg(1), &mat1); + + for (int32 i = 2; i < N; i++) { + Matrix mat; + ReadKaldiObject(po.GetArg(i), &mat); + + mat1.MulElements(mat); + } + + WriteKaldiObject(mat1, po.GetArg(N), binary); + KALDI_LOG << "Multiplied " << (po.NumArgs() - 1) << " matrices; " + << "wrote product to " << PrintableWxfilename(po.GetArg(N)); + + return 0; + } + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/bin/matrix-sum-rows.cc b/src/bin/matrix-sum-rows.cc index 7e60483eef2..ee6504ba2b1 100644 --- a/src/bin/matrix-sum-rows.cc +++ b/src/bin/matrix-sum-rows.cc @@ -34,9 +34,13 @@ int main(int argc, char *argv[]) { "e.g.: matrix-sum-rows ark:- ark:- | vector-sum ark:- sum.vec\n" "See also: matrix-sum, vector-sum\n"; + bool do_average = false; ParseOptions po(usage); + po.Register("do-average", &do_average, + "Do average instead of sum"); + po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -45,28 +49,28 @@ } std::string rspecifier = po.GetArg(1); std::string wspecifier = po.GetArg(2); - + SequentialBaseFloatMatrixReader mat_reader(rspecifier); BaseFloatVectorWriter vec_writer(wspecifier); - + int32 num_done = 0; int64 num_rows_done = 0; - + for (; !mat_reader.Done(); mat_reader.Next()) { std::string key = mat_reader.Key(); Matrix mat(mat_reader.Value()); Vector vec(mat.NumCols()); - vec.AddRowSumMat(1.0, mat, 0.0); + vec.AddRowSumMat(!do_average ? 1.0 : 1.0 / mat.NumRows(), mat, 0.0); // Do the summation in double, to minimize roundoff.
Vector float_vec(vec); vec_writer.Write(key, float_vec); num_done++; num_rows_done += mat.NumRows(); } - + KALDI_LOG << "Summed rows " << num_done << " matrices, " << num_rows_done << " rows in total."; - + return (num_done != 0 ? 0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc index f9b9291a90b..0d7ab12c232 100644 --- a/src/bin/phones-to-prons.cc +++ b/src/bin/phones-to-prons.cc @@ -80,7 +80,8 @@ int main(int argc, char *argv[]) { " \n" "e.g.: \n" " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n" - " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:1.prons\n"; + " phones-to-prons L_align.fst 46 47 ark:- " + "'ark:sym2int.pl -f 2- words.txt text|' ark:1.prons\n"; ParseOptions po(usage); po.Read(argc, argv); @@ -170,11 +171,7 @@ << "not reach end-state, or mismatched lexicon.)"; if (g_kaldi_verbose_level >= 2) { KALDI_LOG << "phn2word FST is below:"; -#ifdef HAVE_OPENFST_GE_10400 fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t"); -#else - fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cerr, "standard error"); KALDI_LOG << "phone sequence is: "; for (size_t i = 0; i < phones.size(); i++) @@ -219,5 +216,3 @@ return -1; } } - - diff --git a/src/bin/prons-to-wordali.cc b/src/bin/prons-to-wordali.cc index 8e89d7cc644..a6331043500 100644 --- a/src/bin/prons-to-wordali.cc +++ b/src/bin/prons-to-wordali.cc @@ -52,8 +52,8 @@ int main(int argc, char *argv[]) { " \n" "e.g.: \n" " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n" - " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:- | \\\n" - " prons-to-wordali ark:- \\\n" + " phones-to-prons L_align.fst 46 47 ark:- 'ark:sym2int.pl -f 2- words.txt text|' \\\n" + " ark:- | prons-to-wordali ark:- \\\n" " \"ark:ali-to-phones --write-lengths 1.mdl ark:1.ali ark:-|\" ark:1.wali\n"; ParseOptions po(usage); diff --git a/src/bin/vector-scale.cc b/src/bin/vector-scale.cc index 60d4d3121d2..ea68ae31ad0 100644 --- a/src/bin/vector-scale.cc +++ b/src/bin/vector-scale.cc @@ -30,11 +30,14 @@ int main(int argc, char *argv[]) { const char *usage = "Scale a set of vectors in a Table (useful for speaker vectors and " "per-frame weights)\n" - "Usage: vector-scale [options] \n"; + "Usage: vector-scale [options] \n"; ParseOptions po(usage); BaseFloat scale = 1.0; + bool binary = false; + po.Register("binary", &binary, "If true, write output as binary " + "(not relevant for archives)"); po.Register("scale", &scale, "Scaling factor for vectors"); po.Read(argc, argv); @@ -43,17 +46,33 @@ exit(1); } - std::string rspecifier = po.GetArg(1); - std::string wspecifier = po.GetArg(2); + std::string vector_in_fn = po.GetArg(1); + std::string vector_out_fn = po.GetArg(2); - BaseFloatVectorWriter vec_writer(wspecifier); - - SequentialBaseFloatVectorReader vec_reader(rspecifier); - for (; !vec_reader.Done(); vec_reader.Next()) { - Vector vec(vec_reader.Value()); + if (ClassifyWspecifier(vector_in_fn, NULL, NULL, NULL) != kNoWspecifier) { + if (ClassifyRspecifier(vector_in_fn, NULL, NULL) == kNoRspecifier) { + KALDI_ERR << "Cannot mix archives and regular files"; + } + BaseFloatVectorWriter vec_writer(vector_out_fn); + SequentialBaseFloatVectorReader vec_reader(vector_in_fn); + for (; !vec_reader.Done(); vec_reader.Next()) { + Vector vec(vec_reader.Value()); + vec.Scale(scale); + vec_writer.Write(vec_reader.Key(), vec); + } + }
else { + if (ClassifyRspecifier(vector_in_fn, NULL, NULL) != kNoRspecifier) { + KALDI_ERR << "Cannot mix archives and regular files"; + } + bool binary_in; + Input ki(vector_in_fn, &binary_in); + Vector vec; + vec.Read(ki.Stream(), binary_in); vec.Scale(scale); - vec_writer.Write(vec_reader.Key(), vec); + Output ko(vector_out_fn, binary); + vec.Write(ko.Stream(), binary); } + return 0; } catch(const std::exception &e) { std::cerr << e.what(); diff --git a/src/bin/weight-matrix.cc b/src/bin/weight-matrix.cc new file mode 100644 index 00000000000..c6823b8da29 --- /dev/null +++ b/src/bin/weight-matrix.cc @@ -0,0 +1,84 @@ +// bin/weight-matrix.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + typedef kaldi::int32 int32; + + const char *usage = + "Takes archives (typically per-utterance) of features and " + "per-frame weights,\n" + "and weights the features by the per-frame weights\n" + "\n" + "Usage: weight-matrix " + "\n"; + + ParseOptions po(usage); + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + + std::string matrix_rspecifier = po.GetArg(1), + weights_rspecifier = po.GetArg(2), + matrix_wspecifier = po.GetArg(3); + + SequentialBaseFloatMatrixReader matrix_reader(matrix_rspecifier); + RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier); + BaseFloatMatrixWriter matrix_writer(matrix_wspecifier); + + int32 num_done = 0, num_err = 0; + + for (; !matrix_reader.Done(); matrix_reader.Next()) { + std::string key = matrix_reader.Key(); + Matrix mat = matrix_reader.Value(); + if (!weights_reader.HasKey(key)) { + KALDI_WARN << "No weight vectors for utterance " << key; + num_err++; + continue; + } + const Vector &weights = weights_reader.Value(key); + if (weights.Dim() != mat.NumRows()) { + KALDI_WARN << "Weights for utterance " << key + << " have wrong size, " << weights.Dim() + << " vs. " << mat.NumRows(); + num_err++; + continue; + } + mat.MulRowsVec(weights); + matrix_writer.Write(key, mat); + num_done++; + } + KALDI_LOG << "Applied per-frame weights for " << num_done + << " matrices; errors on " << num_err; + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/bin/weight-pdf-post.cc b/src/bin/weight-pdf-post.cc new file mode 100644 index 00000000000..c7477a046c8 --- /dev/null +++ b/src/bin/weight-pdf-post.cc @@ -0,0 +1,154 @@ +// bin/weight-pdf-post.cc + +// Copyright 2015 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
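A hypothetical invocation of the weight-matrix tool defined above, e.g. to zero out feature frames that some VAD marked as silence (editor's sketch; all archive names are invented):

    weight-matrix ark:feats.ark ark:frame_weights.ark ark:weighted_feats.ark

Each row t of an utterance's feature matrix is multiplied by element t of the matching weight vector; utterances whose weight vector is missing or of the wrong length are skipped with a warning.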
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "gmm/am-diag-gmm.h" +#include "hmm/transition-model.h" +#include "hmm/hmm-utils.h" +#include "hmm/posterior.h" + +namespace kaldi { + +void WeightPdfPost(const ConstIntegerSet &pdf_set, + BaseFloat pdf_scale, + Posterior *post) { + for (size_t i = 0; i < post->size(); i++) { + std::vector > this_post; + this_post.reserve((*post)[i].size()); + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + if (pdf_set.count(pdf_id) != 0) { // pdf is in the specified set. + if (pdf_scale != 0.0) + this_post.push_back(std::make_pair(pdf_id, weight*pdf_scale)); + } else { + this_post.push_back(std::make_pair(pdf_id, weight)); + } + } + (*post)[i].swap(this_post); + } +} + +void WeightPdfPostDistributed(const ConstIntegerSet &pdf_set, + BaseFloat pdf_scale, + Posterior *post) { + for (size_t i = 0; i < post->size(); i++) { + std::vector > this_post; + this_post.reserve((*post)[i].size()); + BaseFloat sil_weight = 0.0, nonsil_weight = 0.0; + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + if (pdf_set.count(pdf_id) != 0) + sil_weight += weight; + else + nonsil_weight += weight; + } + // This "distributed" weighting approach doesn't make sense if we have + // negative weights. + KALDI_ASSERT(sil_weight >= 0.0 && nonsil_weight >= 0.0); + if (sil_weight + nonsil_weight == 0.0) continue; + BaseFloat frame_scale = (sil_weight * pdf_scale + nonsil_weight) / + (sil_weight + nonsil_weight); + if (frame_scale != 0.0) { + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + this_post.push_back(std::make_pair(pdf_id, weight * frame_scale)); + } + } + (*post)[i].swap(this_post); + } +} + +} // namespace kaldi + +int main(int argc, char *argv[]) { + using namespace kaldi; + typedef kaldi::int32 int32; + try { + const char *usage = + "Apply a weight to the posteriors of specific pdfs\n" + "Usage: weight-pdf-post [options] " + " \n" + "e.g.:\n" + " weight-pdf-post 0.00001 0:2 ark:1.post ark:nosil.post\n"; + + ParseOptions po(usage); + + bool distribute = false; + + po.Register("distribute", &distribute, "If true, rather than weighting the " + "individual posteriors, apply the weighting to the " + "whole frame: i.e. on time t, scale all posterior entries by "
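// Editor's aside (not part of the patch) -- worked example, with a
// hypothetical frame, of the --distribute formula implemented in
// WeightPdfPostDistributed() above: if the posterior mass on in-set pdfs
// is 0.3, the out-of-set mass is 0.7, and the weight is 0.0, then
//   frame_scale = (0.3*0.0 + 0.7*1.0) / (0.3 + 0.7) = 0.7,
// so every entry on that frame is scaled by 0.7, rather than only the
// in-set entries being zeroed as in the default mode.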
+ "p(in-set)*weight + p(out-of-set)*1.0"); + + po.Read(argc, argv); + + if (po.NumArgs() != 4) { + po.PrintUsage(); + exit(1); + } + + std::string pdf_weight_str = po.GetArg(1), + pdfs_str = po.GetArg(2), + posteriors_rspecifier = po.GetArg(3), + posteriors_wspecifier = po.GetArg(4); + + BaseFloat pdf_weight = 0.0; + if (!ConvertStringToReal(pdf_weight_str, &pdf_weight)) + KALDI_ERR << "Invalid pdf-weight parameter: expected float, got \"" + << pdf_weight_str << '"'; + std::vector pdfs; + if (!SplitStringToIntegers(pdfs_str, ":", false, &pdfs)) + KALDI_ERR << "Invalid pdf string " << pdfs_str; + if (pdfs.empty()) + KALDI_WARN << "No pdf specified, this will have no effect"; + ConstIntegerSet pdf_set(pdfs); // faster lookup. + + int32 num_posteriors = 0; + SequentialPosteriorReader posterior_reader(posteriors_rspecifier); + PosteriorWriter posterior_writer(posteriors_wspecifier); + + for (; !posterior_reader.Done(); posterior_reader.Next()) { + num_posteriors++; + // Posterior is vector > > + Posterior post = posterior_reader.Value(); + if (distribute) + WeightPdfPostDistributed(pdf_set, + pdf_weight, &post); + else + WeightPdfPost(pdf_set, + pdf_weight, &post); + + posterior_writer.Write(posterior_reader.Key(), post); + } + KALDI_LOG << "Done " << num_posteriors << " posteriors."; + return (num_posteriors != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/bin/weight-post.cc b/src/bin/weight-post.cc index d536896eaaa..bbaad465195 100644 --- a/src/bin/weight-post.cc +++ b/src/bin/weight-post.cc @@ -26,32 +26,38 @@ int main(int argc, char *argv[]) { try { using namespace kaldi; - typedef kaldi::int32 int32; + typedef kaldi::int32 int32; + + int32 length_tolerance = 2; const char *usage = "Takes archives (typically per-utterance) of posteriors and per-frame weights,\n" "and weights the posteriors by the per-frame weights\n" "\n" "Usage: weight-post \n"; - + ParseOptions po(usage); + + po.Register("length-tolerance", &length_tolerance, + "Tolerate this many frames of length mismatch"); + po.Read(argc, argv); if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); } - + std::string post_rspecifier = po.GetArg(1), weights_rspecifier = po.GetArg(2), post_wspecifier = po.GetArg(3); SequentialPosteriorReader posterior_reader(post_rspecifier); RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier); - PosteriorWriter post_writer(post_wspecifier); - + PosteriorWriter post_writer(post_wspecifier); + int32 num_done = 0, num_err = 0; - + for (; !posterior_reader.Done(); posterior_reader.Next()) { std::string key = posterior_reader.Key(); Posterior post = posterior_reader.Value(); @@ -61,7 +67,8 @@ continue; } const Vector &weights = weights_reader.Value(key); - if (weights.Dim() != static_cast(post.size())) { + if (std::abs(weights.Dim() - static_cast(post.size())) > + length_tolerance) { KALDI_WARN << "Weights for utterance " << key << " have wrong size, " << weights.Dim() << " vs. " << post.size(); @@ -71,7 +78,7 @@ for (size_t i = 0; i < post.size(); i++) { if (weights(i) == 0.0) post[i].clear(); for (size_t j = 0; j < post[i].size(); j++) - post[i][j].second *= weights(i); + post[i][j].second *= i < weights.Dim() ?
diff --git a/src/bin/weight-post.cc b/src/bin/weight-post.cc
index d536896eaaa..bbaad465195 100644
--- a/src/bin/weight-post.cc
+++ b/src/bin/weight-post.cc
@@ -26,32 +26,38 @@ int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
-    typedef kaldi::int32 int32;
+    typedef kaldi::int32 int32;
+
+    int32 length_tolerance = 2;
 
     const char *usage =
         "Takes archives (typically per-utterance) of posteriors and per-frame weights,\n"
         "and weights the posteriors by the per-frame weights\n"
         "\n"
         "Usage: weight-post <post-rspecifier> <weights-rspecifier> <post-wspecifier>\n";
 
     ParseOptions po(usage);
+
+    po.Register("length-tolerance", &length_tolerance,
+                "Tolerate this many frames of length mismatch");
+
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string post_rspecifier = po.GetArg(1),
         weights_rspecifier = po.GetArg(2),
         post_wspecifier = po.GetArg(3);
 
     SequentialPosteriorReader posterior_reader(post_rspecifier);
     RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier);
     PosteriorWriter post_writer(post_wspecifier);
 
     int32 num_done = 0, num_err = 0;
 
     for (; !posterior_reader.Done(); posterior_reader.Next()) {
       std::string key = posterior_reader.Key();
       Posterior post = posterior_reader.Value();
@@ -61,7 +67,8 @@ int main(int argc, char *argv[]) {
         continue;
       }
       const Vector<BaseFloat> &weights = weights_reader.Value(key);
-      if (weights.Dim() != static_cast<int32>(post.size())) {
+      if (std::abs(weights.Dim() - static_cast<int32>(post.size())) >
+          length_tolerance) {
         KALDI_WARN << "Weights for utterance " << key
                    << " have wrong size, " << weights.Dim()
                    << " vs. " << post.size();
@@ -71,7 +78,9 @@ int main(int argc, char *argv[]) {
       for (size_t i = 0; i < post.size(); i++) {
-        if (weights(i) == 0.0) post[i].clear();
+        BaseFloat weight = (static_cast<int32>(i) < weights.Dim() ?
+                            weights(i) : 0.0);
+        if (weight == 0.0) post[i].clear();
         for (size_t j = 0; j < post[i].size(); j++)
-          post[i][j].second *= weights(i);
+          post[i][j].second *= weight;
       }
       post_writer.Write(key, post);
       num_done++;
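With --length-tolerance, the posteriors may now run slightly past the end of the weights vector; frames beyond the weights simply get weight zero. A standalone sketch of that padding rule (std types only, illustrative values):

    #include <cstdlib>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<float> weights = {1.0f, 0.5f};  // per-frame weights
      int num_post_frames = 4, length_tolerance = 2;
      if (std::abs((int)weights.size() - num_post_frames) > length_tolerance) {
        std::cerr << "length mismatch beyond tolerance\n";
        return 1;
      }
      for (int i = 0; i < num_post_frames; i++) {
        // Frames past the end of the weights vector get weight zero.
        float w = i < (int)weights.size() ? weights[i] : 0.0f;
        std::cout << "frame " << i << " weight " << w << "\n";
      }
      return 0;
    }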
diff --git a/src/chain/chain-den-graph.cc b/src/chain/chain-den-graph.cc
index 6f494a0c562..5386f959b1f 100644
--- a/src/chain/chain-den-graph.cc
+++ b/src/chain/chain-den-graph.cc
@@ -186,7 +186,7 @@ void MinimizeAcceptorNoPush(fst::StdVectorFst *fst) {
   fst::EncodeMapper<fst::StdArc> encoder(fst::kEncodeLabels | fst::kEncodeWeights,
                                          fst::ENCODE);
   fst::Encode(fst, &encoder);
-  fst::AcceptorMinimize(fst);
+  fst::internal::AcceptorMinimize(fst);
   fst::Decode(fst, encoder);
 }
 
diff --git a/src/chain/chain-supervision-test.cc b/src/chain/chain-supervision-test.cc
index 0f0a3009ccd..33d3c74e3a3 100644
--- a/src/chain/chain-supervision-test.cc
+++ b/src/chain/chain-supervision-test.cc
@@ -606,22 +606,22 @@ void TestRanges() {
 
 int main() {
   using namespace kaldi;
-
-  for (int32 loop = 0; loop < 2; loop++) {
+  int32 loop = 0;
 #if HAVE_CUDA == 1
+  for (loop = 0; loop < 2; loop++) {
     CuDevice::Instantiate().SetDebugStrideMode(true);
     if (loop == 0)
       CuDevice::Instantiate().SelectGpuId("no");
     else
       CuDevice::Instantiate().SelectGpuId("yes");
 #endif
-    for (int32 i = 0; i < 5; i++) {
+    for (int32 i = 0; i < 3; i++) {
      kaldi::chain::ChainSupervisionTest();
       kaldi::chain::BreadthFirstTest();
     }
     kaldi::chain::TestRanges();
 #if HAVE_CUDA == 1
-    CuDevice::Instantiate().PrintProfile();
-#endif
   }
+  CuDevice::Instantiate().PrintProfile();
+#endif
 }
 
diff --git a/src/chain/chain-supervision.cc b/src/chain/chain-supervision.cc
index aad1320e0a0..b5597b15667 100644
--- a/src/chain/chain-supervision.cc
+++ b/src/chain/chain-supervision.cc
@@ -804,26 +804,5 @@ void GetWeightsForRanges(int32 range_length,
 }
 
 
-void GetWeightsForRangesNew(int32 range_length,
-                            int32 num_frames_zeroed,
-                            const std::vector<int32> &range_starts,
-                            std::vector<Vector<BaseFloat> > *weights) {
-  KALDI_ASSERT(range_length > 0 && num_frames_zeroed * 2 < range_length);
-  int32 num_ranges = range_starts.size();
-  weights->resize(num_ranges);
-  for (int32 i = 0; i < num_ranges; i++) {
-    (*weights)[i].Resize(range_length);
-    (*weights)[i].Set(1.0);
-  }
-  if (num_frames_zeroed == 0)
-    return;
-  for (int32 i = 1; i < num_ranges; i++)
-    (*weights)[i].Range(0, num_frames_zeroed).Set(0.0);
-  for (int32 i = 0; i + 1 < num_ranges; i++)
-    (*weights)[i].Range(range_length - num_frames_zeroed,
-                        num_frames_zeroed).Set(0.0);
-}
-
-
 }  // namespace chain
 }  // namespace kaldi
diff --git a/src/chain/chain-supervision.h b/src/chain/chain-supervision.h
index 2dda8baf1e4..a94f68ade90 100644
--- a/src/chain/chain-supervision.h
+++ b/src/chain/chain-supervision.h
@@ -402,27 +402,6 @@ void GetWeightsForRanges(int32 range_length,
                          std::vector<Vector<BaseFloat> > *weights);
 
 
-/// This is a newer version of GetWeightsForRanges with a simpler behavior
-/// than GetWeightsForRanges and a different purpose.  Instead of aiming to
-/// create weights that sum to one over the whole file, the purpose is to
-/// zero out the derivative weights for a certain number of frames to each
-/// side of every 'cut point' in the numerator lattice [by numerator lattice,
-/// what I mean is the FST that we automatically generate from the numerator
-/// alignment or lattice].  So we don't zero out the weights for the very
-/// beginning or very end of each original utterance, just those where
-/// we split the utterance into pieces.  We believe there is an incentive
-/// for the network to produce deletions near the edges, and this aims to fix
-/// this problem.
-/// range_length is the length of each range of times (so range_starts[0]
-/// represents the start of a range of t values of length 'range_length',
-/// and so on for range_starts[1] etc.), and num_frames_zeroed is the number of
-/// frames on each side of the cut point on which we are supposed to zero out
-/// the derivative.
-void GetWeightsForRangesNew(int32 range_length,
-                            int32 num_frames_zeroed,
-                            const std::vector<int32> &range_starts,
-                            std::vector<Vector<BaseFloat> > *weights);
-
 typedef TableWriter<KaldiObjectHolder<Supervision> > SupervisionWriter;
 typedef SequentialTableReader<KaldiObjectHolder<Supervision> > SequentialSupervisionReader;
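The removed doc comment is the only description of what GetWeightsForRangesNew() produced, so its weight pattern is worth one concrete illustration: all ones, except that a few frames are zeroed on each side of every internal cut point, never at the utterance edges. A standalone sketch mirroring the deleted function (plain C++, illustrative sizes):

    #include <iostream>
    #include <vector>

    int main() {
      // 3 ranges of length 6, zeroing nz = 2 frames at each internal cut point.
      int range_length = 6, nz = 2, num_ranges = 3;
      std::vector<std::vector<float> > weights(
          num_ranges, std::vector<float>(range_length, 1.0f));
      for (int i = 1; i < num_ranges; i++)       // zero range starts (not the 1st)
        for (int t = 0; t < nz; t++) weights[i][t] = 0.0f;
      for (int i = 0; i + 1 < num_ranges; i++)   // zero range ends (not the last)
        for (int t = range_length - nz; t < range_length; t++)
          weights[i][t] = 0.0f;
      for (int i = 0; i < num_ranges; i++) {
        for (int t = 0; t < range_length; t++) std::cout << weights[i][t] << " ";
        std::cout << "\n";
      }
      // Prints:  1 1 1 1 0 0  /  0 0 1 1 0 0  /  0 0 1 1 1 1
      return 0;
    }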
"You requested --left-context=" << left_context + << ", but example only has left-context of " + << observed_left_context + << " (will warn only once; this may be harmless if " + "using any --*left-context-initial options)"; + } min_input_t = std::max(min_input_t, min_output_t - left_context); } if (right_context != -1) { - if (max_input_t < max_output_t + right_context + frame_subsampling_factor - 1) - KALDI_ERR << "You requested --right-context=" << right_context - << ", but example only has right-context of " - << (max_input_t - max_output_t - frame_subsampling_factor + 1); - max_input_t = std::min(max_input_t, max_output_t + right_context - + frame_subsampling_factor - 1); + int32 observed_right_context = max_input_t - max_output_t; + + if (right_context != -1) { + if (!warned_right && observed_right_context < right_context) { + warned_right = true; + KALDI_ERR << "You requested --right-context=" << right_context + << ", but example only has right-context of " + << observed_right_context + << " (will warn only once; this may be harmless if " + "using any --*right-context-final options."; + } + max_input_t = std::min(max_input_t, max_output_t + right_context); + } } FilterExample(eg, min_input_t, max_input_t, @@ -252,7 +265,6 @@ int main(int argc, char *argv[]) { bool random = false; int32 srand_seed = 0; int32 frame_shift = 0; - int32 truncate_deriv_weights = 0; int32 frame_subsampling_factor = -1; BaseFloat keep_proportion = 1.0; int32 left_context = -1, right_context = -1; @@ -269,9 +281,6 @@ int main(int argc, char *argv[]) { "in the supervision data (excluding iVector data) - useful in " "augmenting data. Note, the outputs will remain at the closest " "exact multiples of the frame subsampling factor"); - po.Register("truncate-deriv-weights", &truncate_deriv_weights, - "If nonzero, the number of initial/final subsample frames that " - "will have their derivatives' weights set to zero."); po.Register("left-context", &left_context, "Can be used to truncate the " "feature left-context that we output."); po.Register("right-context", &right_context, "Can be used to truncate the " @@ -307,7 +316,7 @@ int main(int argc, char *argv[]) { // count is normally 1; could be 0, or possibly >1. int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); - if (frame_shift == 0 && truncate_deriv_weights == 0 && + if (frame_shift == 0 && left_context == -1 && right_context == -1) { const NnetChainExample &eg = example_reader.Value(); for (int32 c = 0; c < count; c++) { @@ -325,8 +334,6 @@ int main(int argc, char *argv[]) { frame_subsampling_factor, &eg_out); else eg_out.Swap(&eg); - if (truncate_deriv_weights != 0) - TruncateDerivWeights(truncate_deriv_weights, &eg_out); for (int32 c = 0; c < count; c++) { int32 index = (random ? 
diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc
index cc463d179da..bf1e87d2452 100644
--- a/src/chainbin/nnet3-chain-get-egs.cc
+++ b/src/chainbin/nnet3-chain-get-egs.cc
@@ -41,155 +41,103 @@ namespace nnet3 {
 
 static bool ProcessFile(const fst::StdVectorFst &normalization_fst,
                         const MatrixBase<BaseFloat> &feats,
                         const MatrixBase<BaseFloat> *ivector_feats,
+                        int32 ivector_period,
                         const chain::Supervision &supervision,
                         const std::string &utt_id,
                         bool compress,
-                        int32 left_context,
-                        int32 right_context,
-                        int32 frames_per_eg,
-                        int32 frames_overlap_per_eg,
-                        int32 frame_subsampling_factor,
-                        int32 cut_zero_frames,
-                        int64 *num_frames_written,
-                        int64 *num_egs_written,
+                        UtteranceSplitter *utt_splitter,
                         NnetChainExampleWriter *example_writer) {
   KALDI_ASSERT(supervision.num_sequences == 1);
-  int32 num_feature_frames = feats.NumRows(),
-      num_output_frames = supervision.frames_per_sequence,
-      num_feature_frames_subsampled =
-      (num_feature_frames + frame_subsampling_factor - 1) /
-      frame_subsampling_factor;
-  if (num_output_frames != num_feature_frames_subsampled) {
-    // we tolerate deviations in the num-frames if they are very small (1 output
-    // frame).
-
-    if (abs(num_output_frames - num_feature_frames_subsampled) > 1) {
-      KALDI_ERR << "Mismatch in num-frames: chain supervision has "
-                << num_output_frames
-                << " versus features/frame_subsampling_factor = "
-                << num_feature_frames << " / " << frame_subsampling_factor
-                << " = " << num_feature_frames_subsampled
-                << ": check that --frame-subsampling-factor option is set "
-                << "the same as to chain-get-supervision.";
-    }
-    int32 new_num_feature_frames =
-        num_output_frames * frame_subsampling_factor;
-    // add a few frames at the end to make it match up.
-    Matrix<BaseFloat> feats_new(new_num_feature_frames, feats.NumCols(),
-                                kUndefined);
-    int32 min_feature_frames = std::min(num_feature_frames,
-                                        new_num_feature_frames);
-    feats_new.RowRange(0, min_feature_frames).CopyFromMat(
-        feats.RowRange(0, min_feature_frames));
-    for (int32 i = num_feature_frames; i < new_num_feature_frames; i++)
-      feats_new.Row(i).CopyFromVec(feats.Row(num_feature_frames - 1));
-    return ProcessFile(normalization_fst, feats_new, ivector_feats,
-                       supervision, utt_id, compress, left_context, right_context,
-                       frames_per_eg, frames_overlap_per_eg, frame_subsampling_factor,
-                       cut_zero_frames, num_frames_written, num_egs_written,
-                       example_writer);
-  }
+  int32 num_input_frames = feats.NumRows(),
+      num_output_frames = supervision.frames_per_sequence;
 
-  KALDI_ASSERT(frames_per_eg % frame_subsampling_factor == 0);
+  if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, num_output_frames))
+    return false;  // LengthsMatch() will have printed a warning.
 
-  int32 frames_per_eg_subsampled = frames_per_eg / frame_subsampling_factor,
-      frames_overlap_subsampled = frames_overlap_per_eg / frame_subsampling_factor,
-      frames_shift_subsampled = frames_per_eg_subsampled - frames_overlap_subsampled;
+  std::vector<ChunkTimeInfo> chunks;
 
-  if (num_feature_frames_subsampled < frames_per_eg_subsampled) {
-    KALDI_WARN << "Length of features for utterance " << utt_id
-               << " is less than than the frames_per_eg (after sub-sampling).";
-    return false;
-  }
+  utt_splitter->GetChunksForUtterance(num_input_frames, &chunks);
 
-  // we don't do any padding, as it would be a bit tricky to pad the 'chain' supervision.
-  // Instead we select ranges of frames that fully fit within the file; these
-  // might slightly overlap with each other or have gaps.
-  std::vector<int32> range_starts_subsampled;
-  chain::SplitIntoRanges(num_feature_frames_subsampled -
-                         frames_overlap_subsampled,
-                         frames_shift_subsampled,
-                         &range_starts_subsampled);
-  // The 'deriv_weights' make sure we don't count frames twice, and also ensure
-  // that we tend to avoid having nonzero weights on the derivatives that are
-  // too close to the edge of the corresponding 'range' (these derivatives close
-  // to the edge are not as accurate as they could be, because when we split we
-  // don't know the correct alphas and betas).
-  std::vector<Vector<BaseFloat> > deriv_weights;
-  if (cut_zero_frames >= 0)
-    chain::GetWeightsForRangesNew(frames_per_eg_subsampled,
-                                  cut_zero_frames / frame_subsampling_factor,
-                                  range_starts_subsampled,
-                                  &deriv_weights);
-  else
-    chain::GetWeightsForRanges(frames_per_eg_subsampled,
-                               range_starts_subsampled,
-                               &deriv_weights);
-
-  if (range_starts_subsampled.empty()) {
-    KALDI_WARN << "No output for utterance " << utt_id
-               << " (num-frames=" << num_feature_frames
-               << ") because too short for --frames-per-eg="
-               << frames_per_eg;
+  if (chunks.empty()) {
+    KALDI_WARN << "Not producing egs for utterance " << utt_id
+               << " because it is too short: "
+               << num_input_frames << " frames.";
     return false;
   }
 
-  chain::SupervisionSplitter splitter(supervision);
-  for (size_t i = 0; i < range_starts_subsampled.size(); i++) {
-    int32 range_start_subsampled = range_starts_subsampled[i],
-        range_start = range_start_subsampled * frame_subsampling_factor;
+  int32 frame_subsampling_factor =
+      utt_splitter->Config().frame_subsampling_factor;
+
+  chain::SupervisionSplitter sup_splitter(supervision);
+
+  for (size_t c = 0; c < chunks.size(); c++) {
+    ChunkTimeInfo &chunk = chunks[c];
+
+    int32 start_frame_subsampled = chunk.first_frame / frame_subsampling_factor,
+        num_frames_subsampled = chunk.num_frames / frame_subsampling_factor;
 
     chain::Supervision supervision_part;
-    splitter.GetFrameRange(range_start_subsampled,
-                           frames_per_eg_subsampled,
-                           &supervision_part);
+    sup_splitter.GetFrameRange(start_frame_subsampled,
+                               num_frames_subsampled,
+                               &supervision_part);
 
     if (normalization_fst.NumStates() > 0 &&
         !AddWeightToSupervisionFst(normalization_fst,
                                    &supervision_part)) {
-      KALDI_WARN << "For utterance " << utt_id << ", frames "
-                 << range_start << " to " << (range_start + frames_per_eg)
+      KALDI_WARN << "For utterance " << utt_id << ", feature frames "
+                 << chunk.first_frame << " to "
+                 << (chunk.first_frame + chunk.num_frames)
                  << ", FST was empty after composing with normalization FST. "
                  << "This should be extremely rare (a few per corpus, at most)";
-      return false;
     }
 
     int32 first_frame = 0;  // we shift the time-indexes of all these parts so
                             // that the supervised part starts from frame 0.
+
+    SubVector<BaseFloat> output_weights(
+        &(chunk.output_weights[0]),
+        static_cast<int32>(chunk.output_weights.size()));
+
     NnetChainSupervision nnet_supervision("output", supervision_part,
-                                          deriv_weights[i],
-                                          first_frame, frame_subsampling_factor);
+                                          output_weights,
+                                          first_frame,
+                                          frame_subsampling_factor);
     NnetChainExample nnet_chain_eg;
     nnet_chain_eg.outputs.resize(1);
     nnet_chain_eg.outputs[0].Swap(&nnet_supervision);
     nnet_chain_eg.inputs.resize(ivector_feats != NULL ? 2 : 1);
 
-    int32 tot_frames = left_context + frames_per_eg + right_context;
-    Matrix<BaseFloat> input_frames(tot_frames, feats.NumCols(), kUndefined);
+    int32 tot_input_frames = chunk.left_context + chunk.num_frames +
+        chunk.right_context;
 
-    // Set up "input_frames".
-    for (int32 j = -left_context; j < frames_per_eg + right_context; j++) {
-      int32 t = range_start + j;
-      if (t < 0) t = 0;
-      if (t >= feats.NumRows()) t = feats.NumRows() - 1;
-      SubVector<BaseFloat> src(feats, t),
-          dest(input_frames, j + left_context);
+    Matrix<BaseFloat> input_frames(tot_input_frames, feats.NumCols(),
+                                   kUndefined);
+
+    int32 start_frame = chunk.first_frame - chunk.left_context;
+    for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) {
+      int32 t2 = t;
+      if (t2 < 0) t2 = 0;
+      if (t2 >= num_input_frames) t2 = num_input_frames - 1;
+      int32 j = t - start_frame;
+      SubVector<BaseFloat> src(feats, t2),
+          dest(input_frames, j);
       dest.CopyFromVec(src);
     }
 
-    NnetIo input_io("input",
-                    -left_context,
-                    input_frames);
+    NnetIo input_io("input", -chunk.left_context, input_frames);
     nnet_chain_eg.inputs[0].Swap(&input_io);
 
     if (ivector_feats != NULL) {
       // if applicable, add the iVector feature.
       // choose iVector from a random frame in the chunk
-      int32 ivector_frame = RandInt(range_start, range_start + frames_per_eg - 1);
-      KALDI_ASSERT(ivector_feats->NumRows() > 0);
-      if (ivector_frame >= ivector_feats->NumRows())
-        ivector_frame = ivector_feats->NumRows() - 1;
+      int32 ivector_frame = RandInt(start_frame,
+                                    start_frame + num_input_frames - 1),
+          ivector_frame_subsampled = ivector_frame / ivector_period;
+      if (ivector_frame_subsampled < 0)
+        ivector_frame_subsampled = 0;
+      if (ivector_frame_subsampled >= ivector_feats->NumRows())
+        ivector_frame_subsampled = ivector_feats->NumRows() - 1;
       Matrix<BaseFloat> ivector(1, ivector_feats->NumCols());
-      ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame));
+      ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame_subsampled));
       NnetIo ivector_io("ivector", 0, ivector);
       nnet_chain_eg.inputs[1].Swap(&ivector_io);
     }
@@ -198,13 +146,10 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst,
       nnet_chain_eg.Compress();
 
     std::ostringstream os;
-    os << utt_id << "-" << range_start;
+    os << utt_id << "-" << chunk.first_frame;
 
     std::string key = os.str();  // key is <utterance-id>-<first-frame>
-
-    *num_frames_written += frames_per_eg;
-    *num_egs_written += 1;
-
     example_writer->Write(key, nnet_chain_eg);
   }
   return true;
@@ -239,44 +184,31 @@ int main(int argc, char *argv[]) {
         "chain-get-supervision.\n";
 
     bool compress = true;
-    int32 left_context = 0, right_context = 0, num_frames = 1,
-        num_frames_overlap = 0, length_tolerance = 100,
-        cut_zero_frames = -1,
-        frame_subsampling_factor = 1;
+    int32 length_tolerance = 100, online_ivector_period = 1;
+
+    ExampleGenerationConfig eg_config;  // controls num-frames,
+    // left/right-context, etc.
int32 srand_seed = 0; - std::string ivector_rspecifier; + std::string online_ivector_rspecifier; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " - "compressed format (recommended)"); - po.Register("cut-zero-frames", &cut_zero_frames, "Number of frames " - "(measured before subsampling) to zero the derivative on each " - "side of a cut point (if set, activates new-style derivative " - "weights)"); - po.Register("left-context", &left_context, "Number of frames of left " - "context the neural net requires."); - po.Register("right-context", &right_context, "Number of frames of right " - "context the neural net requires."); - po.Register("num-frames", &num_frames, "Number of frames with labels " - "that each example contains. Will be rounded up to a multiple " - "of --frame-subsampling-factor."); - po.Register("num-frames-overlap", &num_frames_overlap, "Number of frames of " - "overlap between each example (could be useful in conjunction " - "--min-deriv-time and --max-deriv-time, to avoid wasting data). " - "Each time we shift by --num-frames minus --num-frames-overlap."); - po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " - "features, as a matrix."); - po.Register("srand", &srand_seed, "Seed for random number generator " - "(only relevant if --pick-random-ivector=true)"); + "compressed format."); + po.Register("ivectors", &online_ivector_rspecifier, "Alias for " + "--online-ivectors option, for back compatibility"); + po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of " + "ivector features, as a matrix."); + po.Register("online-ivector-period", &online_ivector_period, "Number of " + "frames between iVectors in matrices supplied to the " + "--online-ivectors option"); + po.Register("srand", &srand_seed, "Seed for random number generator "); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate at the output will be less than the " - "frame-rate of the input"); + eg_config.Register(&po); po.Read(argc, argv); - + srand(srand_seed); if (po.NumArgs() < 3 || po.NumArgs() > 4) { @@ -284,12 +216,6 @@ int main(int argc, char *argv[]) { exit(1); } - if (num_frames <= 0 || left_context < 0 || right_context < 0 || - length_tolerance < 0 || frame_subsampling_factor <= 0) - KALDI_ERR << "One of the integer options is out of the allowed range."; - RoundUpNumFrames(frame_subsampling_factor, - &num_frames, &num_frames_overlap); - std::string normalization_fst_rxfilename, feature_rspecifier, @@ -307,6 +233,9 @@ int main(int argc, char *argv[]) { examples_wspecifier = po.GetArg(4); } + eg_config.ComputeDerived(); + UtteranceSplitter utt_splitter(eg_config); + fst::StdVectorFst normalization_fst; if (!normalization_fst_rxfilename.empty()) { ReadFstKaldi(normalization_fst_rxfilename, &normalization_fst); @@ -317,10 +246,10 @@ int main(int argc, char *argv[]) { chain::RandomAccessSupervisionReader supervision_reader( supervision_rspecifier); NnetChainExampleWriter example_writer(examples_wspecifier); - RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatMatrixReader online_ivector_reader( + online_ivector_rspecifier); - int32 num_done = 0, num_err = 0; - int64 num_frames_written = 0, num_egs_written = 0; + int32 num_err = 0; for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); @@ -330,45 
+259,41 @@ int main(int argc, char *argv[]) {
         num_err++;
       } else {
         const chain::Supervision &supervision = supervision_reader.Value(key);
-        const Matrix<BaseFloat> *ivector_feats = NULL;
-        if (!ivector_rspecifier.empty()) {
-          if (!ivector_reader.HasKey(key)) {
+        const Matrix<BaseFloat> *online_ivector_feats = NULL;
+        if (!online_ivector_rspecifier.empty()) {
+          if (!online_ivector_reader.HasKey(key)) {
             KALDI_WARN << "No iVectors for utterance " << key;
             num_err++;
             continue;
           } else {
            // this address will be valid until we call HasKey() or Value()
             // again.
-            ivector_feats = &(ivector_reader.Value(key));
+            online_ivector_feats = &(online_ivector_reader.Value(key));
           }
         }
-        if (ivector_feats != NULL &&
-            (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance
-             || ivector_feats->NumRows() == 0)) {
+        if (online_ivector_feats != NULL &&
+            (abs(feats.NumRows() - (online_ivector_feats->NumRows() *
+                                    online_ivector_period)) > length_tolerance
+             || online_ivector_feats->NumRows() == 0)) {
           KALDI_WARN << "Length difference between feats " << feats.NumRows()
-                     << " and iVectors " << ivector_feats->NumRows()
-                     << " exceeds tolerance " << length_tolerance;
+                     << " and iVectors " << online_ivector_feats->NumRows()
+                     << " exceeds tolerance " << length_tolerance;
           num_err++;
           continue;
         }
-        if (ProcessFile(normalization_fst, feats, ivector_feats, supervision,
-                        key, compress,
-                        left_context, right_context, num_frames,
-                        num_frames_overlap, frame_subsampling_factor,
-                        cut_zero_frames, &num_frames_written, &num_egs_written,
-                        &example_writer))
-          num_done++;
-        else
+
+        if (!ProcessFile(normalization_fst, feats,
+                         online_ivector_feats, online_ivector_period,
+                         supervision, key, compress,
+                         &utt_splitter, &example_writer))
           num_err++;
       }
     }
-
-    KALDI_LOG << "Finished generating nnet3-chain examples, "
-              << "successfully processed " << num_done
-              << " feature files, wrote " << num_egs_written << " examples, "
-              << " with " << num_frames_written << " frames in total; "
-              << num_err << " files had errors.";
-    return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
+    if (num_err > 0)
+      KALDI_WARN << num_err << " utterances had errors and could "
+          "not be processed.";
+    // utt_splitter prints stats in its destructor.
+    return utt_splitter.ExitStatus();
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
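The new --online-ivector-period option means that row r of the iVector matrix covers ivector_period consecutive feature frames, so the code above divides a frame index by the period and clamps to the matrix. A standalone sketch of that lookup (illustrative values):

    #include <algorithm>
    #include <cstdio>

    int main() {
      // With --online-ivector-period=10, row r of the iVector matrix covers
      // frames [10r, 10r+9]; pick the row for a frame and clamp to the matrix.
      int ivector_period = 10, num_ivector_rows = 5;  // illustrative values
      int ivector_frame = 47;                         // frame chosen in the chunk
      int row = ivector_frame / ivector_period;       // 4
      row = std::max(0, std::min(row, num_ivector_rows - 1));
      std::printf("use iVector row %d\n", row);
      return 0;
    }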
diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc
index 45dca4051f3..a3686d2fc30 100644
--- a/src/chainbin/nnet3-chain-merge-egs.cc
+++ b/src/chainbin/nnet3-chain-merge-egs.cc
@@ -41,14 +41,11 @@ int main(int argc, char *argv[]) {
         "nnet3-chain-merge-egs --minibatch-size=128 ark:1.cegs ark:- | nnet3-chain-train-simple ... \n"
         "See also nnet3-chain-copy-egs\n";
 
-    bool compress = false;
-    int32 minibatch_size = 64;
+
+    ExampleMergingConfig merging_config("64");  // 64 is the default minibatch size.
 
     ParseOptions po(usage);
-    po.Register("minibatch-size", &minibatch_size, "Target size of minibatches "
-                "when merging (see also --measure-output-frames)");
-    po.Register("compress", &compress, "If true, compress the output examples "
-                "(not recommended unless you are writing to disk");
+    merging_config.Register(&po);
 
     po.Read(argc, argv);
 
@@ -63,39 +60,17 @@ int main(int argc, char *argv[]) {
     SequentialNnetChainExampleReader example_reader(examples_rspecifier);
     NnetChainExampleWriter example_writer(examples_wspecifier);
 
-    std::vector<NnetChainExample> examples;
-    examples.reserve(minibatch_size);
-
-    int64 num_read = 0, num_written = 0;
-    while (!example_reader.Done()) {
+    merging_config.ComputeDerived();
+    ChainExampleMerger merger(merging_config, &example_writer);
+    for (; !example_reader.Done(); example_reader.Next()) {
       const NnetChainExample &cur_eg = example_reader.Value();
-      examples.resize(examples.size() + 1);
-      examples.back() = cur_eg;
-
-      bool minibatch_ready =
-          static_cast<int32>(examples.size()) >= minibatch_size;
-
-      // Do Next() now, so we can test example_reader.Done() below .
-      example_reader.Next();
-      num_read++;
-
-      if (minibatch_ready || (example_reader.Done() && !examples.empty())) {
-        NnetChainExample merged_eg;
-        MergeChainExamples(compress, &examples, &merged_eg);
-        std::ostringstream ostr;
-        ostr << "merged-" << num_written;
-        num_written++;
-        std::string output_key = ostr.str();
-        example_writer.Write(output_key, merged_eg);
-        examples.clear();
-      }
+      merger.AcceptExample(new NnetChainExample(cur_eg));
     }
-    KALDI_LOG << "Merged " << num_read << " egs to " << num_written << '.';
-    return (num_written != 0 ? 0 : 1);
+    // the merger itself prints the necessary diagnostics.
+    merger.Finish();
+    return merger.ExitStatus();
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
   }
 }
-
-
diff --git a/src/configure b/src/configure
index d4122f1808e..4bfe6bc8470 100755
--- a/src/configure
+++ b/src/configure
@@ -1,11 +1,11 @@
 #!/bin/bash
-#
+
 # This configure script is hand-generated, not auto-generated.
 # It creates the file kaldi.mk, which is %included by the Makefiles
 # in the subdirectories.
-# The file kaldi.mk is editable by hand-- for example, you may want to
+# The file kaldi.mk is editable by hand -- for example, you may want to
 # remove the options -g -O0 -DKALDI_PARANOID, or edit the
-# -DKALDI_DOUBLE_PRECISION option (to be 1 not 0),
+# DOUBLE_PRECISION variable (to be 1 not 0).
 
 # Example command lines:
@@ -15,17 +15,94 @@
 # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes
 # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes --mkl-threading=tbb
 #           # This is for MKL 11.3, which does not seem to provide Intel OMP libs
-# ./configure --openblas-root=../tools/OpenBLAS/install   # before doing
-#           # this, cd to ../tools and type "make openblas".  Note:
-#           # this is not working correctly on all platforms, do "make test"
+# ./configure --openblas-root=../tools/OpenBLAS/install
+#           # Before doing this, cd to ../tools and type "make openblas".
+#           # Note: this is not working correctly on all platforms, do "make test"
 #           # and look out for segmentation faults.
 # ./configure --atlas-root=../tools/ATLAS/build
 # ./configure --use-cuda=no   # disable CUDA detection (will build cpu-only
 #           # version of kaldi even on CUDA-enabled machine
+# ./configure --static --fst-root=/opt/cross/armv8hf \
+#             --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf
+#           # Cross compile for armv8hf. This assumes that you have openfst built
+#           # with the armv8-rpi3-linux-gnueabihf toolchain and installed to
+#           # /opt/cross/armv8hf. It also assumes that you have an ATLAS library
+#           # built for the target and installed to /opt/cross/armv8hf, and that
+#           # the armv8-rpi3-linux-gnueabihf toolchain is available in your path.
+# ./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \
+#             --fst-root=/opt/cross/arm-linux-androideabi --fst-version=1.4.1 \
+#             --android-incdir=/opt/cross/arm-linux-androideabi/sysroot/usr/include \
+#             --host=arm-linux-androideabi
+#           # Cross compile for Android on arm. The only difference here is the
+#           # addition of the --android-incdir flag because the toolchains
+#           # produced by the Android NDK don't always include the C++ stdlib
+#           # headers in the normal cross compile include path.
+
+# This should be incremented after any significant change to the configure
+# script, i.e. any change affecting kaldi.mk or the build system as a whole.
+CONFIGURE_VERSION=6
+
+if ! [ -x "$PWD/configure" ]; then
+  echo 'You must run "configure" from the src/ directory.'
+  exit 1
+fi
 
-#This should be incremented after every significant change of the configure script
-#I.e. after each change that affects the kaldi.mk or the build system as whole
-CONFIGURE_VERSION=4
+function usage {
+  cat <<EOF
+  CXXFLAGS    Additional flags to pass to the compiler, e.g. -I<include-dir>
+  LDFLAGS     Additional linker flags, e.g. -L<lib-dir>
+  LDLIBS      Additional libraries to pass to the linker, e.g. -l<library>
+
+EOF
+}
 
 function rel2abs {
   if [ ! -z "$1" ]; then
@@ -50,149 +127,6 @@ function is_set {
   fi
 }
 
-
-
-## First do some checks.  These verify that all the things are
-## here that should be here.
-if ! [ -x "$PWD/configure" ]; then
-  echo 'You must run "configure" from the src/ directory.'
-  exit 1
-fi
-
-## Default locations for FST and linear algebra libraries.
-MATHLIB='ATLAS'
-ATLASROOT=`rel2abs ../tools/ATLAS/`
-FSTROOT=`rel2abs ../tools/openfst`
-
-# Avoid using any variables that are set in the shell.
-unset MKLROOT
-unset CLAPACKROOT
-unset OPENBLASROOT
-unset MKLLIBDIR
-
-function usage {
-  echo 'Usage: ./configure [--static|--shared] [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
-  [--openblas-root=OPENBLASROOOT] [--clapack-root=CLAPACKROOT] [--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR]
-  [--omp-libdir=OMPDIR] [--static-fst={yes|no}] [--static-math={yes|no}] [--threaded-math={yes|no}] [--mathlib=ATLAS|MKL|CLAPACK|OPENBLAS]
-  [--use-cuda={yes|no}] [--cudatk-dir=CUDATKDIR][--mkl-threading=sequential|iomp|tbb|gomp]'
-}
-
-threaded_atlas=false  # By default, use the un-threaded version of ATLAS.
-threaded_math=${threaded_atlas} -static_math=false -static_fst=false -use_cuda=true -dynamic_kaldi=false -mkl_threading=sequential - -cmd_line="$0 $@" # Save the command line to include in kaldi.mk - -while [ $# -gt 0 ]; -do - case "$1" in - --help) - usage; exit 0 ;; - --version) - echo $CONFIGURE_VERSION; exit 0 ;; - --static) - dynamic_kaldi=false; - static_math=true; - static_fst=true; - shift ;; - --shared) - dynamic_kaldi=true; - static_math=false; - static_fst=false; - shift ;; - --atlas-root=*) - ATLASROOT=`read_dirname $1`; - shift ;; - --threaded-atlas=yes) - threaded_atlas=true; - shift ;; - --threaded-atlas=no) - threaded_atlas=false; - shift ;; - --threaded-math=yes) - threaded_atlas=true; - threaded_math=true; - mkl_threading=iomp - shift ;; - --threaded-math=no) - threaded_atlas=false; - threaded_math=false; - mkl_threading=sequential - shift ;; - --use-cuda=yes) - use_cuda=true; - shift ;; - --use-cuda=no) - use_cuda=false; - shift ;; - --static-math=yes) - static_math=true; - shift ;; - --static-math=no) - static_math=false; - shift ;; - --static-fst=yes) - static_fst=true; - shift ;; - --static-fst=no) - static_fst=false; - shift ;; - --mkl-threading=sequential) - threaded_atlas=false; - threaded_math=false; - mkl_threading=sequential; - shift ;; - --mkl-threading=*) - mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; - threaded_atlas=true; - threaded_math=true; - shift ;; - --fst-root=*) - FSTROOT=`read_dirname $1`; - shift ;; - --clapack-root=*) - CLAPACKROOT=`read_dirname $1`; - shift ;; - --openblas-root=*) - OPENBLASROOT=`read_dirname $1`; - shift ;; - --mkl-root=*) - MKLROOT=`read_dirname $1`; - shift ;; - --mkl-libdir=*) - MKLLIBDIR=`read_dirname $1`; - shift ;; - --speex-root=*) - SPEEXROOT=`read_dirname $1`; - shift ;; - --speex-libdir=*) - SPEEXLIBDIR=`read_dirname $1`; - shift ;; - --speex-includedir=*) - SPEEXINCLUDEDIR=`read_dirname $1`; - shift ;; - --omp-libdir=*) - OMPLIBDIR=`read_dirname $1`; - shift ;; - --mathlib=*) - MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`; - shift ;; - --cudatk-dir=*) - CUDATKDIR=`read_dirname $1`; - shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only - *) echo "Unknown argument: $1, exiting"; usage; exit 1 ;; - esac -done - -# the idea here is that if you change the configuration options from using -# CUDA to not using it, or vice versa, we want to recompile all parts of the -# code that may use a GPU. Touching this file is a way to force this. -touch cudamatrix/cu-common.h 2>/dev/null - function failure { echo "***configure failed: $* ***" >&2 if [ -f kaldi.mk ]; then rm kaldi.mk; fi @@ -203,43 +137,6 @@ function check_exists { if [ ! -f $1 ]; then failure "$1 not found."; fi } -function check_for_bad_gcc { - if which gcc >&/dev/null; then # gcc is on the path - gcc_version=$(gcc -dumpspecs 2>&1 | grep -A1 -F '*version:' | grep -v version) - if [ "$gcc_version" == "4.8.2" ] || [ "$gcc_version" == "4.8.1" ]; then - echo "*** WARNING: your version of gcc seems to be 4.8.1 or 4.8.2. ***" - echo "*** These versions of gcc has a bug in nth_element ***" - echo "*** in its implementation of the standard library ***" - echo "*** This will cause Kaldi to crash (make test ***" - echo "*** would fail). Please either upgrade or downgrade gcc. ***" - exit 1 - fi - fi -} - -function check_for_slow_expf { - cd probe - rm -f exp-test - make -f Makefile.slow_expf 1>/dev/null - ./exp-test - if [ $? -eq 1 ]; then - echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. 
Please consider updating glibc. ***" - echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" - echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk - fi - cd .. -} - - -function exit_success { - check_for_bad_gcc; - check_for_slow_expf; - echo "SUCCESS" - exit 0; -} - - - function check_library { local libpath=$1 local libname=$2 @@ -250,16 +147,62 @@ function check_library { return 1 } +function check_compiler { + COMPILER=$1 + if ! which $COMPILER >&/dev/null; then + failure "$COMPILER is not installed. + You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + else + COMPILER_VER_INFO=$($COMPILER --version 2>/dev/null) + if [[ $COMPILER_VER_INFO == *"g++"* ]]; then + GCC_VER=$($COMPILER -dumpversion) + GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") + if [ $GCC_VER_NUM -lt 40700 ]; then + failure "$COMPILER (g++-$GCC_VER) is not supported. + You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + elif [ $GCC_VER_NUM == 40801 ] || [ $GCC_VER_NUM == 40802 ]; then + failure "$COMPILER (g++-$GCC_VER) is not supported. + GCC 4.8.1 and 4.8.2 have a bug in the implementation of + the nth_element algorithm provided by the standard library. + This will cause Kaldi to crash (make test would fail). + Please use another C++ compiler with C++11 support. + You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + fi + elif [[ $COMPILER_VER_INFO == *"Apple"* ]]; then + CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + CLANG_VER_NUM=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*clang-\([0-9]*\).*/\1/") + if [ $CLANG_VER_NUM -lt 500 ]; then + failure "$COMPILER (Apple clang-$CLANG_VER) is not supported. + You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + fi + elif [[ $COMPILER_VER_INFO == *"LLVM"* ]]; then + CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + CLANG_VER_NUM=$(echo $CLANG_VER | sed 's/\./ /g' | xargs printf "%d%02d") + if [ $CLANG_VER_NUM -lt 303 ]; then + failure "$COMPILER (LLVM clang-$CLANG_VER) is not supported. + You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + fi + fi + fi +} +function check_for_slow_expf { + # We cannot run this test if we are cross compiling. + if [[ "$TARGET_ARCH" == "`uname -m`" ]] ; then + cd probe + rm -f exp-test + make -f Makefile.slow_expf 1>/dev/null + ./exp-test + if [ $? -eq 1 ]; then + echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" + echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" + echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + fi + cd .. 
+ fi +} -#Check if at least one of these variables is set -#If yes, we want to switch to using the MKL -is_set $MKLLIBDIR && echo "Force-configuring KALDI to use MKL" && export MATHLIB="MKL" -is_set $MKLROOT && echo "Force-configuring KALDI to use MKL"&& export MATHLIB="MKL" -is_set $CLAPACKROOT && echo "Force-configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" -is_set $OPENBLASROOT && echo "Force-configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" - -#MKL functions +# MKL functions function linux_configure_mkllibdir { local mklroot=$1 @@ -278,7 +221,6 @@ function linux_configure_mkl_includes { failure "Could not find the MKL include directory" } - function linux_configure_mkl_libraries { local mkllibdir=$1 local static=$2 @@ -414,13 +356,13 @@ function linux_configure_mkl_threading { echo "$OMP_LINK_LINE" } -## -## CUDA is used only in selected directories including src/cudamatrix, src/nnet* -## and src/chain*. It is used to accelerate the neural network training, the -## rest of kaldi runs on CPUs. -## + +# CUDA is used only in selected directories including src/cudamatrix, src/nnet* +# and src/chain*. It is used to accelerate the neural network training. +# The rest of Kaldi runs on CPUs. + function configure_cuda { - #check for CUDA toolkit in the system + # Check for CUDA toolkit in the system if [ ! -d "$CUDATKDIR" ]; then for base in /Developer/NVIDIA/CUDA-6.0 /usr/local/share/cuda /usr/local/cuda /pkgs_local/cuda-3.2/ /opt/nvidia_cuda/cuda-6.0/ /usr/; do if [ -f $base/bin/nvcc ]; then @@ -433,9 +375,15 @@ function configure_cuda { if [ ! -f $CUDATKDIR/bin/nvcc ]; then failure "Cannnot find nvcc in CUDATKDIR=$CUDATKDIR" fi + + if [[ "$TARGET_ARCH" != "`uname -m`" ]] ; then + failure "Cannot cross compile with CUDA support" + fi + echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)" echo >> kaldi.mk - echo "#Next section enables CUDA for compilation" >> kaldi.mk + echo "# CUDA configuration" >> kaldi.mk + echo >> kaldi.mk echo CUDA = true >> kaldi.mk echo CUDATKDIR = $CUDATKDIR >> kaldi.mk @@ -454,8 +402,9 @@ function configure_cuda { *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; esac echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk + echo >> kaldi.mk - # 64bit/32bit? + # 64bit/32bit? We do not support cross compilation with CUDA so, use direct calls to uname -m here if [ "`uname -m`" == "x86_64" ]; then if [ "`uname`" == "Darwin" ]; then sed 's/lib64/lib/g' < makefiles/cuda_64bit.mk >> kaldi.mk @@ -463,7 +412,7 @@ function configure_cuda { cat makefiles/cuda_64bit.mk >> kaldi.mk fi elif [ "`uname -m`" == "ppc64le" ]; then - cat makefiles/cuda_ppc64le.mk >> kaldi.mk + cat makefiles/cuda_64bit.mk >> kaldi.mk else cat makefiles/cuda_32bit.mk >> kaldi.mk fi @@ -476,10 +425,10 @@ function configure_cuda { } function linux_configure_speex { - #check whether the user has called tools/extras/install_speex.sh or not + # Check whether the user has called tools/extras/install_speex.sh or not [ ! -z "$SPEEXROOT" ] || SPEEXROOT=`pwd`/../tools/speex [ ! -z "$SPEEXLIBDIR" ] || SPEEXLIBDIR="$SPEEXROOT"/lib - [ ! -z "$SPEEXINCLUDEDIR" ] || SPEEXINCLUDEDIR="$SPEEXROOT"/include + [ ! 
-z "$SPEEXINCDIR" ] || SPEEXINCDIR="$SPEEXROOT"/include static_speex=$1 if [ "foo"$static_speex == "foo" ]; then static_speex=false @@ -496,9 +445,9 @@ function linux_configure_speex { return fi - if [ -f $SPEEXINCLUDEDIR/speex/speex.h ]; then + if [ -f $SPEEXINCDIR/speex/speex.h ]; then echo >> kaldi.mk - echo CXXFLAGS += -DHAVE_SPEEX -I${SPEEXINCLUDEDIR} >> kaldi.mk + echo CXXFLAGS += -DHAVE_SPEEX -I${SPEEXINCDIR} >> kaldi.mk if $static_speex; then echo LDLIBS += $SPEEXLIBDIR/libspeex.a @@ -513,42 +462,32 @@ function linux_configure_speex { fi } -function fix_cxx_flag { - CXXCOMPILER=`grep "CXX = " kaldi.mk | awk '{print $3}'` - if [ $CXXCOMPILER=="g++" ]; then - $CXXCOMPILER -dumpversion | \ - awk '{if(NR==1 && $1<"4.4") print "sed \"s/-Wno-unused-local-typedefs//g\" \ - kaldi.mk > tmpf; mv tmpf kaldi.mk; "}' | sh - +function linux_atlas_failure { + echo ATLASINC = $ATLASROOT/include >> kaldi.mk + echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then + cat makefiles/linux_atlas_arm.mk >> kaldi.mk + elif [[ "$TARGET_ARCH" == ppc64le ]]; then + cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk + else + cat makefiles/linux_atlas.mk >> kaldi.mk fi -} - -function linux_atlas_failure { # function we use when we couldn't find - # ATLAS libs. - echo ATLASINC = $ATLASROOT/include >> kaldi.mk - echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then - cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then - cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk - else - cat makefiles/linux_atlas.mk >> kaldi.mk - fi - fix_cxx_flag - echo "** $* ***" - echo "** ERROR **" - echo "** Configure cannot proceed automatically." - echo "** If you know that you have ATLAS installed somewhere on your machine, you" - echo "** may be able to proceed by replacing [somewhere] in kaldi.mk with a directory." - echo "** If you have sudo (root) access you could install the ATLAS package on your" - echo "** machine, e.g. 'sudo apt-get install libatlas-dev libatlas-base-dev' or" - echo "** 'sudo yum install atlas.x86_64' or 'sudo zypper install libatlas3-devel'," - echo "** or on cygwin, install atlas from the installer GUI; and then run ./configure" - echo "** again." - echo "**" - echo "** Otherwise (or if you prefer OpenBLAS for speed), you could go the OpenBLAS" - echo "** route: cd to ../tools, type 'extras/install_openblas.sh', cd back to here," - echo "** and type './configure --openblas-root=../tools/OpenBLAS/install'" - exit 1; + echo "** $* ***" + echo "** ERROR **" + echo "** Configure cannot proceed automatically." + echo "** If you know that you have ATLAS installed somewhere on your machine, you" + echo "** may be able to proceed by replacing [somewhere] in kaldi.mk with a directory." + echo "** If you have sudo (root) access you could install the ATLAS package on your" + echo "** machine, e.g. 'sudo apt-get install libatlas-dev libatlas-base-dev' or" + echo "** 'sudo yum install atlas.x86_64' or 'sudo zypper install libatlas3-devel'," + echo "** or on cygwin, install atlas from the installer GUI; and then run ./configure" + echo "** again." 
+ echo "**" + echo "** Otherwise (or if you prefer OpenBLAS for speed), you could go the OpenBLAS" + echo "** route: cd to ../tools, type 'extras/install_openblas.sh', cd back to here," + echo "** and type './configure --openblas-root=../tools/OpenBLAS/install'" + exit 1; } function linux_check_static { @@ -558,7 +497,7 @@ function linux_check_static { if [ -f $dir/libatlas.a ]; then # candidate... # Note: on the next line, the variable assignment # LANG=en_US should apply just to the program called on that line. - if LANG=en_US gcc -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then + if LANG=en_US $CXX -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then echo "Directory $dir may contain ATLAS libraries but seems to be wrong architecture"; rm test_linking test_linking.cc 2>/dev/null return 1; @@ -583,18 +522,17 @@ function linux_configure_debian_ubuntu { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_debian_ubuntu3 { @@ -608,18 +546,17 @@ function linux_configure_debian_ubuntu3 { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_debian7 { @@ -635,19 +572,17 @@ function linux_configure_debian7 { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_debian7" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian 7 [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_redhat { @@ -660,18 +595,16 @@ function linux_configure_redhat { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_redhat" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == 
ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for red hat [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda - exit_success; } function linux_configure_redhat_fat { @@ -687,21 +620,18 @@ function linux_configure_redhat_fat { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_redhat_fat" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for red hat [dynamic libraries, fat] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda - exit_success; } - function linux_configure_static { if $threaded_atlas; then pt=pt; else pt=""; fi @@ -747,18 +677,17 @@ function linux_configure_static { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [static libraries] with ATLASLIBS =$ATLASLIBS" - exit_success; } function linux_check_dynamic { @@ -832,152 +761,390 @@ function linux_configure_dynamic { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" - exit_success; } -echo "Configuring ..." +############################# CONFIGURATION ############################# -if [ ! -f makefiles/common.mk ]; then - failure makefiles/common.mk not found -fi +# If configuration sets any of these variables, we will switch the external +# math library. Here we unset them so that we can check later. +unset MKLROOT +unset CLAPACKROOT +unset OPENBLASROOT +unset MKLLIBDIR + +# This variable identifies the type of system where built programs and +# libraries will run. It is set by the configure script when cross compiling. +unset HOST + +# These environment variables can be used to override the default toolchain. +CXX=${CXX:-g++} +AR=${AR:-ar} +AS=${AS:-as} +RANLIB=${RANLIB:-ranlib} + +# These environment variables can be used to provide additional flags to the +# compiler/linker. We want these flags to override the flags determined by the +# configure script, so we append them to the appropriate variables (CXXFLAGS, +# LDFLAGS and LDLIBS) after those variables are set by the configure script. 
+ENV_CXXFLAGS=$CXXFLAGS +ENV_LDFLAGS=$LDFLAGS +ENV_LDLIBS=$LDLIBS + +# Default configuration +double_precision=false +dynamic_kaldi=false +use_cuda=true +static_fst=false +static_math=false +threaded_atlas=false +mkl_threading=sequential +android=false + +MATHLIB='ATLAS' +ATLASROOT=`rel2abs ../tools/ATLAS/` +FSTROOT=`rel2abs ../tools/openfst` +# Save the command line to include in kaldi.mk +cmd_line="$0 $@" -echo "Checking OpenFST library in $FSTROOT ..." -if [ ! -f $FSTROOT/include/fst/fst.h ]; then - failure "Could not find file $FSTROOT/include/fst/fst.h: - you may not have installed OpenFst. See ../tools/INSTALL" +while [ $# -gt 0 ]; +do + case "$1" in + --help) + usage; exit 0 ;; + --version) + echo $CONFIGURE_VERSION; exit 0 ;; + --static) + dynamic_kaldi=false; + static_math=true; + static_fst=true; + shift ;; + --shared) + dynamic_kaldi=true; + static_math=false; + static_fst=false; + shift ;; + --double-precision) + double_precision=true; + shift ;; + --double-precision=yes) + double_precision=true; + shift ;; + --double-precision=no) + double_precision=false; + shift ;; + --atlas-root=*) + ATLASROOT=`read_dirname $1`; + shift ;; + --threaded-atlas) + threaded_atlas=true; + shift ;; + --threaded-atlas=yes) + threaded_atlas=true; + shift ;; + --threaded-atlas=no) + threaded_atlas=false; + shift ;; + --threaded-math) + threaded_atlas=true; + mkl_threading=iomp + shift ;; + --threaded-math=yes) + threaded_atlas=true; + mkl_threading=iomp + shift ;; + --threaded-math=no) + threaded_atlas=false; + mkl_threading=sequential + shift ;; + --use-cuda) + use_cuda=true; + shift ;; + --use-cuda=yes) + use_cuda=true; + shift ;; + --use-cuda=no) + use_cuda=false; + shift ;; + --static-math) + static_math=true; + shift ;; + --static-math=yes) + static_math=true; + shift ;; + --static-math=no) + static_math=false; + shift ;; + --static-fst) + static_fst=true; + shift ;; + --static-fst=yes) + static_fst=true; + shift ;; + --static-fst=no) + static_fst=false; + shift ;; + --mkl-threading=sequential) + threaded_atlas=false; + mkl_threading=sequential; + shift ;; + --mkl-threading=*) + mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; + threaded_atlas=true; + shift ;; + --fst-root=*) + FSTROOT=`read_dirname $1`; + shift ;; + --clapack-root=*) + CLAPACKROOT=`read_dirname $1`; + shift ;; + --openblas-root=*) + OPENBLASROOT=`read_dirname $1`; + shift ;; + --mkl-root=*) + MKLROOT=`read_dirname $1`; + shift ;; + --mkl-libdir=*) + MKLLIBDIR=`read_dirname $1`; + shift ;; + --speex-root=*) + SPEEXROOT=`read_dirname $1`; + shift ;; + --speex-libdir=*) + SPEEXLIBDIR=`read_dirname $1`; + shift ;; + --speex-incdir=*) + SPEEXINCDIR=`read_dirname $1`; + shift ;; + --omp-libdir=*) + OMPLIBDIR=`read_dirname $1`; + shift ;; + --mathlib=*) + MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`; + shift ;; + --cudatk-dir=*) + CUDATKDIR=`read_dirname $1`; + shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only + --fst-version=*) + OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`; + shift;; + --host=*) + # The type of system where built programs and libraries will run. + # It should be in the format cpu-vendor-os. If specified, this script + # will infer the target architecture from the specified host triple. 
+ HOST=`expr "X$1" : '[^=]*=\(.*\)'`; + shift ;; + --android-incdir=*) + android=true; + threaded_math=false; + static_math=true; + static_fst=true; + dynamic_kaldi=false; + MATHLIB='OPENBLAS'; + ANDROIDINC=`read_dirname $1`; + shift;; + *) echo "Unknown argument: $1, exiting"; usage; exit 1 ;; + esac +done + +# The idea here is that if you change the configuration options from using +# CUDA to not using it, or vice versa, we want to recompile all parts of the +# code that may use a GPU. Touching this file is a way to force this. +touch cudamatrix/cu-common.h 2>/dev/null + +if $android && [[ "$CXX" != *clang++* ]] ; then + failure "Android build requires clang++. Make sure you have clang++ installed + on your system and then override the default compiler by setting CXX, e.g. + CXX=clang++ ./configure" fi -echo Checking OpenFst library was patched. -if ! grep "multiple repeated" $FSTROOT/include/fst/minimize.h >/dev/null; then - echo "** ERROR **" - echo "** $FSTROOT/include/fst/minimize.h seems not to be patched:" - echo "patch not applied? FST tools will not work in our recipe." - exit 1; + +# If HOST is set +# 1. We prepend it to CXX, AR, AS and RANLIB. +# 2. We parse the target architecture from the HOST triple. +# Otherwise we set the target architecture to the output of `uname -m`. +if is_set $HOST; then + CXX="$HOST-$CXX" + AR="$HOST-$AR" + AS="$HOST-$AS" + RANLIB="$HOST-$RANLIB" + + # The host triple will be something like "armv8-rpi3-linux-gnueabihf". We + # need the first field which is the target architecture for this build. The + # following command will take the host triple "armv8-rpi3-linux-gnueabihf" + # and return ["armv8", "rpi3", "linux", "gnueabihf"] in PARTS. + IFS='-' read -ra PARTS <<< "$HOST" + # The first field in the PARTS list is the target architecture. + TARGET_ARCH="$PARTS" + if [[ "$TARGET_ARCH" != arm* && "$TARGET_ARCH" != ppc64le && "$TARGET_ARCH" != x86* ]] ; then + # We currently only support building for x86[_64], arm*, and ppc64le. + # If TARGET_ARCH was read from the HOST variable, it must be one of these. + failure "$TARGET_ARCH is not a supported architecture. + Supported architectures: x86[_64], arm*, ppc64le." + fi +else + TARGET_ARCH="`uname -m`" fi -# back up the old one in case we modified it +# If one of these variables is set, we switch the external math library. +is_set $MKLLIBDIR && echo "Configuring KALDI to use MKL" && export MATHLIB="MKL" +is_set $MKLROOT && echo "Configuring KALDI to use MKL"&& export MATHLIB="MKL" +is_set $CLAPACKROOT && echo "Configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" +is_set $OPENBLASROOT && echo "Configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" + +echo "Configuring ..." + +# Back up the old kaldi.mk in case we modified it if [ -f kaldi.mk ]; then - echo "Backing up kaldi.mk to kaldi.mk.bak" + echo "Backing up kaldi.mk to kaldi.mk.bak ..." cp kaldi.mk kaldi.mk.bak fi -printf "# This file was generated using the following command:\n# $cmd_line\n\n" > kaldi.mk -cat makefiles/common.mk >> kaldi.mk +# Generate the new kaldi.mk file +echo "# This file was generated using the following command:" > kaldi.mk +echo "# $cmd_line" >> kaldi.mk +echo >> kaldi.mk +echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk +echo >> kaldi.mk + +echo "# Toolchain configuration" >> kaldi.mk +echo >> kaldi.mk +echo "CXX = $CXX" >> kaldi.mk +echo "AR = $AR" >> kaldi.mk +echo "AS = $AS" >> kaldi.mk +echo "RANLIB = $RANLIB" >> kaldi.mk +echo >> kaldi.mk + +echo "Checking compiler $CXX ..." 
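Both check_compiler earlier in this file and the OpenFst gate below compare versions by packing "major.minor.patch" into a single integer with printf "%d%02d%02d". A quick standalone illustration of the same packing (shown in C++ for concreteness; the script does it with sed and xargs):

    #include <cstdio>

    int main() {
      // Same packing as: echo 4.8.1 | sed 's/\./ /g' | xargs printf "%d%02d%02d"
      // e.g. gcc 4.8.1 -> 40801, so numeric comparisons like "< 40700" work.
      int major = 4, minor = 8, patch = 1;
      char buf[16];
      std::snprintf(buf, sizeof(buf), "%d%02d%02d", major, minor, patch);
      std::printf("4.8.1 packs to %s\n", buf);  // prints 40801
      return 0;
    }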
+check_compiler $CXX + +echo "# Base configuration" >> kaldi.mk +echo >> kaldi.mk if $dynamic_kaldi ; then -KALDILIBDIR=`pwd`/lib -echo "KALDI_FLAVOR := dynamic" >> kaldi.mk -echo "KALDILIBDIR := $KALDILIBDIR" >> kaldi.mk + KALDILIBDIR=`pwd`/lib + echo "KALDI_FLAVOR := dynamic" >> kaldi.mk + echo "KALDILIBDIR := $KALDILIBDIR" >> kaldi.mk fi -echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk -echo "FSTROOT = $FSTROOT" >> kaldi.mk - -# Check installed OpenFst version and add C++11 flags if OpenFst >= 1.4 -OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" -echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk -OPENFST_VER_NUM=`echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d"` -if [ $OPENFST_VER_NUM -ge 10400 ]; then - echo "OPENFST_GE_10400 = 1" >> kaldi.mk - echo "EXTRA_CXXFLAGS += -DHAVE_OPENFST_GE_10400 -std=c++0x" >> kaldi.mk +if $double_precision; then + echo "DOUBLE_PRECISION = 1" >> kaldi.mk +else + echo "DOUBLE_PRECISION = 0" >> kaldi.mk +fi +echo "Checking OpenFst library in $FSTROOT ..." +if [ ! -f $FSTROOT/include/fst/fst.h ]; then + failure "Could not find file $FSTROOT/include/fst/fst.h: + you may not have installed OpenFst. See ../tools/INSTALL" +fi +OPENFST_VER=${OPENFST_VER:-$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::')} +OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") +if [ $OPENFST_VER_NUM -lt 10600 ]; then + failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.6.0." +fi +echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk +if $static_fst ; then + OPENFSTLIBS="$FSTROOT/lib/libfst.a" else - echo "OPENFST_GE_10400 = 0" >> kaldi.mk + if [ "`uname`" == "Darwin" ]; then + OPENFSTLIBS="$FSTROOT/lib/libfst.dylib" + OPENFSTLDFLAGS="-Wl,-rpath -Wl,${FSTROOT}/lib" + elif [ "`uname`" == "Linux" ]; then + OPENFSTLIBS="$FSTROOT/lib/libfst.so" + OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" + else + failure "Dynamic libraries are not supported on this platform. + Run configure with --static --static-fst=no flag." + fi fi +if [ ! -f "$OPENFSTLIBS" ]; then + failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" +fi +echo "OPENFSTLIBS = $OPENFSTLIBS" >> kaldi.mk +echo "OPENFSTLDFLAGS = $OPENFSTLDFLAGS" >> kaldi.mk +echo >> kaldi.mk -# Most of the OS-specific steps below will append to kaldi.mk +# OS-specific steps given below append to kaldi.mk echo "Doing OS specific configurations ..." -# Check for Darwin at first, because we later call uname -o (for Cygwin) -# which crashes on Darwin. Also the linear algebra libraries on Macs are -# used differently (through the Accelerate framework) than on Linux. -if [ "`uname`" == "Darwin" ]; then - $use_cuda && configure_cuda - echo "On Darwin: checking for Accelerate framework ..." +if $android ; then + if [ -z $ANDROIDINC ] ; then + failure "--android-incdir must be specified for android builds." + fi + + if ! is_set $HOST; then + failure "HOST must be specified for android builds." + fi + + OPENBLASROOT=`rel2abs "$OPENBLASROOT"` + if [ -z "$OPENBLASROOT" ]; then + failure "The location of OPENBLAS must be specified for android builds + using --openblas-root (and it must exist)" + fi + if [ ! -f $OPENBLASROOT/lib/libopenblas.a ]; then + failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.a" + fi + echo "Using OpenBLAS as the linear algebra library." 
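# A worked example of the version encoding used in the OpenFst check above (a sketch, assuming bash):
#
#   OPENFST_VER=1.6.2
#   echo $OPENFST_VER | sed 's/\./ /g'                               # "1 6 2"
#   echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d"   # "10602"
#
# Major, minor and patch are packed into one integer, with minor and patch
# zero-padded to two digits, so the numeric test [ $OPENFST_VER_NUM -lt 10600 ]
# rejects any OpenFst release older than 1.6.0.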
+ + OPENBLASLIBS="$OPENBLASROOT/lib/libopenblas.a $OPENBLASROOT/lib/libclapack.a $OPENBLASROOT/lib/liblapack.a $OPENBLASROOT/lib/libblas.a $OPENBLASROOT/lib/libf2c.a" + echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk + echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk + echo "ANDROIDINC = $ANDROIDINC" >> kaldi.mk + + cat makefiles/android_openblas.mk >> kaldi.mk + + echo "Successfully configured for Android with OpenBLAS from $OPENBLASROOT." + +elif [ "`uname`" == "Darwin" ]; then + # Check for Darwin first, because we later call uname -o (for Cygwin) + # which crashes on Darwin. + + echo "On Darwin: Checking for Accelerate framework ..." if [ ! -e /System/Library/Frameworks/Accelerate.framework ]; then - failure "Need the Accelerate.framework to compile on Darwin." + failure "Need the Accelerate framework to compile on Darwin." fi - if [ ! -f $FSTROOT/lib/libfst.a ]; then - failure "Static OpenFST library not found: See ../tools/INSTALL" + OSX_VER=$(sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }') + OSX_VER_NUM=$(echo $OSX_VER | sed 's/\./ /g' | xargs printf "%d%02d") + echo "Configuring for OS X version $OSX_VER ..." + if [ $OSX_VER_NUM -ge 1005 ]; then + cat makefiles/darwin.mk >> kaldi.mk + else + failure "Mac OS X version '$OSX_VER' is not supported." fi - # posix_memalign and gcc -rdynamic options not present on OS X 10.5.* - osx_ver=`sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }'` - echo "Configuring for OS X version $osx_ver ..." - if [ "$osx_ver" == "10.5" ]; then - check_exists makefiles/darwin_10_5.mk - cat makefiles/darwin_10_5.mk >> kaldi.mk - elif [ "$osx_ver" == "10.6" ]; then - check_exists makefiles/darwin_10_6.mk - cat makefiles/darwin_10_6.mk >> kaldi.mk - elif [ "$osx_ver" == "10.7" ]; then - check_exists makefiles/darwin_10_7.mk - cat makefiles/darwin_10_7.mk >> kaldi.mk - elif [ "$osx_ver" == "10.8" ]; then - check_exists makefiles/darwin_10_8.mk - cat makefiles/darwin_10_8.mk >> kaldi.mk - elif [ "$osx_ver" == "10.9" ]; then - check_exists makefiles/darwin_10_9.mk - cat makefiles/darwin_10_9.mk >> kaldi.mk - elif [ "$osx_ver" == "10.10" ]; then - check_exists makefiles/darwin_10_10.mk - cat makefiles/darwin_10_10.mk >> kaldi.mk - elif [ "$osx_ver" == "10.11" ]; then - check_exists makefiles/darwin_10_11.mk - cat makefiles/darwin_10_11.mk >> kaldi.mk + + if [ $OSX_VER_NUM == 1011 ]; then echo "**BAD WARNING**: You are using OS X El Capitan. Some versions of this OS" echo "**BAD WARNING**: have a bug in the BLAS implementation that affects Kaldi." echo "**BAD WARNING**: After compiling, cd to matrix/ and type 'make test'. The" echo "**BAD WARNING**: test will fail if the problem exists in your version. " echo "**BAD WARNING**: Eventually this issue will be fixed by system updates from" - echo "**BAD WARNING** Apple. Unexplained crashes with reports of NaNs will" - echo "**BAD WARNING** be caused by this bug, but some recipes will (sometimes) work." + echo "**BAD WARNING**: Apple. Unexplained crashes with reports of NaNs will" + echo "**BAD WARNING**: be caused by this bug, but some recipes will (sometimes) work." sleep 1; echo -n .; sleep 1; echo -n .; sleep 1; echo . - elif [ "$osx_ver" == "10.12" ]; then - check_exists makefiles/darwin_10_12.mk - cat makefiles/darwin_10_12.mk >> kaldi.mk - else - failure "OS X version '$osx_ver' not supported" fi - echo "Configuration succeeded for platform Darwin." 
- exit_success; -fi - -if [ "`uname -o`" == "Cygwin" ]; then - echo "On Cygwin: checking for linear algebra libraries ..." - if [ ! -f ../tools/CLAPACK/clapack.h ]; then - failure "could not find file ../tools/CLAPACK/clapack.h" - fi - if [ ! -f /usr/lib/lapack/cygblas-0.dll ]; then - failure "please first install package liblapack0" - fi - cat makefiles/cygwin.mk >> kaldi.mk - echo "Configuration succeeded for platform cygwin" - exit_success; -fi + echo "Successfully configured for Darwin with Accelerate framework." + $use_cuda && configure_cuda -if [ "`uname`" == "Linux" ]; then - if $static_fst ; then - OPENFSTLIBS="$FSTROOT/lib/libfst.a" - fst_type='a' - else - OPENFSTLIBS="-L${FSTROOT}/lib -lfst" - OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" - fst_type='so' +elif [ "`uname -o`" == "Cygwin" ]; then + echo "On Cygwin: Checking for linear algebra libraries ..." + if [ ! -f ../tools/CLAPACK/clapack.h ]; then + failure "could not find file ../tools/CLAPACK/clapack.h" fi - if [ ! -f "$FSTROOT/lib/libfst.${fst_type}" ]; then - failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" + if [ ! -f /usr/lib/lapack/cygblas-0.dll ]; then + failure "please first install package liblapack0" fi - echo OPENFSTLIBS = $OPENFSTLIBS >> kaldi.mk - echo OPENFSTLDFLAGS = $OPENFSTLDFLAGS >> kaldi.mk + cat makefiles/cygwin.mk >> kaldi.mk + echo "Successfully configured for Cygwin with CLAPACK." +elif [ "`uname`" == "Linux" ]; then echo "On Linux: Checking for linear algebra header files ..." if [ "$MATHLIB" == "ATLAS" ]; then if [ ! -f $ATLASROOT/include/cblas.h ] || [ ! -f $ATLASROOT/include/clapack.h ] ; then @@ -1008,7 +1175,7 @@ if [ "`uname`" == "Linux" ]; then linux_configure_redhat || \ linux_configure_redhat_fat 64 || \ linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS lbiraries"; + linux_atlas_failure "Failed to configure ATLAS libraries"; else # Prefer dynamic to static math. linux_configure_debian_ubuntu3 || \ @@ -1021,11 +1188,11 @@ if [ "`uname`" == "Linux" ]; then linux_configure_redhat || \ linux_configure_redhat_fat 64 || \ linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS lbiraries"; + linux_atlas_failure "Failed to configure ATLAS libraries"; fi elif [ "$MATHLIB" == "MKL" ]; then - if [ "`uname -m`" != "x86_64" ]; then + if [ "$TARGET_ARCH" != "x86_64" ]; then failure "MKL on Linux only supported for Intel(R) 64 architecture (x86_64). See makefiles/linux_64_mkl.mk to manually configure for other platforms." fi @@ -1066,42 +1233,39 @@ if [ "`uname`" == "Linux" ]; then if [ ! -z $MKLLIBDIR ]; then echo MKLLIB = $MKLLIBDIR >> kaldi.mk fi + echo >> kaldi.mk check_exists makefiles/linux_x86_64_mkl.mk cat makefiles/linux_x86_64_mkl.mk >> kaldi.mk - fix_cxx_flag echo "MKLFLAGS = ${MKL_LINK_LINE} ${THREADING_LINE} $EXTRA_LIBS " >> kaldi.mk - + echo "Successfully configured for Linux with MKL libs from $MKLROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured for Linux with MKL libs from $MKLROOT" - exit_success; elif [ "$MATHLIB" == "CLAPACK" ]; then if [ -z "$CLAPACKROOT" ]; then failure "Must specify the location of CLAPACK with --clapack-root option (and it must exist)" fi if [ ! -f ../tools/CLAPACK/clapack.h ]; then - failure could not find file ../tools/CLAPACK/clapack.h + failure "could not find file ../tools/CLAPACK/clapack.h" fi if [ ! 
-d "$CLAPACKROOT" ]; then failure "The directory $CLAPACKROOT does not exist" fi # Also check for cblas.h and f2c.h - echo "Using CLAPACK as the linear algebra library." + echo "Using CLAPACK libs from $CLAPACKROOT as the linear algebra library." if [ ! -f makefiles/linux_clapack.mk ]; then failure "makefiles/linux_clapack.mk not found." fi - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_clapack_arm.mk >> kaldi.mk else cat makefiles/linux_clapack.mk >> kaldi.mk fi - fix_cxx_flag - echo "Warning (CLAPACK): this part of the configure process is not properly tested and will not work." + echo "Warning (CLAPACK): this part of the configure process is not properly tested and may not work." + echo "Successfully configured for Linux with CLAPACK libs from $CLAPACKROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured for Linux with CLAPACK libs from $CLAPACKROOT" - exit_success; + elif [ "$MATHLIB" == "OPENBLAS" ]; then OPENBLASROOT=`rel2abs "$OPENBLASROOT"` if [ -z "$OPENBLASROOT" ]; then @@ -1110,7 +1274,7 @@ if [ "`uname`" == "Linux" ]; then if [ ! -f $OPENBLASROOT/lib/libopenblas.so ]; then failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.so" fi - echo "Your math library seems to be OpenBLAS. Configuring appropriately." + echo "Your math library seems to be OpenBLAS from $OPENBLASROOT. Configuring appropriately." if $static_math; then echo "Configuring static OpenBlas since --static-math=yes" OPENBLASLIBS="$OPENBLASROOT/lib/libopenblas.a -lgfortran" @@ -1118,23 +1282,41 @@ if [ "`uname`" == "Linux" ]; then echo "Configuring dynamically loaded OpenBlas since --static-math=no (the default)" OPENBLASLIBS="-L$OPENBLASROOT/lib -lopenblas -lgfortran -Wl,-rpath=$OPENBLASROOT/lib" fi + echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk - echo "OPENBLASROOT = $OPENBLASROOT" >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + echo >> kaldi.mk + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_openblas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_openblas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_openblas.mk >> kaldi.mk fi - fix_cxx_flag + echo "Successfully configured for Linux with OpenBLAS from $OPENBLASROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured OpenBLAS from $OPENBLASROOT." - exit_success; + else failure "Unsupported linear algebra library '$MATHLIB'" fi +else + failure "Could not detect the platform or we have not yet worked out the + appropriate configuration for this platform. Please contact the developers." fi -failure Could not detect platform or we have not yet worked out the appropriate configuration for this platform. Please contact the developers. +# Append the flags set by environment variables last so they can be used +# to override the automatically generated configuration. +echo >> kaldi.mk +echo "# Environment configuration" >> kaldi.mk +echo >> kaldi.mk +if [ -n "$ENV_CXXFLAGS" ]; then echo "CXXFLAGS += $ENV_CXXFLAGS" >> kaldi.mk; fi +if [ -n "$ENV_LDFLAGS" ]; then echo "LDFLAGS += $ENV_LDFLAGS" >> kaldi.mk; fi +if [ -n "$ENV_LDLIBS" ]; then echo "LDLIBS += $ENV_LDLIBS" >> kaldi.mk; fi + +# We check for slow exp implementation just before we exit. This check uses +# and possibly modifies the kaldi.mk file that we just generated. 
+check_for_slow_expf; +echo "SUCCESS" +echo "To compile: make clean -j; make depend -j; make -j" +echo " ... or e.g. -j 10, instead of -j, to use a specified number of CPUs" +exit 0; diff --git a/src/cudamatrix/cu-allocator.h b/src/cudamatrix/cu-allocator.h index b10601b8245..c6500e95559 100644 --- a/src/cudamatrix/cu-allocator.h +++ b/src/cudamatrix/cu-allocator.h @@ -141,7 +141,7 @@ class CuMemoryAllocator { // be a multiple of 4, and num_rows will frequently be a multiple of // powers of 2 also. We need to shift right and add so that there will be // some action in the lower-order bits. - size_t operator () (const std::pair<size_t, size_t> &p) const { + size_t operator () (const std::pair<size_t, size_t> &p) const noexcept { size_t temp = p.first + 1867 * p.second; return temp + (temp >> 2) + (temp >> 8); } @@ -206,7 +206,7 @@ class CuMemoryAllocator { }; struct PointerHasher { - size_t operator() (const void *arg) const { + size_t operator() (const void *arg) const noexcept { // the last few bits tend to be very predictable, for alignment reasons (CUDA // allocation may align on 256 byte or 512 byte boundaries or something similar). size_t temp = reinterpret_cast<size_t>(arg); diff --git a/src/cudamatrix/cu-array-test.cc b/src/cudamatrix/cu-array-test.cc index f3ebcb72ee0..863ca5dde18 100644 --- a/src/cudamatrix/cu-array-test.cc +++ b/src/cudamatrix/cu-array-test.cc @@ -116,8 +116,9 @@ static void UnitTestCuArray() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -134,8 +135,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-block-matrix-test.cc b/src/cudamatrix/cu-block-matrix-test.cc index 4193e61c609..387749904b1 100644 --- a/src/cudamatrix/cu-block-matrix-test.cc +++ b/src/cudamatrix/cu-block-matrix-test.cc @@ -181,8 +181,9 @@ template<typename Real> void CuBlockMatrixUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -200,12 +201,13 @@ int main() { #else kaldi::CuBlockMatrixUnitTest(); #endif + if (loop == 0) KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-device-test.cc b/src/cudamatrix/cu-device-test.cc index ec0fa7b1f9f..8f44985ede0 100644 --- a/src/cudamatrix/cu-device-test.cc +++ b/src/cudamatrix/cu-device-test.cc @@ -99,8 +99,8 @@ void CudaMatrixResizeTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -118,9 +118,10 @@ int main() { #else kaldi::CudaMatrixResizeTest(); #endif - } + #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index 878ba216407..5b72a62e716 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ 
b/src/cudamatrix/cu-kernels-ansi.h @@ -30,673 +30,651 @@ #if HAVE_CUDA == 1 extern "C" { -/********************************************************* - * int32 CUDA kernel calls (no template wrapper) - */ -void cuda_int32_set_const(dim3 Gr, dim3 Bl, int32_cuda *mat, int32_cuda value, - MatrixDim d); -void cuda_int32_add(dim3 Gr, dim3 Bl, int32_cuda *mat, int32_cuda value, - MatrixDim d); - -/********************************************************* - * float CUDA kernel calls - */ - -/* - * CuMatrix - */ -void cudaF_copy_upp_low(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA); -void cudaF_copy_low_upp(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA); -void cudaF_add_diag_vec_mat(dim3 Gr, dim3 Bl, float alpha, float *mat, - MatrixDim mat_dim, const float *vec, - const float *mat2, int mat2_row_stride, - int mat2_col_stride, float beta); -void cudaF_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const float* B, - MatrixDim dmat); -void cudaFD_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const double* B, - MatrixDim dmat); -void cudaF_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const float* B, - MatrixDim dmat); -void cudaFD_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const double* B, - MatrixDim dmat); -void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); -void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d); -void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, - bool include_sign, MatrixDim d); -void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); -void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, - MatrixDim d); -void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride); -void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, +void cudaD_add_col_sum_mat(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d, const double alpha, + const double beta); +void cudaF_add_col_sum_mat(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d, const float alpha, + const float beta); +void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride); -void cudaF_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride); -void cudaF_copy_rows_direct(dim3 Gr, dim3 Bl, float* dst, - const float* const * src, MatrixDim dst_dim); -void cudaF_copy_to_rows_direct(dim3 Gr, dim3 Bl, float* const * dst, - const float* src, MatrixDim src_dim); -void cudaF_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, const float* src, +void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride); -void cudaF_add_rows_direct(dim3 Gr, dim3 Bl, float alpha, float* dst, - const float* const * src, MatrixDim dst_dim); -void cudaF_add_to_rows_direct(dim3 Gr, dim3 Bl, float alpha, float* const * dst, - const float* src, MatrixDim src_dim); -void cudaF_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val, - MatrixDim d); -void cudaF_set_diag(int Gr, int Bl, float* mat, float value, MatrixDim d); -void cudaF_set_diag_packed(int Gr, int Bl, float* mat, float value, int dim); +void cudaD_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const double alpha, + const double* M, const int strid_M, + const double* N, const MatrixDim dim_N, + const double beta, double* v); +void cudaF_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, 
const float alpha, + const float* M, const int strid_M, + const float* N, const MatrixDim dim_N, + const float beta, float* v); +void cudaD_add_diag_mat_mat_MNT(int Gr, int Bl, const double alpha, + const double* M, const MatrixDim dim_M, + const double* N, const int stride_N, + const double beta, double* v); +void cudaF_add_diag_mat_mat_MNT(int Gr, int Bl, const float alpha, + const float* M, const MatrixDim dim_M, + const float* N, const int stride_N, + const float beta, float* v); +void cudaD_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const double alpha, + const double* M, const int strid_M, + const double* N, const MatrixDim dim_N, + const double beta, double* v); +void cudaF_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const float alpha, + const float* M, const int strid_M, + const float* N, const MatrixDim dim_N, + const float beta, float* v); +void cudaD_add_diag_packed(int Gr, int Bl, double* mat, double value, int dim); void cudaF_add_diag_packed(int Gr, int Bl, float* mat, float value, int dim); -void cudaF_set_const(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d); -void cudaF_set_zero_above_diag(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); +void cudaD_add_diag_vec_mat(dim3 Gr, dim3 Bl, double alpha, double *mat, + MatrixDim mat_dim, const double *vec, + const double *mat2, int mat2_row_stride, + int mat2_col_stride, double beta); +void cudaF_add_diag_vec_mat(dim3 Gr, dim3 Bl, float alpha, float *mat, + MatrixDim mat_dim, const float *vec, + const float *mat2, int mat2_row_stride, + int mat2_col_stride, float beta); +void cudaD_add(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); void cudaF_add(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d); -void cudaF_add_vec2(dim3 Gr, dim3 Bl, float* mat, const float* vec, - const float alpha, int dim); -void cudaF_scale_diag_packed(int Gr, int Bl, float* mat, float value, int dim); -void cudaF_scale(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d); -void cudaF_apply_log(dim3 Gr, dim3 Bl, float *mat, MatrixDim d); -void cudaF_mul_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, - MatrixDim dst_d, int src_stride); -void cudaF_div_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, - MatrixDim dst_d, int src_stride); -void cudaF_max(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, - int src_stride); -void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, - MatrixDim d); -void cudaF_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, - MatrixDim d); -void cudaF_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, int group_size); -void cudaF_diff_group_pnorm(dim3 Gr, dim3 Bl, float *id, const float *iv, - const float *ov, const float* od, MatrixDim id_dim, - int iv_stride, int ov_stride, int od_stride, - int group_size, float power); -void cudaF_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, - const float *x2, MatrixDim y_dim, int x1_stride, - int x2_stride, int group_size); -void cudaF_div_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *vec_div, - MatrixDim d); -void cudaF_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, float *dst, - MatrixDim d, int src_stride, int A_trans); +void cudaD_add_mat_blockmat(dim3 Gr, dim3 Bl, double *data, MatrixDim d, + const double *Adata, int A_num_rows, int A_num_cols, + int A_row_stride, int A_col_stride, + const CuBlockMatrixData *B_cu_data, + int B_num_blocks, double alpha, double beta, + int B_trans); +void cudaF_add_mat_blockmat(dim3 Gr, dim3 Bl, float *data, 
MatrixDim d, + const float *Adata, int A_num_rows, int A_num_cols, + int A_row_stride, int A_col_stride, + const CuBlockMatrixData *B_cu_data, + int B_num_blocks, float alpha, float beta, + int B_trans); +void cudaD_add_mat_blocks(dim3 Gr, dim3 Bl, double alpha, const double *src, + int32_cuda num_row_blocks, int32_cuda num_col_blocks, + double *dst, MatrixDim d, int src_stride, + int A_trans); void cudaF_add_mat_blocks(dim3 Gr, dim3 Bl, float alpha, const float *src, int32_cuda num_row_blocks, int32_cuda num_col_blocks, float *dst, MatrixDim d, int src_stride, int A_trans); -void cudaF_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, const float *B, - const float *C, float *dst, MatrixDim d, - int stride_a, int stride_b, int stride_c); -void cudaF_add_vec_to_cols(dim3 Gr, dim3 Bl, float alpha, const float *col, - float beta, float *dst, MatrixDim d); -void cudaF_add_vec_to_rows(dim3 Gr, dim3 Bl, float alpha, const float *row, - float beta, float *dst, MatrixDim d); +void cudaD_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat, + MatrixDim mat_dim, const double *mat2, + int mat2_row_stride, int mat2_col_stride, + const double *vec, double beta); void cudaF_add_mat_diag_vec(dim3 Gr, dim3 Bl, float alpha, float *mat, MatrixDim mat_dim, const float *mat2, int mat2_row_stride, int mat2_col_stride, const float *vec, float beta); +void cudaD_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, + double *dst, MatrixDim d, int src_stride, int A_trans); +void cudaF_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, float *dst, + MatrixDim d, int src_stride, int A_trans); +void cudaD_add_mat_mat_elements(dim3 Gr, dim3 Bl, double *data, + const double *srcA_data, + const double *srcB_data, MatrixDim dim, + int srcA_stride, int srcB_stride, double alpha, + double beta); void cudaF_add_mat_mat_elements(dim3 Gr, dim3 Bl, float *data, const float *srcA_data, const float *srcB_data, MatrixDim dim, int srcA_stride, int srcB_stride, float alpha, float beta); -/* - * CuVector - */ -void cudaF_max_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d); -void cudaF_min_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d); -void cudaF_sum_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d); -void cudaF_replace_value(int Gr, int Bl, float *v, int dim, float orig, - float changed); -void cudaF_set_bias_params(int Gr, int Bl, float* v, const float* a, - float param_1, float param_2, float param_3, - int* flag, int dim); -void cublas_copy_kaldi_fd(int Gr, int Bl, int n, const float* x, int incx, - double* y, int incy); -void cublas_copy_kaldi_df(int Gr, int Bl, int n, const double* x, int incx, - float* y, int incy); -void cudaF_vec_mul_elements(int Gr, int Bl, float* v, const float* a, int dim); -void cudaF_vec_soft_max(int Gr, int Bl, float* v, int dim); -void cudaF_vec_min(int Gr, int Bl, const float* v, float* value, int dim, - int inc); -void cudaF_vec_max(int Gr, int Bl, const float* v, float* value, int dim, - int inc); -void cudaF_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const float* A, const float* B, - MatrixDim dA, int B_stride, float* value); -void cudaF_trace_mat_mat(dim3 Gr, dim3 Bl, const float* A, const float* B, - MatrixDim dA, int B_stride, float* value); -void cudaF_add_diag_mat_mat_MNT(int Gr, int Bl, const float alpha, - const float* M, const MatrixDim dim_M, - const float* N, const int stride_N, - const float beta, float* v); -void cudaF_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const float 
alpha, - const float* M, const int strid_M, - const float* N, const MatrixDim dim_N, - const float beta, float* v); -void cudaF_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const float alpha, - const float* M, const int strid_M, - const float* N, const MatrixDim dim_N, - const float beta, float* v); +void cudaD_add_row_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, + const double *src_data, MatrixDim src_dim, + const Int32Pair *indexes); +void cudaF_add_row_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, + const float *src_data, MatrixDim src_dim, + const Int32Pair *indexes); +void cudaD_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, + const double* src, const MatrixIndexT_cuda* reorder, + MatrixDim dst_dim, int src_stride); +void cudaF_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, const float* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride); +void cudaD_add_rows_direct(dim3 Gr, dim3 Bl, double alpha, double* dst, + const double* const * src, MatrixDim dst_dim); +void cudaF_add_rows_direct(dim3 Gr, dim3 Bl, float alpha, float* dst, + const float* const * src, MatrixDim dst_dim); +void cudaD_add_to_rows_direct(dim3 Gr, dim3 Bl, double alpha, + double* const * dst, const double* src, + MatrixDim src_dim); +void cudaF_add_to_rows_direct(dim3 Gr, dim3 Bl, float alpha, float* const * dst, + const float* src, MatrixDim src_dim); +void cudaD_add_vec2(dim3 Gr, dim3 Bl, double *mat, const double *vec, + const double alpha, int dim); +void cudaF_add_vec2(dim3 Gr, dim3 Bl, float* mat, const float* vec, + const float alpha, int dim); +void cudaD_add_vec_to_cols(dim3 Gr, dim3 Bl, double alpha, const double *col, + double beta, double *dst, MatrixDim d); +void cudaF_add_vec_to_cols(dim3 Gr, dim3 Bl, float alpha, const float *col, + float beta, float *dst, MatrixDim d); +void cudaD_add_vec_to_rows(dim3 Gr, dim3 Bl, double alpha, const double *row, + double beta, double *dst, MatrixDim d); +void cudaF_add_vec_to_rows(dim3 Gr, dim3 Bl, float alpha, const float *row, + float beta, float *dst, MatrixDim d); +void cudaD_add_vec_vec(int Gr, int Bl, double alpha, double* v, const double* x, + const double* y, double beta, int dim); void cudaF_add_vec_vec(int Gr, int Bl, float alpha, float* v, const float* x, const float* y, float beta, int dim); -void cudaF_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, - const float* mat, MatrixDim dmat, int dim); -void cudaF_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, - const float* mat, MatrixDim dmat, int dim); -void cudaF_vec_sum(int Gr, int Bl, float* v, float* value, int dim, int inc); -void cudaF_vec_copy_diag_from_packed(int Gr, int Bl, float *dst, - const float *src, int dim); -void cudaF_vec_apply_floor(int Gr, int Bl, float* v, float floor_val, - float* num, int dim); -void cudaF_vec_apply_ceiling(int Gr, int Bl, float* v, float ceiling_val, - float* num, int dim); -void cudaF_vec_apply_exp(int Gr, int Bl, float* v, int dim); -void cudaF_vec_apply_log(int Gr, int Bl, float* v, float* flag, int dim); -void cudaF_trace(int Gr, int Bl, float* mat, float* value, int dim); -void cudaF_invert_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim d); -// Note: B_trans is nonzero if B is transposed. 
-void cudaF_add_mat_blockmat(dim3 Gr, dim3 Bl, float *data, MatrixDim d, - const float *Adata, int A_num_rows, int A_num_cols, - int A_row_stride, int A_col_stride, - const CuBlockMatrixData *B_cu_data, - int B_num_blocks, float alpha, float beta, - int B_trans); -void cudaF_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, - int num_blocks, const float *C_data, - int C_num_cols, int C_row_stride, int C_col_stride, - const float *D_data, int D_row_stride, - int D_col_stride, float alpha, float beta); -/* - * cu:: - */ -void cudaF_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, - MatrixDim d, int src_stride); -void cudaF_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, - MatrixDim y_dim, int x_stride); -void cudaF_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride); -void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride, int group_size, float power); -void cudaF_normalize_per_row(size_t Gr, size_t Bl, float *y, int y_stride, - const float *x, MatrixDim x_d, float tartget_rms, - bool add_log_stddev); -void cudaF_group_spec_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, int group_size, - float power); -void cudaF_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride, int group_size); -void cudaF_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride); -void cudaF_heaviside(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, +void cudaD_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val, + MatrixDim d); +void cudaF_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val, + MatrixDim d); +void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); +void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); +void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, + MatrixDim d); +void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, + MatrixDim d); +void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); +void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); +void cudaD_apply_log(dim3 Gr, dim3 Bl, double *mat, MatrixDim d); +void cudaF_apply_log(dim3 Gr, dim3 Bl, float *mat, MatrixDim d); +void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, + bool include_sign, MatrixDim d); +void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, + bool include_sign, MatrixDim d); +void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d); +void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d); +void cudaD_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, + int num_blocks, const double *C_data, + int C_num_cols, int C_row_stride, int C_col_stride, + const double *D_data, int D_row_stride, + int D_col_stride, double alpha, double beta); +void cudaF_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, + int num_blocks, const float *C_data, + int C_num_cols, int C_row_stride, int C_col_stride, + const float *D_data, int D_row_stride, + int D_col_stride, float alpha, float beta); +void cudaD_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, + const double *x2, MatrixDim y_dim, + int x1_stride, int x2_stride, int group_size); +void cudaF_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, + const float *x2, MatrixDim y_dim, int x1_stride, + int x2_stride, int group_size); 
+void cudaD_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, int s, + const double* z, MatrixDim d, double* z2, + MatrixDim d2, double* t); +void cudaF_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, int s, + const float* z, MatrixDim d, float* z2, MatrixDim d2, + float* t); +void cudaD_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, + const double* mat, MatrixDim dmat, int dim); +void cudaF_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, + const float* mat, MatrixDim dmat, int dim); +void cudaD_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, + const double* mat, MatrixDim dmat, int dim); +void cudaF_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, + const float* mat, MatrixDim dmat, int dim); +void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride); -void cudaF_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, - const float *y, MatrixDim d, int e_stride, - int y_stride); -void cudaF_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride); -void cudaF_diff_tanh(dim3 Gr, dim3 Bl, float *eout, const float *e, - const float *y, MatrixDim d, int e_stride, int y_stride); -void cudaF_parametric_relu(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, const float *a, - const float *b); -void cudaF_diff_parametric_relu(dim3 Gr, dim3 Bl, float *eout, const float *e, - const float *y, MatrixDim d, int e_stride, - int y_stride, const float *a, const float *b); - -void cudaF_regularize_l1(dim3 Gr, dim3 Bl, float *wei, float *grad, float l1, - float lr, MatrixDim d, int stride_grad); -void cudaF_find_row_max_id(dim3 Gr, dim3 Bl, const float *mat, float *vec_val, - int32_cuda *vec_id, MatrixDim d); -void cudaF_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, - float *mat_net_out, float *vec_log_post, MatrixDim d); -void cudaF_diff_softmax(dim3 Gr, dim3 Bl, float* x, const MatrixDim dim, - const float* value, const int value_stride, - const float* diff, const int diff_stride); -void cudaF_copy_rows_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, +void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride); +void cudaD_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in); +void cudaF_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, const float *v_in); - -void cudaF_randomize(dim3 Gr, dim3 Bl, float *y, const float *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in); -void cudaF_splice(dim3 Gr, dim3 Bl, float *y, const float *x, - const int32_cuda *off, MatrixDim d_out, MatrixDim d_in); -void cudaF_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, - const float* out_value, const int out_value_stride, - const float* out_deriv, const int out_deriv_stride, - float* in_deriv); -void cudaF_one(int Gr, int Bl, float* x, int dim); +void cudaD_copy(dim3 Gr, dim3 Bl, double *y, const double *x, + const int32_cuda *copy_from, MatrixDim d_out, MatrixDim d_in); void cudaF_copy(dim3 Gr, dim3 Bl, float *y, const float *x, const int32_cuda *copy_from, MatrixDim d_out, MatrixDim d_in); +void cuda_copy_from_mat_dd(dim3 Gr, dim3 Bl, double *mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in); +void 
cuda_copy_from_mat_df(dim3 Gr, dim3 Bl, double* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_fd(dim3 Gr, dim3 Bl, float *mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_fd_trans(dim3 Gr, dim3 Bl, float *mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_ff(dim3 Gr, dim3 Bl, float* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_mat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in); +void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in); +void cudaD_copy_from_sp(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_out); void cudaF_copy_from_sp(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_out); -void cudaF_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in); -void cudaF_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in); -void cudaF_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in); -void cudaF_matrix_add_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, - float alpha, MatrixElement<float>* x, - int num_elements); -void cudaF_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, - float alpha, const Int32Pair* indices, - const float* x, int s, float* data); -void cudaF_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, int s, - const float* z, MatrixDim d, float* z2, MatrixDim d2, - float* t); -void cudaF_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, const float* T, - MatrixDim tdim, float *S, MatrixDim sdim); -void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, - const float *src_data, MatrixDim src_dim, - const Int32Pair *indices); -void cudaF_add_row_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, - const float *src_data, MatrixDim src_dim, - const Int32Pair *indexes); -void cudaF_matrix_lookup(dim3 Gr, dim3 Bl, const float *data, MatrixDim dim, - const Int32Pair *indices, int indices_size, - float *output); - -void cudaF_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, - const float *mat2, float *mask, - MatrixDim mat1_dim, int mat2_stride, - int mask_stride); - -/********************************************************* - * double CUDA kernel 
calls - */ - -/* - * CuMatrix - */ -void cudaD_copy_upp_low(dim3 Gr, dim3 Bl, double* A, MatrixDim dimB); -void cudaD_copy_low_upp(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA); -void cudaD_add_diag_vec_mat(dim3 Gr, dim3 Bl, double alpha, double *mat, - MatrixDim mat_dim, const double *vec, - const double *mat2, int mat2_row_stride, - int mat2_col_stride, double beta); -void cudaD_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const double* B, - MatrixDim dmat); -void cudaDF_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const float* B, - MatrixDim dmat); void cudaD_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const double* B, MatrixDim dmat); void cudaDF_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const float* B, MatrixDim dmat); -void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); -void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d); -void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, - bool include_sign, MatrixDim d); -void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); -void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, - MatrixDim d); -void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, +void cudaFD_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const double* B, + MatrixDim dmat); +void cudaF_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const float* B, + MatrixDim dmat); +void cudaD_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const double* B, + MatrixDim dmat); +void cudaDF_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const float* B, + MatrixDim dmat); +void cudaFD_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const double* B, + MatrixDim dmat); +void cudaF_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const float* B, + MatrixDim dmat); +void cublas_copy_kaldi_df(int Gr, int Bl, int n, const double* x, int incx, + float* y, int incy); +void cublas_copy_kaldi_fd(int Gr, int Bl, int n, const float* x, int incx, + double* y, int incy); +void cudaD_copy_low_upp(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA); +void cudaF_copy_low_upp(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA); +void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride); -void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride); -void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, +void cudaF_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride); void cudaD_copy_rows_direct(dim3 Gr, dim3 Bl, double* dst, const double* const * src, MatrixDim dst_dim); +void cudaF_copy_rows_direct(dim3 Gr, dim3 Bl, float* dst, + const float* const * src, MatrixDim dst_dim); +void cudaD_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in); +void cudaF_copy_rows_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, + const float *v_in); void cudaD_copy_to_rows_direct(dim3 Gr, dim3 Bl, double* const * dst, const double* src, MatrixDim src_dim); -void cudaD_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, - const double* src, const MatrixIndexT_cuda* reorder, - MatrixDim dst_dim, int src_stride); -void cudaD_add_rows_direct(dim3 Gr, dim3 Bl, double alpha, double* dst, - const double* const * src, MatrixDim dst_dim); -void cudaD_add_to_rows_direct(dim3 Gr, dim3 Bl, double alpha, - double* const * dst, const double* src, - MatrixDim 
src_dim); -void cudaD_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val, - MatrixDim d); -void cudaD_set_diag(int Gr, int Bl, double* mat, double value, MatrixDim d); -void cudaD_set_diag_packed(int Gr, int Bl, double* mat, double value, int dim); -void cudaD_add_diag_packed(int Gr, int Bl, double* mat, double value, int dim); -void cudaD_set_const(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); -void cudaD_set_zero_above_diag(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); -void cudaD_add(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); -void cudaD_add_vec2(dim3 Gr, dim3 Bl, double *mat, const double *vec, - const double alpha, int dim); -void cudaD_scale_diag_packed(int Gr, int Bl, double* mat, double value, - int dim); -void cudaD_scale(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); -void cudaD_apply_log(dim3 Gr, dim3 Bl, double *mat, MatrixDim d); -void cudaD_mul_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, - MatrixDim dst_d, int src_stride); -void cudaD_div_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, - MatrixDim dst_d, int src_stride); -void cudaD_max(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, - int src_stride); -void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, - MatrixDim d); -void cudaD_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, - MatrixDim d); -void cudaD_mul_rows_group_mat(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size); +void cudaF_copy_to_rows_direct(dim3 Gr, dim3 Bl, float* const * dst, + const float* src, MatrixDim src_dim); +void cudaD_copy_upp_low(dim3 Gr, dim3 Bl, double* A, MatrixDim dimB); +void cudaF_copy_upp_low(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA); void cudaD_diff_group_pnorm(dim3 Gr, dim3 Bl, double *id, const double *iv, const double *ov, const double* od, MatrixDim id_dim, int iv_stride, int ov_stride, int od_stride, int group_size, double power); -void cudaD_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, - const double *x2, MatrixDim y_dim, - int x1_stride, int x2_stride, int group_size); +void cudaF_diff_group_pnorm(dim3 Gr, dim3 Bl, float *id, const float *iv, + const float *ov, const float* od, MatrixDim id_dim, + int iv_stride, int ov_stride, int od_stride, + int group_size, float power); +void cudaD_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, + const double* out_value, const int out_value_stride, + const double* out_deriv, const int out_deriv_stride, + double* in_deriv); +void cudaF_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, + const float* out_value, const int out_value_stride, + const float* out_deriv, const int out_deriv_stride, + float* in_deriv); +void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, + const int num_rows, const double* input, + const int in_stride, const double* params, + const int params_stride, + const double* output_deriv, + const int output_deriv_stride, + const double* deriv_sum_in, + const int deriv_sum_in_stride, + const double* self_repair_config, + double count, double* input_deriv, + const int input_deriv_stride, + double* params_deriv, + const int params_deriv_stride, + double* value_sum_out, + const int value_sum_out_stride, + double* deriv_sum_out, + const int deriv_sum_out_stride, + double* self_repair_sum_out, + const int self_repair_sum_out_stride); +void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + 
const int have_dropout_mask, + const int num_rows, const float* input, + const int in_stride, const float* params, + const int params_stride, + const float* output_deriv, + const int output_deriv_stride, + const double* deriv_sum_in, + const int deriv_sum_in_stride, + const float* self_repair_config, double count, + float* input_deriv, + const int input_deriv_stride, + float* params_deriv, + const int params_deriv_stride, + double* value_sum_out, + const int value_sum_out_stride, + double* deriv_sum_out, + const int deriv_sum_out_stride, + float* self_repair_sum_out, + const int self_repair_sum_out_stride); +void cudaD_diff_normalize_per_row(size_t Gr, size_t Bl, double *id, + int id_stride, const double *iv, + MatrixDim iv_dim, const double* od, + int od_stride, double target_rms, + bool add_log_stddev); +void cudaF_diff_normalize_per_row(size_t Gr, size_t Bl, float *id, + int id_stride, const float *iv, + MatrixDim iv_dim, const float* od, + int od_stride, float target_rms, + bool add_log_stddev); +void cudaD_diff_parametric_relu(dim3 Gr, dim3 Bl, double *eout, const double *e, + const double *y, MatrixDim d, int e_stride, + int y_stride, const double *a, const double *b); +void cudaF_diff_parametric_relu(dim3 Gr, dim3 Bl, float *eout, const float *e, + const float *y, MatrixDim d, int e_stride, + int y_stride, const float *a, const float *b); +void cudaD_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, + const double *y, MatrixDim d, int e_stride, + int y_stride); +void cudaF_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, + const float *y, MatrixDim d, int e_stride, + int y_stride); +void cudaD_diff_softmax(dim3 Gr, dim3 Bl, double* x, const MatrixDim dim, + const double* value, const int value_stride, + const double* diff, const int diff_stride); +void cudaF_diff_softmax(dim3 Gr, dim3 Bl, float* x, const MatrixDim dim, + const float* value, const int value_stride, + const float* diff, const int diff_stride); +void cudaD_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, + const double *y, MatrixDim d, int e_stride, int y_stride); +void cudaF_diff_tanh(dim3 Gr, dim3 Bl, float *eout, const float *e, + const float *y, MatrixDim d, int e_stride, int y_stride); +void cudaD_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, + double *mat_net_out, double *vec_log_post, MatrixDim d); +void cudaF_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, + float *mat_net_out, float *vec_log_post, MatrixDim d); +void cudaD_div_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, + MatrixDim dst_d, int src_stride); +void cudaF_div_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, + MatrixDim dst_d, int src_stride); void cudaD_div_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *vec_div, MatrixDim d); -void cudaD_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, - double *dst, MatrixDim d, int src_stride, int A_trans); -void cudaD_add_mat_blocks(dim3 Gr, dim3 Bl, double alpha, const double *src, - int32_cuda num_row_blocks, int32_cuda num_col_blocks, - double *dst, MatrixDim d, int src_stride, - int A_trans); -void cudaD_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, - const double *B, const double *C, double *dst, - MatrixDim d, int stride_a, int stride_b, - int stride_c); -void cudaD_add_vec_to_cols(dim3 Gr, dim3 Bl, double alpha, const double *col, - double beta, double *dst, MatrixDim d); -void cudaD_add_vec_to_rows(dim3 Gr, dim3 Bl, double alpha, const double *row, - double beta, double *dst, MatrixDim d); -void 
cudaD_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat, - MatrixDim mat_dim, const double *mat2, - int mat2_row_stride, int mat2_col_stride, - const double *vec, double beta); -void cudaD_add_mat_mat_elements(dim3 Gr, dim3 Bl, double *data, - const double *srcA_data, - const double *srcB_data, MatrixDim dim, - int srcA_stride, int srcB_stride, double alpha, - double beta); - -/* - * CuVector - */ +void cudaF_div_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *vec_div, + MatrixDim d); +void cudaD_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, + const double *mat2, double *mask, + MatrixDim mat1_dim, int mat2_stride, + int mask_stride); +void cudaF_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, + const float *mat2, float *mask, + MatrixDim mat1_dim, int mat2_stride, + int mask_stride); +void cudaD_find_row_max_id(dim3 Gr, dim3 Bl, const double *mat, double *vec_val, + int32_cuda *vec_id, MatrixDim d); +void cudaF_find_row_max_id(dim3 Gr, dim3 Bl, const float *mat, float *vec_val, + int32_cuda *vec_id, MatrixDim d); +void cudaD_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, + int src_stride, int group_size); +void cudaF_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride, int group_size); +void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size, + double power); +void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride, int group_size, float power); +void cudaD_group_spec_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size, + double power); +void cudaF_group_spec_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, int group_size, + float power); +void cudaD_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, + int src_stride); +void cudaF_heaviside(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride); +void cuda_int32_add(dim3 Gr, dim3 Bl, int32_cuda *mat, int32_cuda value, + MatrixDim d); +void cuda_int32_set_const(dim3 Gr, dim3 Bl, int32_cuda *mat, int32_cuda value, + MatrixDim d); +void cudaD_invert_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim d); +void cudaF_invert_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim d); +void cudaD_log_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, + MatrixDim y_dim, int x_stride); +void cudaF_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, + MatrixDim y_dim, int x_stride); +void cudaD_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, + const int in_stride, const double* params, + const int params_stride, const int out_stride, + const int cell_dim, const int have_dropout_mask, + const int num_rows, + double* out); +void cudaF_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, + const int in_stride, const float* params, + const int params_stride, const int out_stride, + const int cell_dim, const int have_dropout_mask, + const int num_rows, + float* out); +void cudaD_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, + double alpha, MatrixElement<double>* x, + int num_elements); +void cudaF_matrix_add_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, + float alpha, MatrixElement<float>* x, + int num_elements); +void cudaD_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, + double alpha, const Int32Pair* indices, + const double* x, int s, double* data); +void cudaF_matrix_add_indexed_values(dim3 Gr, 
dim3 Bl, MatrixDim dim, + float alpha, const Int32Pair* indices, + const float* x, int s, float* data); +void cudaD_matrix_lookup(dim3 Gr, dim3 Bl, const double *data, MatrixDim dim, + const Int32Pair *indices, int indices_size, + double *output); +void cudaF_matrix_lookup(dim3 Gr, dim3 Bl, const float *data, MatrixDim dim, + const Int32Pair *indices, int indices_size, + float *output); +void cudaD_max(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, + int src_stride); +void cudaF_max(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, + int src_stride); void cudaD_max_mat_cols(int Gr, int Bl, double* result, const double* mat, const MatrixDim d); +void cudaF_max_mat_cols(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d); +void cudaD_min(dim3 Gr, dim3 Bl, double *mat, const double *other, + MatrixDim mat_d, int other_stride); +void cudaF_min(dim3 Gr, dim3 Bl, float *mat, const float *other, + MatrixDim mat_d, int other_stride); void cudaD_min_mat_cols(int Gr, int Bl, double* result, const double* mat, const MatrixDim d); -void cudaD_sum_mat_cols(int Gr, int Bl, double* result, const double* mat, +void cudaF_min_mat_cols(int Gr, int Bl, float* result, const float* mat, const MatrixDim d); +void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, + MatrixDim d); +void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, + MatrixDim d); +void cudaD_mul_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, + MatrixDim dst_d, int src_stride); +void cudaF_mul_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, + MatrixDim dst_d, int src_stride); +void cudaD_mul_rows_group_mat(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size); +void cudaF_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, int group_size); +void cudaD_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, + MatrixDim d); +void cudaF_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, + MatrixDim d); +void cudaD_normalize_per_row(size_t Gr, size_t Bl, double *y, int y_stride, + const double *x, MatrixDim x_d, double tartget_rms, + bool add_log_stddev); +void cudaF_normalize_per_row(size_t Gr, size_t Bl, float *y, int y_stride, + const float *x, MatrixDim x_d, float tartget_rms, + bool add_log_stddev); +void cudaD_one(int Gr, int Bl, double* x, int dim); +void cudaF_one(int Gr, int Bl, float* x, int dim); +void cudaD_parametric_relu(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, const double *a, + const double *b); +void cudaF_parametric_relu(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, const float *a, + const float *b); +void cudaD_randomize(dim3 Gr, dim3 Bl, double *y, const double *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in); +void cudaF_randomize(dim3 Gr, dim3 Bl, float *y, const float *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in); +void cudaD_regularize_l1(dim3 Gr, dim3 Bl, double *wei, double *grad, double l1, + double lr, MatrixDim d, int stride_grad); +void cudaF_regularize_l1(dim3 Gr, dim3 Bl, float *wei, float *grad, float l1, + float lr, MatrixDim d, int stride_grad); void cudaD_replace_value(int Gr, int Bl, double *v, int dim, double orig, double changed); +void cudaF_replace_value(int Gr, int Bl, float *v, int dim, float orig, + float changed); +void cudaD_scale_diag_packed(int Gr, int Bl, double* mat, double value, + int 
dim); +void cudaF_scale_diag_packed(int Gr, int Bl, float* mat, float value, int dim); +void cudaD_scale(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); +void cudaF_scale(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d); void cudaD_set_bias_params(int Gr, int Bl, double* v, const double* a, double param_1, double param_2, double param_3, int* flag, int dim); -void cudaD_vec_mul_elements(int Gr, int Bl, double* v, const double* a, - int dim); -void cudaD_vec_soft_max(int Gr, int Bl, double* v, int dim); -void cudaD_vec_min(int Gr, int Bl, const double* v, double* value, int dim, - int inc); -void cudaD_vec_max(int Gr, int Bl, const double* v, double* value, int dim, - int inc); -void cudaD_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const double* A, - const double* B, MatrixDim dA, int B_stride, - double* value); -void cudaD_trace_mat_mat(dim3 Gr, dim3 Bl, const double* A, const double* B, - MatrixDim dA, int B_stride, double* value); -void cudaD_add_diag_mat_mat_MNT(int Gr, int Bl, const double alpha, - const double* M, const MatrixDim dim_M, - const double* N, const int stride_N, - const double beta, double* v); -void cudaD_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const double alpha, - const double* M, const int strid_M, - const double* N, const MatrixDim dim_N, - const double beta, double* v); -void cudaD_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const double alpha, - const double* M, const int strid_M, - const double* N, const MatrixDim dim_N, - const double beta, double* v); -void cudaD_add_vec_vec(int Gr, int Bl, double alpha, double* v, const double* x, - const double* y, double beta, int dim); -void cudaD_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, - const double* mat, MatrixDim dmat, int dim); -void cudaD_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, - const double* mat, MatrixDim dmat, int dim); -void cudaD_vec_sum(int Gr, int Bl, double* v, double* value, int dim, int inc); -void cudaD_vec_copy_diag_from_packed(int Gr, int Bl, double *dst, - const double *src, int dim); -void cudaD_vec_apply_floor(int Gr, int Bl, double* v, double floor_val, - float* num, int dim); -void cudaD_vec_apply_ceiling(int Gr, int Bl, double* v, double ceiling_val, - float* num, int dim); -void cudaD_vec_apply_exp(int Gr, int Bl, double* v, int dim); -void cudaD_vec_apply_log(int Gr, int Bl, double* v, double* flag, int dim); -void cudaD_trace(int Gr, int Bl, double* mat, double* value, int dim); -void cudaD_invert_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim d); -// note: B_trans is nonzero if B is tranposed. 
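The declarations above keep every kernel in matched cudaD_/cudaF_ pairs. The cu-kernels.h hunk later in this patch exposes each pair through a single overloaded name, so templated CuMatrix<Real> code compiles for both precisions; a minimal sketch of that dispatch pattern, using the cudaF_scale/cudaD_scale pair declared above:

// One overloaded C++ name per extern "C" kernel pair; overload resolution
// picks the precision-specific entry point from the argument types.
inline void cuda_scale(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d) {
  cudaF_scale(Gr, Bl, mat, value, d);
}
inline void cuda_scale(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d) {
  cudaD_scale(Gr, Bl, mat, value, d);
}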
-void cudaD_add_mat_blockmat(dim3 Gr, dim3 Bl, double *data, MatrixDim d, - const double *Adata, int A_num_rows, int A_num_cols, - int A_row_stride, int A_col_stride, - const CuBlockMatrixData *B_cu_data, - int B_num_blocks, double alpha, double beta, - int B_trans); -void cudaD_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, - int num_blocks, const double *C_data, - int C_num_cols, int C_row_stride, int C_col_stride, - const double *D_data, int D_row_stride, - int D_col_stride, double alpha, double beta); - -/* - * cu:: - */ -void cudaD_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, - MatrixDim d, int src_stride); -void cudaD_log_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, - MatrixDim y_dim, int x_stride); -void cudaD_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, - int src_stride); -void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size, - double power); -void cudaD_normalize_per_row(size_t Gr, size_t Bl, double *y, int y_stride, - const double *x, MatrixDim x_d, double tartget_rms, - bool add_log_stddev); -void cudaD_group_spec_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size, - double power); -void cudaD_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, - int src_stride, int group_size); +void cudaF_set_bias_params(int Gr, int Bl, float* v, const float* a, + float param_1, float param_2, float param_3, + int* flag, int dim); +void cudaD_set_const(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d); +void cudaF_set_const(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d); +void cudaD_set_diag(int Gr, int Bl, double* mat, double value, MatrixDim d); +void cudaF_set_diag(int Gr, int Bl, float* mat, float value, MatrixDim d); +void cudaD_set_diag_packed(int Gr, int Bl, double* mat, double value, int dim); +void cudaF_set_diag_packed(int Gr, int Bl, float* mat, float value, int dim); +void cudaD_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, + const double *B, const double *C, double *dst, + MatrixDim d, int stride_a, int stride_b, + int stride_c); +void cudaF_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, const float *B, + const float *C, float *dst, MatrixDim d, + int stride_a, int stride_b, int stride_c); +void cudaD_set_zero_above_diag(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); +void cudaF_set_zero_above_diag(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); void cudaD_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride); -void cudaD_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, - int src_stride); -void cudaD_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, - const double *y, MatrixDim d, int e_stride, - int y_stride); -void cudaD_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, - int src_stride); -void cudaD_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, - const double *y, MatrixDim d, int e_stride, int y_stride); -void cudaD_parametric_relu(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, const double *a, - const double *b); -void cudaD_diff_parametric_relu(dim3 Gr, dim3 Bl, double *eout, const double *e, - const double *y, MatrixDim d, int e_stride, - int y_stride, const double *a, const double *b); - -void cudaD_regularize_l1(dim3 Gr, dim3 Bl, double *wei, double *grad, double l1, - double lr, MatrixDim d, int stride_grad); -void 
cudaD_find_row_max_id(dim3 Gr, dim3 Bl, const double *mat, double *vec_val, - int32_cuda *vec_id, MatrixDim d); -void cudaD_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, - double *mat_net_out, double *vec_log_post, MatrixDim d); -void cudaD_diff_softmax(dim3 Gr, dim3 Bl, double* x, const MatrixDim dim, - const double* value, const int value_stride, - const double* diff, const int diff_stride); -void cudaD_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, - MatrixDim d_out, const double *v_in); - -void cudaD_randomize(dim3 Gr, dim3 Bl, double *y, const double *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in); +void cudaF_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride); +void cudaD_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, + int src_stride); +void cudaF_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride); +void cudaD_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, + MatrixDim d, int src_stride); +void cudaF_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, + MatrixDim d, int src_stride); void cudaD_splice(dim3 Gr, dim3 Bl, double *y, const double *x, const int32_cuda *off, MatrixDim d_out, MatrixDim d_in); -void cudaD_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, - const double* out_value, const int out_value_stride, - const double* out_deriv, const int out_deriv_stride, - double* in_deriv); -void cudaD_one(int Gr, int Bl, double* x, int dim); -void cudaD_copy(dim3 Gr, dim3 Bl, double *y, const double *x, - const int32_cuda *copy_from, MatrixDim d_out, MatrixDim d_in); -void cudaD_copy_from_sp(dim3 Gr, dim3 Bl, const double* x, double* y, - MatrixDim d_out); +void cudaF_splice(dim3 Gr, dim3 Bl, float *y, const float *x, + const int32_cuda *off, MatrixDim d_out, MatrixDim d_in); +void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, + const double *src_data, MatrixDim src_dim, + const Int32Pair *indices); +void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, + const float *src_data, MatrixDim src_dim, + const Int32Pair *indices); +void cudaD_sum_mat_cols(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d); +void cudaF_sum_mat_cols(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d); +void cudaD_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, + const double* T, MatrixDim tdim, double *S, + MatrixDim sdim); +void cudaF_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, const float* T, + MatrixDim tdim, float *S, MatrixDim sdim); void cudaD_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in); -void cudaD_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, +void cudaF_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_in); void cudaD_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in); - -// some mostly mixed-type kernels. 
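The cudaD_take_lower / cudaF_take_lower declarations above (with their take_upper and take_mean siblings) back the packed-triangular copy routines. A CPU reference of the assumed semantics for take_lower, under the usual packed layout in which element (r, c) with c <= r lives at index r*(r+1)/2 + c (the layout is an assumption from the packed-matrix convention, not something this header states):

// Copy the lower triangle of a square row-major matrix x, whose row
// stride is `stride`, into packed-triangular storage y.
void take_lower_ref(const double *x, double *y, int rows, int stride) {
  for (int r = 0; r < rows; r++)
    for (int c = 0; c <= r; c++)
      y[r * (r + 1) / 2 + c] = x[r * stride + c];
}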
-void cuda_copy_from_mat_df(dim3 Gr, dim3 Bl, double* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_ff(dim3 Gr, dim3 Bl, float* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_fd(dim3 Gr, dim3 Bl, float *mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_dd(dim3 Gr, dim3 Bl, double *mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_fd_trans(dim3 Gr, dim3 Bl, float *mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in); -void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in); - -void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); -void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in); - -void cudaF_trace_mat_smat(dim3 Gr, dim3 Bl, const float* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, - float* trace_vec_out); -void cudaF_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const float* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, - float* trace_vec_out); +void cudaF_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_in); +void cudaD_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_in); +void cudaF_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_in); +void cudaD_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, + int src_stride); +void cudaF_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride); +void cudaD_trace(int Gr, int Bl, double* mat, double* value, int dim); +void cudaF_trace(int Gr, int Bl, float* mat, float* value, int dim); +void cudaD_trace_mat_mat(dim3 Gr, dim3 Bl, const double* A, const double* B, + MatrixDim dA, int B_stride, double* value); +void cudaF_trace_mat_mat(dim3 Gr, dim3 Bl, const float* A, const float* B, + MatrixDim dA, int B_stride, float* value); +void cudaD_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const double* A, + const double* B, MatrixDim dA, int B_stride, + double* value); +void cudaF_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const float* A, const float* B, + 
MatrixDim dA, int B_stride, float* value); void cudaD_trace_mat_smat(dim3 Gr, dim3 Bl, const double* mat_in, const MatrixElement* smat_in, MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, double* trace_vec_out); +void cudaF_trace_mat_smat(dim3 Gr, dim3 Bl, const float* mat_in, + const MatrixElement* smat_in, + MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, + float* trace_vec_out); void cudaD_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const double* mat_in, const MatrixElement* smat_in, MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, double* trace_vec_out); - -void cudaD_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, - double alpha, MatrixElement* x, - int num_elements); -void cudaD_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, - double alpha, const Int32Pair* indices, - const double* x, int s, double* data); -void cudaD_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement* x, int s, - const double* z, MatrixDim d, double* z2, - MatrixDim d2, double* t); - -void cudaD_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, - const double* T, MatrixDim tdim, double *S, - MatrixDim sdim); -void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, - const double *src_data, MatrixDim src_dim, - const Int32Pair *indices); -void cudaD_add_row_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, - const double *src_data, MatrixDim src_dim, - const Int32Pair *indexes); -void cudaD_matrix_lookup(dim3 Gr, dim3 Bl, const double *data, MatrixDim dim, - const Int32Pair *indices, int indices_size, - double *output); - -void cudaD_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, - const double *mat2, double *mask, - MatrixDim mat1_dim, int mat2_stride, - int mask_stride); - -void cudaD_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, - const int in_stride, const double* params, - const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, - double* out); -void cudaF_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, - const int in_stride, const float* params, - const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, - float* out); -void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, - const int num_rows, const double* input, - const int in_stride, const double* params, - const int params_stride, - const double* output_deriv, - const int output_deriv_stride, - const double* deriv_sum_in, - const int deriv_sum_in_stride, - const double* self_repair_config, - double count, double* input_deriv, - const int input_deriv_stride, - double* params_deriv, - const int params_deriv_stride, - double* value_sum_out, - const int value_sum_out_stride, - double* deriv_sum_out, - const int deriv_sum_out_stride, - double* self_repair_sum_out, - const int self_repair_sum_out_stride); -void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, - const int num_rows, const float* input, - const int in_stride, const float* params, - const int params_stride, - const float* output_deriv, - const int output_deriv_stride, - const double* deriv_sum_in, - const int deriv_sum_in_stride, - const float* self_repair_config, double count, - float* input_deriv, - const int input_deriv_stride, - float* params_deriv, - const int params_deriv_stride, - double* value_sum_out, - const int value_sum_out_stride, - double* deriv_sum_out, - const int deriv_sum_out_stride, - float* self_repair_sum_out, - const int self_repair_sum_out_stride); - - +void cudaF_trace_mat_smat_trans(dim3 Gr, dim3 Bl, 
const float* mat_in, + const MatrixElement<float>* smat_in, + MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, + float* trace_vec_out); +void cudaD_vec_apply_ceiling(int Gr, int Bl, double* v, double ceiling_val, + float* num, int dim); +void cudaF_vec_apply_ceiling(int Gr, int Bl, float* v, float ceiling_val, + float* num, int dim); +void cudaD_vec_apply_exp(int Gr, int Bl, double* v, int dim); +void cudaF_vec_apply_exp(int Gr, int Bl, float* v, int dim); +void cudaD_vec_apply_floor(int Gr, int Bl, double* v, double floor_val, + float* num, int dim); +void cudaF_vec_apply_floor(int Gr, int Bl, float* v, float floor_val, + float* num, int dim); +void cudaD_vec_apply_log(int Gr, int Bl, double* v, double* flag, int dim); +void cudaF_vec_apply_log(int Gr, int Bl, float* v, float* flag, int dim); +void cudaD_vec_copy_diag_from_packed(int Gr, int Bl, double *dst, + const double *src, int dim); +void cudaF_vec_copy_diag_from_packed(int Gr, int Bl, float *dst, + const float *src, int dim); +void cudaD_vec_max(int Gr, int Bl, const double* v, double* value, int dim, + int inc); +void cudaF_vec_max(int Gr, int Bl, const float* v, float* value, int dim, + int inc); +void cudaD_vec_min(int Gr, int Bl, const double* v, double* value, int dim, + int inc); +void cudaF_vec_min(int Gr, int Bl, const float* v, float* value, int dim, + int inc); +void cudaD_vec_mul_elements(int Gr, int Bl, double* v, const double* a, + int dim); +void cudaF_vec_mul_elements(int Gr, int Bl, float* v, const float* a, int dim); +void cudaD_vec_soft_max(int Gr, int Bl, double* v, int dim); +void cudaF_vec_soft_max(int Gr, int Bl, float* v, int dim); +void cudaD_vec_sum(int Gr, int Bl, double* v, double* value, int dim, int inc); +void cudaF_vec_sum(int Gr, int Bl, float* v, float* value, int dim, int inc); } // extern "C" diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 795b4321413..6df0e5af9db 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -41,7 +41,7 @@ static Real _sum_reduce(Real buffer[]) { __syncthreads(); // perform tree-based reduction (sum) while (nTotalThreads > 1) { - int32_cuda halfPoint = ((1 + nTotalThreads) >> 1); // divide by two + int32_cuda halfPoint = ((1 + nTotalThreads) >> 1); // divide by two // only the first half of the threads will be active. if (threadIdx.x >= halfPoint) { // was < // Get the shared value stored by another thread @@ -52,7 +52,7 @@ static Real _sum_reduce(Real buffer[]) { buffer[threadIdx.x - halfPoint] += temp; } __syncthreads(); - nTotalThreads = ((1 + nTotalThreads) >> 1); // divide by two. + nTotalThreads = ((1 + nTotalThreads) >> 1); // divide by two. } // the result return buffer[0]; @@ -385,6 +385,20 @@ static void _max(Real* mat, const Real* A, MatrixDim dst_d, int src_stride) { } } +template<typename Real> +__global__ +static void _min(Real* mat, const Real* other, MatrixDim mat_d, + int other_stride) { + int32_cuda j = blockIdx.x * blockDim.x + threadIdx.x; + int32_cuda i = blockIdx.y * blockDim.y + threadIdx.y; + int32_cuda mat_index = i * mat_d.stride + j; + int32_cuda other_index = i * other_stride + j; + if (j < mat_d.cols && i < mat_d.rows) { + Real a = mat[mat_index], b = other[other_index]; + mat[mat_index] = fmin(a, b); + } +} + template<typename Real> __global__ static void _vec_mul_elements(Real* v, const Real* a, int dim) { @@ -745,6 +759,18 @@ static void _copy_rows_from_vec(Real* m_out, MatrixDim d, const Real* v_in) { } } +// This kernel writes a copy of the vector "v_in" to each col of the matrix +// "m_out".
The dimension of v_in should equal the number of rows of m_out. +template<typename Real> +__global__ +static void _copy_cols_from_vec(Real* m_out, MatrixDim d, const Real* v_in) { + int i = blockIdx.y * blockDim.y + threadIdx.y; // row id + int j = blockIdx.x * blockDim.x + threadIdx.x; // col id + if (i < d.rows && j < d.cols) { + m_out[i * d.stride + j] = v_in[i]; + } +} + // _trace_mat_mat reduce the partial sum to // value[blockIdx.y * gridDim.x + blockIdx.x] // It use shared mem to transpose matrix B to ensure coalesced memory access @@ -1194,7 +1220,7 @@ static void _equal_element_mask(const Real *mat1, const Real *mat2, Real *mask, } enum EnumTransformReduce { - SUM, MAX, MIN, LINFNORM, L2NORM, L1NORM, L0NORM, LPNORM + SUMAB, SUM, MAX, MIN, LINFNORM, L2NORM, L1NORM, L0NORM, LPNORM }; template<EnumTransformReduce TransReduceType, typename Real> @@ -1217,6 +1243,35 @@ struct TransReduceOp { } }; +template<typename Real> +struct TransReduceOp<SUMAB, Real> { + const Real alpha_; + const Real beta_; + TransReduceOp(const Real& a, const Real& b) : + alpha_(a), beta_(b) { + } + __forceinline__ + __device__ Real InitValue() const { + return Real(0); + } + __forceinline__ + __device__ Real Transform(const Real& x) const { + return x; + } + __forceinline__ + __device__ Real Reduce(const Real& a, const Real& b) const { + return a + b; + } + __forceinline__ + __device__ Real PostReduce(const Real& x, const Real& output) const { + if (beta_ == Real(0)) { + return alpha_ * x; + } else { + return alpha_ * x + beta_ * output; + } + } +}; + template<typename Real> struct TransReduceOp<SUM, Real> { __forceinline__ @@ -2266,7 +2321,7 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, } } - const Real kSquaredNormFloor = 1.35525271560688e-20; // 2^-66 + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 if (tid == 0) { ssum[0] = sqrt( fmax(ssum[0] / (target_rms * target_rms * x_d.cols), kSquaredNormFloor)); @@ -2289,6 +2344,87 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, } +template<typename Real> +__global__ +static void _diff_normalize_per_row(Real *id, int id_stride, const Real *iv, + MatrixDim iv_dim, const Real* od, + int od_stride, Real target_rms, + bool add_log_stddev) { + + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 + const Real kInvNormFloor = 8589934592.0; + + const int tid = threadIdx.x; + const int i = blockIdx.x; + const Real* iv_row = iv + i * iv_dim.stride; + const Real* od_row = od + i * od_stride; + + // reduce to CU1DBLOCK elements per row + Real dot_products = Real(0); + Real in_norm = Real(0); + for (int j = tid; j < iv_dim.cols; j += CU1DBLOCK) { + const Real iv_ij = iv_row[j]; + dot_products += iv_ij * od_row[j]; + in_norm += iv_ij * iv_ij; + } + __shared__ Real sprod[CU1DBLOCK]; + __shared__ Real snorm[CU1DBLOCK]; + sprod[tid] = dot_products; + snorm[tid] = in_norm; + __syncthreads(); + + // reduce to 2x warpSize elements per row +# pragma unroll + for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { + if (tid < shift) { + sprod[tid] += sprod[tid + shift]; + snorm[tid] += snorm[tid + shift]; + } + __syncthreads(); + } + + // reduce to 1 element per row + if (tid < warpSize) { +# pragma unroll + for (int shift = warpSize; shift > 0; shift >>= 1) { + sprod[tid] += sprod[tid + shift]; + snorm[tid] += snorm[tid + shift]; + } + } + + // broadcast the sum results + __syncthreads(); + dot_products = sprod[0]; + in_norm = snorm[0]; + + Real log_stddev_deriv; + if (add_log_stddev) { + log_stddev_deriv = Real(1) / max(in_norm, iv_dim.cols * kSquaredNormFloor) + * od_row[iv_dim.cols]; + } + + const Real inv_d_scaled = Real(1) /
(iv_dim.cols * target_rms * target_rms); + in_norm = Real(1) / sqrt(max(in_norm * inv_d_scaled, kSquaredNormFloor)); + + const Real f = in_norm == kInvNormFloor ? Real(0) : in_norm; + dot_products *= f * f * f * inv_d_scaled; + + for (int j = tid; j < iv_dim.cols; j += CU1DBLOCK) { + const Real iv_ij = iv_row[j]; + Real id_ij = id[i * id_stride + j]; + if (add_log_stddev) { + id_ij += log_stddev_deriv * iv_ij; + } + if (id != od) { + id_ij += in_norm * od_row[j]; + } else { + id_ij *= in_norm; + } + id_ij -= dot_products * iv_ij; + id[i * id_stride + j] = id_ij; + } +} + // Per-row log-softmax operation on 'x', with writing to 'y'. // note, x and y may point to the same memory. This is equivalent to setting // matrix y to matrix x and then, for each row of y, subtracting the offset that @@ -2710,6 +2846,9 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, consecutive blocks, each of dimension cell_dim, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + If 'have_dropout_mask' is nonzero, each row of + 'in' will have 3 extra elements, interpreted + as dropout masks/scales for i_t, f_t and o_t. @param [in] params A matrix, of dimension 3 by cell_dim, with rows containing the 3 diagonal parameter matrices used in LSTMs, namely @@ -2734,7 +2873,8 @@ __global__ static void _lstm_nonlinearity(const Real* in, const int in_stride, const Real* params, const int params_stride, const int out_stride, const int cell_dim, - const int num_rows, Real* out) { + const int have_dropout_mask, const int num_rows, + Real* out) { const int tid = threadIdx.x; const int i = blockIdx.x; const Real* i_part = in + i * in_stride; @@ -2747,15 +2887,18 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, const Real* w_oc = params + params_stride * 2; Real* c_t = out + i * out_stride; Real* m_t = out + i * out_stride + cell_dim; + Real i_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5] : 1), + f_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5 + 1] : 1), + o_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5 + 2] : 1); for (int j = tid; j < cell_dim; j += CU1DBLOCK) { Real c_tm1_j = c_tm1[j]; Real i_t_j = Real(1) / (Real(1) + exp(-i_part[j] - w_ic[j] * c_tm1_j)); Real f_t_j = Real(1) / (Real(1) + exp(-f_part[j] - w_fc[j] * c_tm1_j)); - Real c_t_j = f_t_j * c_tm1_j + i_t_j * tanh(c_part[j]); + Real c_t_j = f_t_j * f_scale * c_tm1_j + i_t_j * i_scale * tanh(c_part[j]); Real o_t_j = Real(1) / (Real(1) + exp(-o_part[j] - w_oc[j] * c_t_j)); c_t[j] = c_t_j; - m_t[j] = o_t_j * tanh(c_t_j); + m_t[j] = o_t_j * o_scale * tanh(c_t_j); } } @@ -2780,6 +2923,9 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + If 'have_dropout_mask' is nonzero, each row of + 'in' will have 3 extra elements, interpreted + as dropout masks/scales for i_t, f_t and o_t. @param [in] params The same as in ComputeLstmNonlinearity(). 
A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely @@ -2852,7 +2998,8 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, */ template<typename Real> __global__ -static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, +static void _diff_lstm_nonlinearity(const int cell_dim, const int have_dropout_mask, + const int num_rows, const Real* input, const int input_stride, const Real* params, const int params_stride, const Real* output_deriv, @@ -2906,6 +3053,7 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real o_t_self_repair = (update_sr[3] ? sr_config[8] : 0); const Real c_t_self_repair = (update_sr[4] ? sr_config[9] : 0); + for (int i = i0; i < num_rows; i += grid_stride) { const Real i_part = input[i * input_stride + j]; const Real f_part = input[i * input_stride + j + cell_dim]; @@ -2913,10 +3061,19 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real o_part = input[i * input_stride + j + 3 * cell_dim]; const Real c_prev = input[i * input_stride + j + 4 * cell_dim]; - const Real i_t = 1 / (1 + exp(-i_part - w_ic * c_prev)); - const Real f_t = 1 / (1 + exp(-f_part - w_fc * c_prev)); + + const Real i_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5] : 1), + f_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5 + 1] : 1), + o_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5 + 2] : 1); + + + const Real i_t = Real(1) / (1 + exp(-i_part - w_ic * c_prev)); + const Real f_t = Real(1) / (1 + exp(-f_part - w_fc * c_prev)); const Real tanh_c_part = tanh(c_part); - const Real c_t = f_t * c_prev + i_t * tanh_c_part; + const Real c_t = f_t * f_scale * c_prev + i_t * i_scale * tanh_c_part; const Real o_t = 1 / (1 + exp(-o_part - w_oc * c_t)); const Real tanh_c_t = tanh(c_t); @@ -2943,20 +3100,20 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real dc_t_out = output_deriv[i * output_deriv_stride + j]; const Real dm_t = output_deriv[i * output_deriv_stride + j + cell_dim]; - const Real dtanh_c_t = o_t * dm_t; - const Real do_t = tanh_c_t * dm_t; + const Real dtanh_c_t = o_t * o_scale * dm_t; + const Real do_t = o_scale * tanh_c_t * dm_t; const Real do_t_input = (o_t_deriv * do_t - (2 * o_t - 1) * o_t_self_repair); const Real dc_t = (c_t_deriv * dtanh_c_t + dc_t_out + do_t_input * w_oc) - tanh_c_t * c_t_self_repair; - const Real dtanh_c_part = i_t * dc_t; - const Real df_t = dc_t * c_prev; + const Real dtanh_c_part = i_t * i_scale * dc_t; + const Real df_t = dc_t * f_scale * c_prev; const Real df_t_input = (df_t * f_t_deriv - - (2 * f_t - 1) * f_t_self_repair); - const Real di_t = dc_t * tanh_c_part; + - (2 * f_t - 1) * f_t_self_repair); + const Real di_t = dc_t * i_scale * tanh_c_part; const Real di_t_input = (di_t * i_t_deriv - - (2 * i_t - 1) * i_t_self_repair); + - (2 * i_t - 1) * i_t_self_repair); if (params_deriv) { w_ic_deriv_sum += c_prev * di_t_input; @@ -2964,7 +3121,7 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, w_oc_deriv_sum += c_t * do_t_input; } - const Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * dc_t; + const Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * f_scale * dc_t; const Real do_part = do_t_input; const Real dc_part = (c_part_deriv * dtanh_c_part - tanh_c_part * c_part_self_repair); @@ -3338,6 +3495,11 @@ void cudaF_max(dim3 Gr, dim3 Bl, float* mat, const float* A, MatrixDim
dst_d, _max<<<Gr,Bl>>>(mat,A,dst_d,src_stride); } +void cudaF_min(dim3 Gr, dim3 Bl, float* mat, const float* other, + MatrixDim mat_d, int other_stride) { + _min<<<Gr,Bl>>>(mat,other,mat_d,other_stride); +} + void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float* mat, const float* scale, MatrixDim d) { _mul_cols_vec<<<Gr,Bl>>>(mat,scale,d); @@ -3458,6 +3620,12 @@ void cudaF_sum_mat_cols(int Gr, int Bl, float* result, const float* mat, _transform_reduce_mat_cols<<<Gr,Bl>>>(result,mat,d, TransReduceOp<SUM,float>()); } +void cudaF_add_col_sum_mat(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d, const float alpha, + const float beta) { + _transform_reduce_mat_cols<<<Gr,Bl>>>(result, mat, d, + TransReduceOp<SUMAB, float>(alpha, beta)); +} void cudaF_replace_value(int Gr, int Bl, float *v, int dim, float orig, float changed) { @@ -3987,6 +4155,11 @@ void cudaD_max(dim3 Gr, dim3 Bl, double* mat, const double* A, MatrixDim dst_d, _max<<<Gr,Bl>>>(mat,A,dst_d,src_stride); } +void cudaD_min(dim3 Gr, dim3 Bl, double* mat, const double* other, MatrixDim mat_d, + int other_stride) { + _min<<<Gr,Bl>>>(mat,other,mat_d,other_stride); +} + void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double* mat, const double* scale, MatrixDim d) { _mul_cols_vec<<<Gr,Bl>>>(mat,scale,d); @@ -4108,6 +4281,12 @@ void cudaD_sum_mat_cols(int Gr, int Bl, double* result, const double* mat, _transform_reduce_mat_cols<<<Gr,Bl>>>(result,mat,d, TransReduceOp<SUM,double>()); } +void cudaD_add_col_sum_mat(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d, const double alpha, + const double beta) { + _transform_reduce_mat_cols<<<Gr,Bl>>>(result, mat, d, + TransReduceOp<SUMAB, double>(alpha, beta)); +} void cudaD_replace_value(int Gr, int Bl, double *v, int dim, double orig, double changed) { @@ -4579,20 +4758,23 @@ void cudaD_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const double* mat_in, void cudaD_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, const int in_stride, const double* params, const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, - double* out) { - _lstm_nonlinearity<<<Gr, Bl>>>(in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + const int cell_dim, const int have_dropout_mask, + const int num_rows, double* out) { + _lstm_nonlinearity<<<Gr, Bl>>>( + in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, num_rows, out); } void cudaF_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, const int in_stride, const float* params, const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, - float* out) { - _lstm_nonlinearity<<<Gr, Bl>>>(in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + const int cell_dim, const int have_dropout_mask, + const int num_rows, float* out) { + _lstm_nonlinearity<<<Gr, Bl>>>( + in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, num_rows, out); } void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const double* input, const int input_stride, const double* params, const int params_stride, @@ -4611,7 +4793,8 @@ void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, double* self_repair_sum_out, const int self_repair_sum_out_stride) { - _diff_lstm_nonlinearity<<<Gr, Bl>>>(cell_dim, num_rows, input, + _diff_lstm_nonlinearity<<<Gr, Bl>>>( + cell_dim, have_dropout_mask, num_rows, input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, input_deriv, input_deriv_stride,
params_deriv, params_deriv_stride, value_sum_out, @@ -4619,6 +4802,7 @@ void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, self_repair_sum_out, self_repair_sum_out_stride); } void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const float* input, const int input_stride, const float* params, const int params_stride, @@ -4637,10 +4821,38 @@ void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, float* self_repair_sum_out, const int self_repair_sum_out_stride) { - _diff_lstm_nonlinearity<<<Gr, Bl>>>(cell_dim, num_rows, input, + _diff_lstm_nonlinearity<<<Gr, Bl>>>( + cell_dim, have_dropout_mask, num_rows, input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, input_deriv, input_deriv_stride, params_deriv, params_deriv_stride, value_sum_out, value_sum_out_stride, deriv_sum_out, deriv_sum_out_stride, self_repair_sum_out, self_repair_sum_out_stride); } + + +void cudaD_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in) { + _copy_cols_from_vec<<<Gr, Bl>>>(mat_out, d_out, v_in); +} +void cudaF_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, + const float *v_in) { + _copy_cols_from_vec<<<Gr, Bl>>>(mat_out, d_out, v_in); +} + +void cudaF_diff_normalize_per_row(size_t Gr, size_t Bl, float *id, + int id_stride, const float *iv, + MatrixDim iv_dim, const float* od, + int od_stride, float target_rms, + bool add_log_stddev) { + _diff_normalize_per_row<<<Gr, Bl>>>(id, id_stride, iv, iv_dim, od, od_stride, + target_rms, add_log_stddev); +} +void cudaD_diff_normalize_per_row(size_t Gr, size_t Bl, double *id, + int id_stride, const double *iv, + MatrixDim iv_dim, const double* od, + int od_stride, double target_rms, + bool add_log_stddev) { + _diff_normalize_per_row<<<Gr, Bl>>>(id, id_stride, iv, iv_dim, od, od_stride, + target_rms, add_log_stddev); +} diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 9e9910d6f56..d2a79f471c8 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -38,15 +38,76 @@ namespace kaldi { -/* - * CuMatrix - */ - -inline void cuda_copy_upp_low(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA) { - cudaF_copy_upp_low(Gr, Bl, A, dimA); +inline void cuda_add_col_sum_mat(int Gr, int Bl, double* result, + const double* mat, const MatrixDim d, + const double alpha, const double beta) { + cudaD_add_col_sum_mat(Gr, Bl, result, mat, d, alpha, beta); } -inline void cuda_copy_low_upp(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA) { - cudaF_copy_low_upp(Gr, Bl, A, dimA); +inline void cuda_add_col_sum_mat(int Gr, int Bl, float* result, + const float* mat, const MatrixDim d, + const float alpha, const float beta) { + cudaF_add_col_sum_mat(Gr, Bl, result, mat, d, alpha, beta); +} +inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaD_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); +} +inline void cuda_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaF_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); +} +inline void cuda_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const double alpha, + const double* M, const int stride_M, + const double* N, const MatrixDim dim_N, + const double beta, double* v) { +
cudaD_add_diag_mat_mat_MN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +} +inline void cuda_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const float alpha, + const float* M, const int stride_M, + const float* N, const MatrixDim dim_N, + const float beta, float* v) { + cudaF_add_diag_mat_mat_MN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +} +inline void cuda_add_diag_mat_mat_MNT(int Gr, int Bl, const double alpha, + const double* M, const MatrixDim dim_M, + const double* N, const int stride_N, + const double beta, double* v) { + cudaD_add_diag_mat_mat_MNT(Gr, Bl, alpha, M, dim_M, N, stride_N, beta, v); +} +inline void cuda_add_diag_mat_mat_MNT(int Gr, int Bl, const float alpha, + const float* M, const MatrixDim dim_M, + const float* N, const int stride_N, + const float beta, float* v) { + cudaF_add_diag_mat_mat_MNT(Gr, Bl, alpha, M, dim_M, N, stride_N, beta, v); +} +inline void cuda_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const double alpha, + const double* M, const int stride_M, + const double* N, const MatrixDim dim_N, + const double beta, double* v) { + cudaD_add_diag_mat_mat_MTN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +} +inline void cuda_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const float alpha, + const float* M, const int stride_M, + const float* N, const MatrixDim dim_N, + const float beta, float* v) { + cudaF_add_diag_mat_mat_MTN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +} +inline void cuda_add_diag_packed(int Gr, int Bl, double* mat, double value, + int dim) { + cudaD_add_diag_packed(Gr, Bl, mat, value, dim); +} +inline void cuda_add_diag_packed(int Gr, int Bl, float* mat, float value, + int dim) { + cudaF_add_diag_packed(Gr, Bl, mat, value, dim); +} +inline void cuda_add_diag_vec_mat(dim3 Gr, dim3 Bl, double alpha, double *mat, + MatrixDim mat_dim, const double *vec, + const double *mat2, int mat2_row_stride, + int mat2_col_stride, double beta) { + cudaD_add_diag_vec_mat(Gr, Bl, alpha, mat, mat_dim, vec, mat2, + mat2_row_stride, mat2_col_stride, beta); } inline void cuda_add_diag_vec_mat(dim3 Gr, dim3 Bl, float alpha, float *mat, MatrixDim mat_dim, const float *vec, @@ -55,269 +116,248 @@ inline void cuda_add_diag_vec_mat(dim3 Gr, dim3 Bl, float alpha, float *mat, cudaF_add_diag_vec_mat(Gr, Bl, alpha, mat, mat_dim, vec, mat2, mat2_row_stride, mat2_col_stride, beta); } -inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const float* B, - MatrixDim dmat) { - cudaF_copy_from_tp_trans(Gr, Bl, A, B, dmat); +inline void cuda_add(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d) { + cudaD_add(Gr, Bl, mat, value, d); } -inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const double* B, - MatrixDim dmat) { - cudaFD_copy_from_tp_trans(Gr, Bl, A, B, dmat); +inline void cuda_add(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d) { + cudaF_add(Gr, Bl, mat, value, d); } -inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const float* B, - MatrixDim dmat) { - cudaF_copy_from_tp(Gr, Bl, A, B, dmat); +inline void cuda_add_mat_blockmat(dim3 Gr, dim3 Bl, double *data, MatrixDim d, + const double *Adata, int A_num_rows, + int A_num_cols, int A_row_stride, + int A_col_stride, + const CuBlockMatrixData *B_cu_data, + int B_num_blocks, double alpha, double beta, + int B_trans) { + cudaD_add_mat_blockmat(Gr, Bl, data, d, Adata, A_num_rows, A_num_cols, + A_row_stride, A_col_stride, B_cu_data, B_num_blocks, + alpha, beta, B_trans); } -inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const double* B, - MatrixDim dmat) { - 
cudaFD_copy_from_tp(Gr, Bl, A, B, dmat); +inline void cuda_add_mat_blockmat(dim3 Gr, dim3 Bl, float *data, MatrixDim d, + const float *Adata, int A_num_rows, + int A_num_cols, int A_row_stride, + int A_col_stride, + const CuBlockMatrixData *B_cu_data, + int B_num_blocks, float alpha, float beta, + int B_trans) { + cudaF_add_mat_blockmat(Gr, Bl, data, d, Adata, A_num_rows, A_num_cols, + A_row_stride, A_col_stride, B_cu_data, B_num_blocks, + alpha, beta, B_trans); } - -inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, float* mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_fd(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_blocks(dim3 Gr, dim3 Bl, double alpha, + const double *src, int32_cuda num_row_blocks, + int32_cuda num_col_blocks, double *dst, + MatrixDim d, int src_stride, int A_trans) { + cudaD_add_mat_blocks(Gr, Bl, alpha, src, num_row_blocks, num_col_blocks, dst, + d, src_stride, A_trans); } -inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, float* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_ff(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_blocks(dim3 Gr, dim3 Bl, float alpha, const float *src, + int32_cuda num_row_blocks, + int32_cuda num_col_blocks, float *dst, + MatrixDim d, int src_stride, int A_trans) { + cudaF_add_mat_blocks(Gr, Bl, alpha, src, num_row_blocks, num_col_blocks, dst, + d, src_stride, A_trans); } -inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, double* mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_dd(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat, + MatrixDim mat_dim, const double *mat2, + int mat2_row_stride, int mat2_col_stride, + const double *vec, double beta) { + cudaD_add_mat_diag_vec(Gr, Bl, alpha, mat, mat_dim, mat2, mat2_row_stride, + mat2_col_stride, vec, beta); } -inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, double* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_df(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_diag_vec(dim3 Gr, dim3 Bl, float alpha, float *mat, + MatrixDim mat_dim, const float *mat2, + int mat2_row_stride, int mat2_col_stride, + const float *vec, float beta) { + cudaF_add_mat_diag_vec(Gr, Bl, alpha, mat, mat_dim, mat2, mat2_row_stride, + mat2_col_stride, vec, beta); } - -inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, float* mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_fd_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, + double *dst, MatrixDim d, int src_stride, + int A_trans) { + cudaD_add_mat(Gr, Bl, alpha, src, dst, d, src_stride, A_trans); } -inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, float* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_ff_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, + float *dst, MatrixDim d, int src_stride, int A_trans) { + cudaF_add_mat(Gr, Bl, alpha, src, dst, d, src_stride, A_trans); } -inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, - const double* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_dd_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_mat_elements(dim3 Gr, dim3 Bl, double *data, + 
const double *srcA_data, + const double *srcB_data, MatrixDim dim, + int srcA_stride, int srcB_stride, + double alpha, double beta) { + cudaD_add_mat_mat_elements(Gr, Bl, data, srcA_data, srcB_data, dim, + srcA_stride, srcB_stride, alpha, beta); } -inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, - const float* mat_in, MatrixDim d_out, - MatrixDim d_in) { - cuda_copy_from_mat_df_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); +inline void cuda_add_mat_mat_elements(dim3 Gr, dim3 Bl, float *data, + const float *srcA_data, + const float *srcB_data, MatrixDim dim, + int srcA_stride, int srcB_stride, + float alpha, float beta) { + cudaF_add_mat_mat_elements(Gr, Bl, data, srcA_data, srcB_data, dim, + srcA_stride, srcB_stride, alpha, beta); } - -inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_ff(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_row_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, + const double *src_data, MatrixDim src_dim, + const Int32Pair *indexes) { + cudaD_add_row_ranges(Gr, Bl, data, dim, src_data, src_dim, indexes); } -inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_fd(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_row_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, + const float *src_data, MatrixDim src_dim, + const Int32Pair *indexes) { + cudaF_add_row_ranges(Gr, Bl, data, dim, src_data, src_dim, indexes); } -inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_df(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, + const double* const * src, MatrixDim dst_dim) { + cudaD_add_rows_direct(Gr, Bl, alpha, dst, src, dst_dim); } -inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_dd(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, + const double* src, const MatrixIndexT_cuda* reorder, + MatrixDim dst_dim, int src_stride) { + cudaD_add_rows(Gr, Bl, alpha, dst, src, reorder, dst_dim, src_stride); } - -inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_ff_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, + const float* const * src, MatrixDim dst_dim) { + cudaF_add_rows_direct(Gr, Bl, alpha, dst, src, dst_dim); } -inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, float* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_fd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, + const float* src, const MatrixIndexT_cuda* reorder, + MatrixDim dst_dim, int src_stride) { + cudaF_add_rows(Gr, Bl, alpha, dst, src, reorder, dst_dim, src_stride); } -inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_df_trans(Gr, Bl, mat_out, smat_in, 
d_out, d_in); +inline void cuda_add_to_rows(dim3 Gr, dim3 Bl, double alpha, + double* const * dst, const double* src, + MatrixDim src_dim) { + cudaD_add_to_rows_direct(Gr, Bl, alpha, dst, src, src_dim); } -inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, double* mat_out, - const MatrixElement* smat_in, - MatrixDim d_out, MatrixIndexT_cuda d_in) { - cuda_copy_from_smat_dd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); +inline void cuda_add_to_rows(dim3 Gr, dim3 Bl, float alpha, float* const * dst, + const float* src, MatrixDim src_dim) { + cudaF_add_to_rows_direct(Gr, Bl, alpha, dst, src, src_dim); } - -inline void cuda_trace_mat_smat(dim3 Gr, dim3 Bl, const float* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, - float* trace_vec_out) { - cudaF_trace_mat_smat(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, - trace_vec_out); +inline void cuda_add_vec2(dim3 Gr, dim3 Bl, double *mat, const double *vec, + const double alpha, int dim) { + cudaD_add_vec2(Gr, Bl, mat, vec, alpha, dim); } -inline void cuda_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const float* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, - MatrixIndexT_cuda smat_d_in, - float* trace_vec_out) { - cudaF_trace_mat_smat_trans(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, - trace_vec_out); +inline void cuda_add_vec2(dim3 Gr, dim3 Bl, float *mat, const float *vec, + const float alpha, int dim) { + cudaF_add_vec2(Gr, Bl, mat, vec, alpha, dim); } -inline void cuda_trace_mat_smat(dim3 Gr, dim3 Bl, const double* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, - double* trace_vec_out) { - cudaD_trace_mat_smat(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, - trace_vec_out); +inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, double alpha, + const double *col, double beta, double *dst, + MatrixDim d) { + cudaD_add_vec_to_cols(Gr, Bl, alpha, col, beta, dst, d); } -inline void cuda_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const double* mat_in, - const MatrixElement* smat_in, - MatrixDim mat_d_in, - MatrixIndexT_cuda smat_d_in, - double* trace_vec_out) { - cudaD_trace_mat_smat_trans(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, - trace_vec_out); +inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, float alpha, + const float *col, float beta, float *dst, + MatrixDim d) { + cudaF_add_vec_to_cols(Gr, Bl, alpha, col, beta, dst, d); } - -inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { - cudaF_apply_exp(Gr, Bl, mat, d); +inline void cuda_add_vec_to_rows(dim3 Gr, dim3 Bl, double alpha, + const double *row, double beta, double *dst, + MatrixDim d) { + cudaD_add_vec_to_rows(Gr, Bl, alpha, row, beta, dst, d); } -inline void cuda_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, - MatrixDim dim) { - cudaF_apply_pow(Gr, Bl, mat, power, dim); +inline void cuda_add_vec_to_rows(dim3 Gr, dim3 Bl, float alpha, + const float *row, float beta, float *dst, + MatrixDim d) { + cudaF_add_vec_to_rows(Gr, Bl, alpha, row, beta, dst, d); } -inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, - bool include_sign, MatrixDim dim) { - cudaF_apply_pow_abs(Gr, Bl, mat, power, include_sign, dim); +inline void cuda_add_vec_vec(int Gr, int Bl, double alpha, double* v, + const double* x, const double* y, double beta, + int dim) { + cudaD_add_vec_vec(Gr, Bl, alpha, v, x, y, beta, dim); } -inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim dim) { - cudaF_apply_heaviside(Gr, Bl, mat, dim); +inline void 
cuda_add_vec_vec(int Gr, int Bl, float alpha, float* v, + const float* x, const float* y, float beta, + int dim) { + cudaF_add_vec_vec(Gr, Bl, alpha, v, x, y, beta, dim); } -inline void cuda_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, - MatrixDim dim) { - cudaF_apply_floor(Gr, Bl, mat, floor_val, dim); +inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, + double ceiling_val, MatrixDim dim) { + cudaD_apply_ceiling(Gr, Bl, mat, ceiling_val, dim); } inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val, MatrixDim dim) { cudaF_apply_ceiling(Gr, Bl, mat, ceiling_val, dim); } -inline void cuda_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaF_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); -} -inline void cuda_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaF_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); -} -inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaF_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride); -} -inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, - const float* const * src, MatrixDim dst_dim) { - cudaF_copy_rows_direct(Gr, Bl, dst, src, dst_dim); -} -inline void cuda_copy_to_rows(dim3 Gr, dim3 Bl, float* const * dst, - const float* src, MatrixDim src_dim) { - cudaF_copy_to_rows_direct(Gr, Bl, dst, src, src_dim); -} -inline void cuda_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, - const float* src, const MatrixIndexT_cuda* reorder, - MatrixDim dst_dim, int src_stride) { - cudaF_add_rows(Gr, Bl, alpha, dst, src, reorder, dst_dim, src_stride); -} -inline void cuda_add_rows(dim3 Gr, dim3 Bl, float alpha, float* dst, - const float* const * src, MatrixDim dst_dim) { - cudaF_add_rows_direct(Gr, Bl, alpha, dst, src, dst_dim); -} -inline void cuda_add_to_rows(dim3 Gr, dim3 Bl, float alpha, float* const * dst, - const float* src, MatrixDim src_dim) { - cudaF_add_to_rows_direct(Gr, Bl, alpha, dst, src, src_dim); -} -inline void cuda_trace(int Gr, int Bl, float* mat, float* value, int dim) { - cudaF_trace(Gr, Bl, mat, value, dim); -} -inline void cuda_set_diag(int Gr, int Bl, float* mat, float value, - MatrixDim d) { - cudaF_set_diag(Gr, Bl, mat, value, d); -} -inline void cuda_set_diag_packed(int Gr, int Bl, float* mat, float value, - int dim) { - cudaF_set_diag_packed(Gr, Bl, mat, value, dim); -} -inline void cuda_add_diag_packed(int Gr, int Bl, float* mat, float value, - int dim) { - cudaF_add_diag_packed(Gr, Bl, mat, value, dim); +inline void cuda_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { + cudaD_apply_exp(Gr, Bl, mat, d); } -inline void cuda_set_const(dim3 Gr, dim3 Bl, float *mat, float value, - MatrixDim d) { - cudaF_set_const(Gr, Bl, mat, value, d); +inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { + cudaF_apply_exp(Gr, Bl, mat, d); } -inline void cuda_set_zero_above_diag(dim3 Gr, dim3 Bl, float* mat, - MatrixDim d) { - cudaF_set_zero_above_diag(Gr, Bl, mat, d); +inline void cuda_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, + MatrixDim dim) { + cudaD_apply_floor(Gr, Bl, mat, floor_val, dim); } -inline void cuda_add(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d) { - cudaF_add(Gr, Bl, mat, value, d); +inline void cuda_apply_floor(dim3 Gr, dim3 Bl, 
float* mat, float floor_val, + MatrixDim dim) { + cudaF_apply_floor(Gr, Bl, mat, floor_val, dim); } -inline void cuda_add_vec2(dim3 Gr, dim3 Bl, float *mat, const float *vec, - const float alpha, int dim) { - cudaF_add_vec2(Gr, Bl, mat, vec, alpha, dim); +inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim dim) { + cudaD_apply_heaviside(Gr, Bl, mat, dim); } -inline void cuda_scale_diag_packed(int Gr, int Bl, float* mat, float value, - int dim) { - cudaF_scale_diag_packed(Gr, Bl, mat, value, dim); +inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim dim) { + cudaF_apply_heaviside(Gr, Bl, mat, dim); } -inline void cuda_scale(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d) { - cudaF_scale(Gr, Bl, mat, value, d); +inline void cuda_apply_log(dim3 Gr, dim3 Bl, double *mat, MatrixDim d) { + cudaD_apply_log(Gr, Bl, mat, d); } inline void cuda_apply_log(dim3 Gr, dim3 Bl, float *mat, MatrixDim d) { cudaF_apply_log(Gr, Bl, mat, d); } -inline void cuda_mul_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, - MatrixDim dst_d, int src_stride) { - cudaF_mul_elements(Gr, Bl, mat, A, dst_d, src_stride); +inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, + bool include_sign, MatrixDim dim) { + cudaD_apply_pow_abs(Gr, Bl, mat, power, include_sign, dim); } -inline void cuda_div_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, - MatrixDim dst_d, int src_stride) { - cudaF_div_elements(Gr, Bl, mat, A, dst_d, src_stride); +inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, + bool include_sign, MatrixDim dim) { + cudaF_apply_pow_abs(Gr, Bl, mat, power, include_sign, dim); } -inline void cuda_max(dim3 Gr, dim3 Bl, float *mat, const float *A, - MatrixDim dst_d, int src_stride) { - cudaF_max(Gr, Bl, mat, A, dst_d, src_stride); +inline void cuda_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, + MatrixDim dim) { + cudaD_apply_pow(Gr, Bl, mat, power, dim); } -inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, - MatrixDim d) { - cudaF_mul_cols_vec(Gr, Bl, mat, scale, d); +inline void cuda_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, + MatrixDim dim) { + cudaF_apply_pow(Gr, Bl, mat, power, dim); } -inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, - MatrixDim d) { - cudaF_mul_rows_vec(Gr, Bl, mat, scale, d); +inline cublasStatus_t cuda_axpy(cublasHandle_t handle, int n, double alpha, + const double *x, int incx, double *y, + int incy) { + return cublasDaxpy_v2(handle, n, &alpha, x, incx, y, incy); } -inline void cuda_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, - int group_size) { - cudaF_mul_rows_group_mat(Gr, Bl, y, x, d, src_stride, group_size); +inline cublasStatus_t cuda_axpy(cublasHandle_t handle, int n, float alpha, + const float *x, int incx, float *y, int incy) { + return cublasSaxpy_v2(handle, n, &alpha, x, incx, y, incy); } - -inline void cuda_diff_group_pnorm(dim3 Gr, dim3 Bl, float *id, const float *iv, - const float *ov, const float* od, - MatrixDim id_dim, int iv_stride, - int ov_stride, int od_stride, int group_size, - float power) { - cudaF_diff_group_pnorm(Gr, Bl, id, iv, ov, od, id_dim, iv_stride, ov_stride, - od_stride, group_size, power); +inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, + CuBlockMatrixData *B_cu_data, int num_blocks, + const double *C_data, int C_num_cols, + int C_row_stride, int C_col_stride, + const double *D_data, int D_row_stride, + int 
D_col_stride, double alpha, + double beta) { + cudaD_block_add_mat_mat(Gr, Bl, B_cu_data, num_blocks, C_data, C_num_cols, + C_row_stride, C_col_stride, D_data, D_row_stride, + D_col_stride, alpha, beta); +} +inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, + CuBlockMatrixData *B_cu_data, int num_blocks, + const float *C_data, int C_num_cols, + int C_row_stride, int C_col_stride, + const float *D_data, int D_row_stride, + int D_col_stride, float alpha, float beta) { + cudaF_block_add_mat_mat(Gr, Bl, B_cu_data, num_blocks, C_data, C_num_cols, + C_row_stride, C_col_stride, D_data, D_row_stride, + D_col_stride, alpha, beta); +} +inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, + const double *x1, const double *x2, + MatrixDim y_dim, int x1_stride, + int x2_stride, int group_size) { + cudaD_calc_group_max_deriv(Gr, Bl, y, x1, x2, y_dim, x1_stride, x2_stride, + group_size); } inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, const float *x2, @@ -326,280 +366,255 @@ inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, cudaF_calc_group_max_deriv(Gr, Bl, y, x1, x2, y_dim, x1_stride, x2_stride, group_size); } -inline void cuda_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, - float *dst, MatrixDim d, int src_stride, int A_trans) { - cudaF_add_mat(Gr, Bl, alpha, src, dst, d, src_stride, A_trans); +inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, + int32 size, const double* z, MatrixDim d, + double* z2, MatrixDim d2, double* t) { + cudaD_comp_obj_deriv(Gr, Bl, x, size, z, d, z2, d2, t); } -inline void cuda_add_mat_blocks(dim3 Gr, dim3 Bl, float alpha, const float *src, - int32_cuda num_row_blocks, - int32_cuda num_col_blocks, float *dst, - MatrixDim d, int src_stride, int A_trans) { - cudaF_add_mat_blocks(Gr, Bl, alpha, src, num_row_blocks, num_col_blocks, dst, - d, src_stride, A_trans); +inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, + int32 size, const float* z, MatrixDim d, + float* z2, MatrixDim d2, float* t) { + cudaF_comp_obj_deriv(Gr, Bl, x, size, z, d, z2, d2, t); } -inline void cuda_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, - const float *B, const float *C, float *dst, - MatrixDim d, int stride_a, int stride_b, - int stride_c) { - cudaF_set_mat_mat_div_mat(Gr, Bl, A, B, C, dst, d, stride_a, stride_b, - stride_c); +inline void cuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, + const double* mat, MatrixDim dmat, + int dim) { + cudaD_copy_col_from_mat_df(Gr, Bl, v, col, mat, dmat, dim); } -inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, float alpha, - const float *col, float beta, float *dst, - MatrixDim d) { - cudaF_add_vec_to_cols(Gr, Bl, alpha, col, beta, dst, d); +inline void cuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, + const float* mat, MatrixDim dmat, + int dim) { + cudaF_copy_col_from_mat_df(Gr, Bl, v, col, mat, dmat, dim); } -inline void cuda_add_vec_to_rows(dim3 Gr, dim3 Bl, float alpha, - const float *row, float beta, float *dst, - MatrixDim d) { - cudaF_add_vec_to_rows(Gr, Bl, alpha, row, beta, dst, d); -} -inline void cuda_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, - const float* T, MatrixDim tdim, float *S, - MatrixDim sdim) { - cudaF_sy_add_tr2(Gr, Bl, alpha, beta, T, tdim, S, sdim); +inline void cuda_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, + const double* mat, MatrixDim dmat, + int dim) { + cudaD_copy_col_from_mat_fd(Gr, Bl, v, col, mat, dmat, dim); } -inline void cuda_add_mat_diag_vec(dim3
Gr, dim3 Bl, float alpha, float *mat, - MatrixDim mat_dim, const float *mat2, - int mat2_row_stride, int mat2_col_stride, - const float *vec, float beta) { - cudaF_add_mat_diag_vec(Gr, Bl, alpha, mat, mat_dim, mat2, mat2_row_stride, - mat2_col_stride, vec, beta); +inline void cuda_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, + const float* mat, MatrixDim dmat, + int dim) { + cudaF_copy_col_from_mat_fd(Gr, Bl, v, col, mat, dmat, dim); } -inline void cuda_add_mat_mat_elements(dim3 Gr, dim3 Bl, float *data, - const float *srcA_data, - const float *srcB_data, MatrixDim dim, - int srcA_stride, int srcB_stride, - float alpha, float beta) { - cudaF_add_mat_mat_elements(Gr, Bl, data, srcA_data, srcB_data, dim, - srcA_stride, srcB_stride, alpha, beta); +inline void cuda_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaD_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); } - -/* - * CuVector - */ -inline void cuda_max_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d) { - cudaF_max_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaF_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); } -inline void cuda_min_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d) { - cudaF_min_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in) { + cudaD_copy_cols_from_vec(Gr, Bl, mat_out, d_out, v_in); } -inline void cuda_sum_mat_cols(int Gr, int Bl, float* result, const float* mat, - const MatrixDim d) { - cudaF_sum_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, + MatrixDim d_out, const float *v_in) { + cudaF_copy_cols_from_vec(Gr, Bl, mat_out, d_out, v_in); } -inline void cuda_replace_value(int Gr, int Bl, float *v, int dim, float orig, - float changed) { - cudaF_replace_value(Gr, Bl, v, dim, orig, changed); +inline void cuda_copy(dim3 Gr, dim3 Bl, double *y, const double *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in) { + cudaD_copy(Gr, Bl, y, x, copy_from, d_out, d_in); } -inline void cuda_div_rows_vec(dim3 Gr, dim3 Bl, float *mat, - const float *vec_div, MatrixDim d) { - cudaF_div_rows_vec(Gr, Bl, mat, vec_div, d); +inline void cuda_copy(dim3 Gr, dim3 Bl, float *y, const float *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in) { + cudaF_copy(Gr, Bl, y, x, copy_from, d_out, d_in); } -inline void cuda_set_bias_params(int Gr, int Bl, float* v, const float* a, - float param_1, float param_2, float param_3, - int* flag, int dim) { - cudaF_set_bias_params(Gr, Bl, v, a, param_1, param_2, param_3, flag, dim); +inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, double* mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_dd(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_vec_mul_elements(int Gr, int Bl, float* v, const float* a, - int dim) { - cudaF_vec_mul_elements(Gr, Bl, v, a, dim); +inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, double* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_df(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_vec_soft_max(int Gr, int Bl, float* v, int dim) { - cudaF_vec_soft_max(Gr, Bl, v, dim); 
+inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, float* mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_fd(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_vec_min(int Gr, int Bl, const float* v, float* value, int dim, - int inc) { - cudaF_vec_min(Gr, Bl, v, value, dim, inc); +inline void cuda_copy_from_mat(dim3 Gr, dim3 Bl, float* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_ff(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_vec_max(int Gr, int Bl, const float* v, float* value, int dim, - int inc) { - cudaF_vec_max(Gr, Bl, v, value, dim, inc); +inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_dd_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const float* A, - const float* B, MatrixDim dA, int B_stride, - float* value) { - cudaF_trace_mat_mat_trans(Gr, Bl, A, B, dA, B_stride, value); +inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_df_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_trace_mat_mat(dim3 Gr, dim3 Bl, const float* A, const float* B, - MatrixDim dA, int B_stride, float* value) { - cudaF_trace_mat_mat(Gr, Bl, A, B, dA, B_stride, value); +inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, float* mat_out, + const double* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_fd_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_add_diag_mat_mat_MNT(int Gr, int Bl, const float alpha, - const float* M, const MatrixDim dim_M, - const float* N, const int stride_N, - const float beta, float* v) { - cudaF_add_diag_mat_mat_MNT(Gr, Bl, alpha, M, dim_M, N, stride_N, beta, v); +inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, float* mat_out, + const float* mat_in, MatrixDim d_out, + MatrixDim d_in) { + cuda_copy_from_mat_ff_trans(Gr, Bl, mat_out, mat_in, d_out, d_in); } -inline void cuda_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const float alpha, - const float* M, const int stride_M, - const float* N, const MatrixDim dim_N, - const float beta, float* v) { - cudaF_add_diag_mat_mat_MTN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_dd(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const float alpha, - const float* M, const int stride_M, - const float* N, const MatrixDim dim_N, - const float beta, float* v) { - cudaF_add_diag_mat_mat_MN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_df(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_add_vec_vec(int Gr, int Bl, float alpha, float* v, - const float* x, const float* y, float beta, - int dim) { - cudaF_add_vec_vec(Gr, Bl, alpha, v, x, y, beta, dim); +inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_fd(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, - const
float* mat, MatrixDim dmat, - int dim) { - cudaF_copy_col_from_mat_df(Gr, Bl, v, col, mat, dmat, dim); +inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_ff(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, - const float* mat, MatrixDim dmat, - int dim) { - cudaF_copy_col_from_mat_fd(Gr, Bl, v, col, mat, dmat, dim); +inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_dd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_vec_sum(int Gr, int Bl, float* v, float* value, int dim, - int inc) { - cudaF_vec_sum(Gr, Bl, v, value, dim, inc); +inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, double* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_df_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_vec_copy_diag_from_packed(int Gr, int Bl, float *dst, - const float *src, int dim) { - cudaF_vec_copy_diag_from_packed(Gr, Bl, dst, src, dim); +inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<double>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_fd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_vec_apply_floor(int Gr, int Bl, float* v, float floor_val, - float* num, int dim) { - cudaF_vec_apply_floor(Gr, Bl, v, floor_val, num, dim); +inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl, float* mat_out, + const MatrixElement<float>* smat_in, + MatrixDim d_out, MatrixIndexT_cuda d_in) { + cuda_copy_from_smat_ff_trans(Gr, Bl, mat_out, smat_in, d_out, d_in); } -inline void cuda_vec_apply_ceiling(int Gr, int Bl, float* v, float floor_val, - float* num, int dim) { - cudaF_vec_apply_ceiling(Gr, Bl, v, floor_val, num, dim); +inline void cuda_copy_from_sp(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_out) { + cudaD_copy_from_sp(Gr, Bl, x, y, d_out); } -inline void cuda_vec_apply_exp(int Gr, int Bl, float* v, int dim) { - cudaF_vec_apply_exp(Gr, Bl, v, dim); +inline void cuda_copy_from_sp(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_out) { + cudaF_copy_from_sp(Gr, Bl, x, y, d_out); } -inline void cuda_vec_apply_log(int Gr, int Bl, float* v, float* flag, int dim) { - cudaF_vec_apply_log(Gr, Bl, v, flag, dim); +inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const double* B, + MatrixDim dmat) { + cudaD_copy_from_tp(Gr, Bl, A, B, dmat); } -inline void cuda_invert_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim d) { - cudaF_invert_elements(Gr, Bl, data, d); +inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const float* B, + MatrixDim dmat) { + cudaDF_copy_from_tp(Gr, Bl, A, B, dmat); } -// B_trans nonzero if B transposed.
-inline void cuda_add_mat_blockmat(dim3 Gr, dim3 Bl, float *data, MatrixDim d, - const float *Adata, int A_num_rows, - int A_num_cols, int A_row_stride, - int A_col_stride, - const CuBlockMatrixData *B_cu_data, - int B_num_blocks, float alpha, float beta, - int B_trans) { - cudaF_add_mat_blockmat(Gr, Bl, data, d, Adata, A_num_rows, A_num_cols, - A_row_stride, A_col_stride, B_cu_data, B_num_blocks, - alpha, beta, B_trans); +inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const double* B, + MatrixDim dmat) { + cudaFD_copy_from_tp(Gr, Bl, A, B, dmat); } -inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, - CuBlockMatrixData *B_cu_data, int num_blocks, - const float *C_data, int C_num_cols, - int C_row_stride, int C_col_stride, - const float *D_data, int D_row_stride, - int D_col_stride, float alpha, float beta) { - cudaF_block_add_mat_mat(Gr, Bl, B_cu_data, num_blocks, C_data, C_num_cols, - C_row_stride, C_col_stride, D_data, D_row_stride, - D_col_stride, alpha, beta); +inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const float* B, + MatrixDim dmat) { + cudaF_copy_from_tp(Gr, Bl, A, B, dmat); } - -/* - * cu:: - */ -inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride) { - cudaF_soft_hinge(Gr, Bl, y, x, d, src_stride); +inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, + const double* B, MatrixDim dmat) { + cudaD_copy_from_tp_trans(Gr, Bl, A, B, dmat); } -inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, int group_size, - float power) { - cudaF_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); +inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const float* B, + MatrixDim dmat) { + cudaDF_copy_from_tp_trans(Gr, Bl, A, B, dmat); } -inline void cuda_group_spec_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, int group_size, - float power) { - cudaF_group_spec_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); +inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const double* B, + MatrixDim dmat) { + cudaFD_copy_from_tp_trans(Gr, Bl, A, B, dmat); } -inline void cuda_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, int group_size) { - cudaF_group_max(Gr, Bl, y, x, d, src_stride, group_size); +inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, float* A, const float* B, + MatrixDim dmat) { + cudaF_copy_from_tp_trans(Gr, Bl, A, B, dmat); } -inline void cuda_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride) { - cudaF_sigmoid(Gr, Bl, y, x, d, src_stride); +inline void cuda_copy_low_upp(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA) { + cudaD_copy_low_upp(Gr, Bl, A, dimA); } -inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, - const float *y, MatrixDim d, int e_stride, - int y_stride) { - cudaF_diff_sigmoid(Gr, Bl, eout, e, y, d, e_stride, y_stride); +inline void cuda_copy_low_upp(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA) { + cudaF_copy_low_upp(Gr, Bl, A, dimA); } -inline void cuda_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, - int src_stride) { - cudaF_tanh(Gr, Bl, y, x, d, src_stride); +inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, + const double* const * src, MatrixDim dst_dim) { + cudaD_copy_rows_direct(Gr, Bl, dst, src, dst_dim); } -inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, float *eout, const float *e, - const float *y, MatrixDim d, int e_stride, - int 
y_stride) { - cudaF_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); +inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaD_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride); } -inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, - const float *a, const float *b) { - cudaF_parametric_relu(Gr,Bl,y,x,d,src_stride,a,b); +inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, + const float* const * src, MatrixDim dst_dim) { + cudaF_copy_rows_direct(Gr, Bl, dst, src, dst_dim); } -inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, float *eout, - const float *e, const float *y, - MatrixDim d, int e_stride, int y_stride, - const float *a, const float *b) { - cudaF_diff_parametric_relu(Gr,Bl,eout,e,y,d,e_stride,y_stride,a,b); +inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, + const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, + int src_stride) { + cudaF_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride); } -inline void cuda_heaviside(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride) { - cudaF_heaviside(Gr, Bl, y, x, d, src_stride); +inline void cuda_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in) { + cudaD_copy_rows_from_vec(Gr, Bl, mat_out, d_out, v_in); } -// Bl: dimBlock value is fixed min(d.col, CU1DBLOCK), represent CU1DBLOCK -// threads reduce a row at the same time. -// Gr: the number of rows -inline void cuda_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, - MatrixDim d, int src_stride) { - cudaF_softmax_reduce(Gr, Bl, y, x, d, src_stride); +inline void cuda_copy_rows_from_vec(dim3 Gr, dim3 Bl, float *mat_out, + MatrixDim d_out, const float *v_in) { + cudaF_copy_rows_from_vec(Gr, Bl, mat_out, d_out, v_in); } -inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, float *y, - const float *x, MatrixDim y_dim, - int x_stride) { - cudaF_log_softmax_reduce(Gr, Bl, y, x, y_dim, x_stride); +inline void cuda_copy_to_rows(dim3 Gr, dim3 Bl, double* const * dst, + const double* src, MatrixDim src_dim) { + cudaD_copy_to_rows_direct(Gr, Bl, dst, src, src_dim); } - -inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, float *wei, float *grad, - float l1, float lr, MatrixDim d, - int stride_grad) { - cudaF_regularize_l1(Gr, Bl, wei, grad, l1, lr, d, stride_grad); +inline void cuda_copy_to_rows(dim3 Gr, dim3 Bl, float* const * dst, + const float* src, MatrixDim src_dim) { + cudaF_copy_to_rows_direct(Gr, Bl, dst, src, src_dim); } -inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const float *mat, - float *vec_val, int32_cuda *vec_id, - MatrixDim d) { - cudaF_find_row_max_id(Gr, Bl, mat, vec_val, vec_id, d); +inline void cuda_copy_upp_low(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA) { + cudaD_copy_upp_low(Gr, Bl, A, dimA); } -inline void cuda_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, - float *mat_net_out, float *vec_log_post, - MatrixDim d) { - cudaF_diff_xent(Gr, Bl, vec_tgt, mat_net_out, vec_log_post, d); +inline void cuda_copy_upp_low(dim3 Gr, dim3 Bl, float* A, MatrixDim dimA) { + cudaF_copy_upp_low(Gr, Bl, A, dimA); } -inline void cuda_normalize_per_row(size_t Gr, size_t Bl, float *y, int y_stride, - const float *x, MatrixDim x_d, - float target_rms, bool add_log_stddev) { - cudaF_normalize_per_row(Gr, Bl, y, y_stride, x, x_d, target_rms, - add_log_stddev); +inline void 
cuda_diff_group_pnorm(dim3 Gr, dim3 Bl, double *id, + const double *iv, const double *ov, + const double* od, MatrixDim id_dim, + int iv_stride, int ov_stride, int od_stride, + int group_size, double power) { + cudaD_diff_group_pnorm(Gr, Bl, id, iv, ov, od, id_dim, iv_stride, ov_stride, + od_stride, group_size, power); } -inline void cuda_diff_softmax(dim3 Gr, dim3 Bl, float* x, const MatrixDim dim, - const float* value, const int value_stride, - const float* diff, const int diff_stride) { - cudaF_diff_softmax(Gr, Bl, x, dim, value, value_stride, diff, diff_stride); +inline void cuda_diff_group_pnorm(dim3 Gr, dim3 Bl, float *id, const float *iv, + const float *ov, const float* od, + MatrixDim id_dim, int iv_stride, + int ov_stride, int od_stride, int group_size, + float power) { + cudaF_diff_group_pnorm(Gr, Bl, id, iv, ov, od, id_dim, iv_stride, ov_stride, + od_stride, group_size, power); +} +inline void cuda_diff_log_softmax(dim3 Gr, dim3 Bl, + const MatrixDim in_deriv_dim, + const double* out_value, + const int out_value_stride, + const double* out_deriv, + const int out_deriv_stride, + double* in_deriv) { + cudaD_diff_log_softmax(Gr, Bl, in_deriv_dim, out_value, out_value_stride, + out_deriv, out_deriv_stride, in_deriv); } inline void cuda_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, @@ -610,79 +625,161 @@ inline void cuda_diff_log_softmax(dim3 Gr, dim3 Bl, cudaF_diff_log_softmax(Gr, Bl, in_deriv_dim, out_value, out_value_stride, out_deriv, out_deriv_stride, in_deriv); } -inline void cuda_copy_rows_from_vec(dim3 Gr, dim3 Bl, float *mat_out, - MatrixDim d_out, const float *v_in) { - cudaF_copy_rows_from_vec(Gr, Bl, mat_out, d_out, v_in); -} - -inline void cuda_randomize(dim3 Gr, dim3 Bl, float *y, const float *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in) { - cudaF_randomize(Gr, Bl, y, x, copy_from, d_out, d_in); +inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, + const int num_rows, const double* input, + const int input_stride, + const double* params, + const int params_stride, + const double* output_deriv, + const int output_deriv_stride, + const double* deriv_sum_in, + const int deriv_sum_in_stride, + const double* self_repair_config, + double count, double* input_deriv, + const int input_deriv_stride, + double* params_deriv, + const int params_deriv_stride, + double* value_sum_out, + const int value_sum_out_stride, + double* deriv_sum_out, + const int deriv_sum_out_stride, + double* self_repair_sum_out, + const int self_repair_sum_out_stride) { + cudaD_diff_lstm_nonlinearity(Gr, Bl, cell_dim, have_dropout_mask, num_rows, + input, input_stride, + params, params_stride, output_deriv, + output_deriv_stride, deriv_sum_in, + deriv_sum_in_stride, self_repair_config, count, + input_deriv, input_deriv_stride, params_deriv, + params_deriv_stride, value_sum_out, + value_sum_out_stride, deriv_sum_out, + deriv_sum_out_stride, self_repair_sum_out, + self_repair_sum_out_stride); } - -inline void cuda_splice(dim3 Gr, dim3 Bl, float *y, const float *x, - const int32_cuda *off, MatrixDim d_out, - MatrixDim d_in) { - cudaF_splice(Gr, Bl, y, x, off, d_out, d_in); +inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, + const int num_rows, const float* input, + const int input_stride, + const float* params, + const int params_stride, + const float* output_deriv, + const int output_deriv_stride, + const double* deriv_sum_in, + const int 
deriv_sum_in_stride, + const float* self_repair_config, + double count, float* input_deriv, + const int input_deriv_stride, + float* params_deriv, + const int params_deriv_stride, + double* value_sum_out, + const int value_sum_out_stride, + double* deriv_sum_out, + const int deriv_sum_out_stride, + float* self_repair_sum_out, + const int self_repair_sum_out_stride) { + cudaF_diff_lstm_nonlinearity(Gr, Bl, cell_dim, have_dropout_mask, + num_rows, input, input_stride, + params, params_stride, output_deriv, + output_deriv_stride, deriv_sum_in, + deriv_sum_in_stride, self_repair_config, count, + input_deriv, input_deriv_stride, params_deriv, + params_deriv_stride, value_sum_out, + value_sum_out_stride, deriv_sum_out, + deriv_sum_out_stride, self_repair_sum_out, + self_repair_sum_out_stride); } -inline void cuda_one(int Gr, int Bl, float* x, int dim) { - cudaF_one(Gr, Bl, x, dim); +inline void cuda_diff_normalize_per_row(size_t Gr, size_t Bl, double *id, + int id_stride, const double *iv, + MatrixDim iv_dim, const double* od, + int od_stride, double target_rms, + bool add_log_stddev) { + cudaD_diff_normalize_per_row(Gr, Bl, id, id_stride, iv, iv_dim, od, od_stride, + target_rms, add_log_stddev); +} +inline void cuda_diff_normalize_per_row(size_t Gr, size_t Bl, float *id, + int id_stride, const float *iv, + MatrixDim iv_dim, const float* od, + int od_stride, float target_rms, + bool add_log_stddev) { + cudaF_diff_normalize_per_row(Gr, Bl, id, id_stride, iv, iv_dim, od, od_stride, + target_rms, add_log_stddev); } -inline void cuda_copy(dim3 Gr, dim3 Bl, float *y, const float *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in) { - cudaF_copy(Gr, Bl, y, x, copy_from, d_out, d_in); +inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, double *eout, + const double *e, const double *y, + MatrixDim d, int e_stride, int y_stride, + const double *a, const double *b) { + cudaD_diff_parametric_relu(Gr, Bl, eout, e, y, d, e_stride, y_stride, a, b); } -inline void cuda_copy_from_sp(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_out) { - cudaF_copy_from_sp(Gr, Bl, x, y, d_out); +inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, float *eout, + const float *e, const float *y, + MatrixDim d, int e_stride, int y_stride, + const float *a, const float *b) { + cudaF_diff_parametric_relu(Gr, Bl, eout, e, y, d, e_stride, y_stride, a, b); } -inline void cuda_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in) { - cudaF_take_lower(Gr, Bl, x, y, d_in); +inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, + const double *y, MatrixDim d, int e_stride, + int y_stride) { + cudaD_diff_sigmoid(Gr, Bl, eout, e, y, d, e_stride, y_stride); } -inline void cuda_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in) { - cudaF_take_upper(Gr, Bl, x, y, d_in); +inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, + const float *y, MatrixDim d, int e_stride, + int y_stride) { + cudaF_diff_sigmoid(Gr, Bl, eout, e, y, d, e_stride, y_stride); } -inline void cuda_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, - MatrixDim d_in) { - cudaF_take_mean(Gr, Bl, x, y, d_in); +inline void cuda_diff_softmax(dim3 Gr, dim3 Bl, double* x, const MatrixDim dim, + const double* value, const int value_stride, + const double* diff, const int diff_stride) { + cudaD_diff_softmax(Gr, Bl, x, dim, value, value_stride, diff, diff_stride); } -inline void cuda_matrix_add_elements(dim3 Gr, dim3 Bl, float *data, - MatrixDim dim, float 
alpha, - MatrixElement<float>* x, - int num_elements) { - cudaF_matrix_add_elements(Gr, Bl, data, dim, alpha, x, num_elements); +inline void cuda_diff_softmax(dim3 Gr, dim3 Bl, float* x, const MatrixDim dim, + const float* value, const int value_stride, + const float* diff, const int diff_stride) { + cudaF_diff_softmax(Gr, Bl, x, dim, value, value_stride, diff, diff_stride); } -inline void cuda_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, - float alpha, - const Int32Pair* indices, - const float* x, int s, float* data) { - cudaF_matrix_add_indexed_values(Gr, Bl, dim, alpha, indices, x, s, data); +inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, + const double *y, MatrixDim d, int e_stride, + int y_stride) { + cudaD_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); } -inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, - int32 size, const float* z, MatrixDim d, - float* z2, MatrixDim d2, float* t) { - cudaF_comp_obj_deriv(Gr, Bl, x, size, z, d, z2, d2, t); +inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, float *eout, const float *e, + const float *y, MatrixDim d, int e_stride, + int y_stride) { + cudaF_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); } -inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, - const float *src_data, MatrixDim src_dim, - const Int32Pair *indices) { - cudaF_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices); +inline void cuda_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, + double *mat_net_out, double *vec_log_post, + MatrixDim d) { + cudaD_diff_xent(Gr, Bl, vec_tgt, mat_net_out, vec_log_post, d); } -inline void cuda_add_row_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, - const float *src_data, MatrixDim src_dim, - const Int32Pair *indexes) { - cudaF_add_row_ranges(Gr, Bl, data, dim, src_data, src_dim, indexes); +inline void cuda_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, + float *mat_net_out, float *vec_log_post, + MatrixDim d) { + cudaF_diff_xent(Gr, Bl, vec_tgt, mat_net_out, vec_log_post, d); } -inline void cuda_matrix_lookup(dim3 Gr, dim3 Bl, const float *data, - MatrixDim dim, const Int32Pair *indices, - int indices_size, float *output) { - cudaF_matrix_lookup(Gr, Bl, data, dim, indices, indices_size, output); +inline void cuda_div_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, + MatrixDim dst_d, int src_stride) { + cudaD_div_elements(Gr, Bl, mat, A, dst_d, src_stride); +} +inline void cuda_div_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, + MatrixDim dst_d, int src_stride) { + cudaF_div_elements(Gr, Bl, mat, A, dst_d, src_stride); +} +inline void cuda_div_rows_vec(dim3 Gr, dim3 Bl, double *mat, + const double *vec_div, MatrixDim d) { + cudaD_div_rows_vec(Gr, Bl, mat, vec_div, d); +} +inline void cuda_div_rows_vec(dim3 Gr, dim3 Bl, float *mat, + const float *vec_div, MatrixDim d) { + cudaF_div_rows_vec(Gr, Bl, mat, vec_div, d); +} +inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, + const double *mat2, double *mask, + MatrixDim mat1_dim, int mat2_stride, + int mask_stride) { + cudaD_equal_element_mask(Gr, Bl, mat1, mat2, mask, mat1_dim, mat2_stride, + mask_stride); } - inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, const float *mat2, float *mask, MatrixDim mat1_dim, int mat2_stride, @@ -690,645 +787,548 @@ inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, cudaF_equal_element_mask(Gr, Bl, mat1, mat2, mask, mat1_dim, mat2_stride, mask_stride); } -
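These wrappers all follow one pattern: each unsuffixed cuda_* entry point is overloaded on float and double (and, in the mixed-precision copy_from_mat / copy_from_smat families, the _dd/_df/_fd/_ff suffixes name the destination and source element types), so templated CuMatrix<Real> code reaches the matching cudaF_* or cudaD_* kernel through ordinary overload resolution, with no runtime dispatch. Below is a minimal self-contained sketch of that dispatch pattern, using host-side stubs in place of the real kernels; the names scale_f, scale_d, cuda_scale_demo, and Scale are illustrative, not Kaldi's API.

#include <cstdio>

// Host-side stand-ins for the typed kernels (cudaF_scale / cudaD_scale
// in the real header).
void scale_f(float *mat, float value, int n) {
  for (int i = 0; i < n; i++) mat[i] *= value;
}
void scale_d(double *mat, double value, int n) {
  for (int i = 0; i < n; i++) mat[i] *= value;
}

// Unsuffixed overloaded front end, in the style of cuda_scale above.
inline void cuda_scale_demo(float *mat, float value, int n) {
  scale_f(mat, value, n);
}
inline void cuda_scale_demo(double *mat, double value, int n) {
  scale_d(mat, value, n);
}

// One templated call site serves both precisions; the overload is
// chosen at compile time.
template <typename Real>
void Scale(Real *mat, Real value, int n) {
  cuda_scale_demo(mat, value, n);
}

int main() {
  float f[2] = {1.0f, 2.0f};
  double d[2] = {1.0, 2.0};
  Scale(f, 2.0f, 2);  // resolves to the float stub
  Scale(d, 0.5, 2);   // resolves to the double stub
  std::printf("%.1f %.1f %.1f %.1f\n", f[0], f[1], d[0], d[1]);
  return 0;
}

The cuBLAS-backed wrappers (cuda_axpy, cuda_scal) use the same overload scheme; since the _v2 cuBLAS entry points take the scalar by pointer, those wrappers accept alpha by value and forward &alpha.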
-// double versions - -/* - * CuMatrix - */ -inline void cuda_copy_upp_low(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA) { - cudaD_copy_upp_low(Gr, Bl, A, dimA); +inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const double *mat, + double *vec_val, int32_cuda *vec_id, + MatrixDim d) { + cudaD_find_row_max_id(Gr, Bl, mat, vec_val, vec_id, d); } -inline void cuda_copy_low_upp(dim3 Gr, dim3 Bl, double* A, MatrixDim dimA) { - cudaD_copy_low_upp(Gr, Bl, A, dimA); +inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const float *mat, + float *vec_val, int32_cuda *vec_id, + MatrixDim d) { + cudaF_find_row_max_id(Gr, Bl, mat, vec_val, vec_id, d); } -inline void cuda_add_diag_vec_mat(dim3 Gr, dim3 Bl, double alpha, double *mat, - MatrixDim mat_dim, const double *vec, - const double *mat2, int mat2_row_stride, - int mat2_col_stride, double beta) { - cudaD_add_diag_vec_mat(Gr, Bl, alpha, mat, mat_dim, vec, mat2, - mat2_row_stride, mat2_col_stride, beta); +inline void cuda_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size) { + cudaD_group_max(Gr, Bl, y, x, d, src_stride, group_size); } -inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, - const double* B, MatrixDim dmat) { - cudaD_copy_from_tp_trans(Gr, Bl, A, B, dmat); +inline void cuda_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, int group_size) { + cudaF_group_max(Gr, Bl, y, x, d, src_stride, group_size); } -inline void cuda_copy_from_tp_trans(dim3 Gr, dim3 Bl, double* A, const float* B, - MatrixDim dmat) { - cudaDF_copy_from_tp_trans(Gr, Bl, A, B, dmat); +inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size, + double power) { + cudaD_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); } -inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const double* B, - MatrixDim dmat) { - cudaD_copy_from_tp(Gr, Bl, A, B, dmat); +inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, int group_size, + float power) { + cudaF_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); } -inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const float* B, - MatrixDim dmat) { - cudaDF_copy_from_tp(Gr, Bl, A, B, dmat); +inline void cuda_group_spec_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, int group_size, + double power) { + cudaD_group_spec_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); } -inline void cuda_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { - cudaD_apply_exp(Gr, Bl, mat, d); +inline void cuda_group_spec_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, int group_size, + float power) { + cudaF_group_spec_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); } -inline void cuda_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, - MatrixDim dim) { - cudaD_apply_pow(Gr, Bl, mat, power, dim); +inline void cuda_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride) { + cudaD_heaviside(Gr, Bl, y, x, d, src_stride); } -inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, - bool include_sign, MatrixDim dim) { - cudaD_apply_pow_abs(Gr, Bl, mat, power, include_sign, dim); +inline void cuda_heaviside(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride) { + cudaF_heaviside(Gr, Bl, y, x, d, src_stride); } -inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, 
double* mat, MatrixDim dim) { - cudaD_apply_heaviside(Gr, Bl, mat, dim); +inline void cuda_invert_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim d) { + cudaD_invert_elements(Gr, Bl, data, d); } -inline void cuda_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, - MatrixDim dim) { - cudaD_apply_floor(Gr, Bl, mat, floor_val, dim); +inline void cuda_invert_elements(dim3 Gr, dim3 Bl, float *data, MatrixDim d) { + cudaF_invert_elements(Gr, Bl, data, d); } -inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, - double ceiling_val, MatrixDim dim) { - cudaD_apply_ceiling(Gr, Bl, mat, ceiling_val, dim); +inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, double *y, + const double *x, MatrixDim y_dim, + int x_stride) { + cudaD_log_softmax_reduce(Gr, Bl, y, x, y_dim, x_stride); } -inline void cuda_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaD_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); +inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, float *y, + const float *x, MatrixDim y_dim, + int x_stride) { + cudaF_log_softmax_reduce(Gr, Bl, y, x, y_dim, x_stride); } -inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaD_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride); +inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, + const int in_stride, const double* params, + const int params_stride, + const int out_stride, const int cell_dim, + const int have_dropout_mask, + const int num_rows, double* out) { + cudaD_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, + num_rows, out); } -inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, - const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, - int src_stride) { - cudaD_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride); +inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, + const int in_stride, const float* params, + const int params_stride, + const int out_stride, const int cell_dim, + const int have_dropout_mask, + const int num_rows, float* out) { + cudaF_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, + num_rows, out); } -inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, - const double* const * src, MatrixDim dst_dim) { - cudaD_copy_rows_direct(Gr, Bl, dst, src, dst_dim); +inline void cuda_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, + MatrixDim dim, double alpha, + MatrixElement<double>* x, + int num_elements) { + cudaD_matrix_add_elements(Gr, Bl, data, dim, alpha, x, num_elements); } -inline void cuda_copy_to_rows(dim3 Gr, dim3 Bl, double* const * dst, - const double* src, MatrixDim src_dim) { - cudaD_copy_to_rows_direct(Gr, Bl, dst, src, src_dim); +inline void cuda_matrix_add_elements(dim3 Gr, dim3 Bl, float *data, + MatrixDim dim, float alpha, + MatrixElement<float>* x, + int num_elements) { + cudaF_matrix_add_elements(Gr, Bl, data, dim, alpha, x, num_elements); } -inline void cuda_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, - const double* src, const MatrixIndexT_cuda* reorder, - MatrixDim dst_dim, int src_stride) { - cudaD_add_rows(Gr, Bl, alpha, dst, src, reorder, dst_dim, src_stride); +inline void cuda_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, + double alpha, + const Int32Pair* indices, +
const double* x, int s, + double* data) { + cudaD_matrix_add_indexed_values(Gr, Bl, dim, alpha, indices, x, s, data); } -inline void cuda_add_rows(dim3 Gr, dim3 Bl, double alpha, double* dst, - const double* const * src, MatrixDim dst_dim) { - cudaD_add_rows_direct(Gr, Bl, alpha, dst, src, dst_dim); +inline void cuda_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, + float alpha, + const Int32Pair* indices, + const float* x, int s, float* data) { + cudaF_matrix_add_indexed_values(Gr, Bl, dim, alpha, indices, x, s, data); } -inline void cuda_add_to_rows(dim3 Gr, dim3 Bl, double alpha, - double* const * dst, const double* src, - MatrixDim src_dim) { - cudaD_add_to_rows_direct(Gr, Bl, alpha, dst, src, src_dim); +inline void cuda_matrix_lookup(dim3 Gr, dim3 Bl, const double *data, + MatrixDim dim, const Int32Pair *indices, + int indices_size, double *output) { + cudaD_matrix_lookup(Gr, Bl, data, dim, indices, indices_size, output); } -inline void cuda_trace(int Gr, int Bl, double* mat, double* value, int dim) { - cudaD_trace(Gr, Bl, mat, value, dim); +inline void cuda_matrix_lookup(dim3 Gr, dim3 Bl, const float *data, + MatrixDim dim, const Int32Pair *indices, + int indices_size, float *output) { + cudaF_matrix_lookup(Gr, Bl, data, dim, indices, indices_size, output); } -inline void cuda_set_diag(int Gr, int Bl, double* mat, double value, - MatrixDim d) { - cudaD_set_diag(Gr, Bl, mat, value, d); +inline void cuda_max(dim3 Gr, dim3 Bl, double *mat, const double *A, + MatrixDim dst_d, int src_stride) { + cudaD_max(Gr, Bl, mat, A, dst_d, src_stride); } -inline void cuda_set_diag_packed(int Gr, int Bl, double* mat, double value, - int dim) { - cudaD_set_diag_packed(Gr, Bl, mat, value, dim); +inline void cuda_max(dim3 Gr, dim3 Bl, float *mat, const float *A, + MatrixDim dst_d, int src_stride) { + cudaF_max(Gr, Bl, mat, A, dst_d, src_stride); } -inline void cuda_add_diag_packed(int Gr, int Bl, double* mat, double value, - int dim) { - cudaD_add_diag_packed(Gr, Bl, mat, value, dim); +inline void cuda_max_mat_cols(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d) { + cudaD_max_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_set_const(dim3 Gr, dim3 Bl, double *mat, double value, - MatrixDim d) { - cudaD_set_const(Gr, Bl, mat, value, d); +inline void cuda_max_mat_cols(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d) { + cudaF_max_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_set_zero_above_diag(dim3 Gr, dim3 Bl, double* mat, - MatrixDim d) { - cudaD_set_zero_above_diag(Gr, Bl, mat, d); +inline void cuda_min(dim3 Gr, dim3 Bl, double *mat, const double *other, + MatrixDim mat_d, int other_stride) { + cudaD_min(Gr, Bl, mat, other, mat_d, other_stride); } -inline void cuda_add(dim3 Gr, dim3 Bl, double *mat, double value, MatrixDim d) { - cudaD_add(Gr, Bl, mat, value, d); +inline void cuda_min(dim3 Gr, dim3 Bl, float *mat, const float *other, + MatrixDim mat_d, int other_stride) { + cudaF_min(Gr, Bl, mat, other, mat_d, other_stride); } -inline void cuda_add_vec2(dim3 Gr, dim3 Bl, double *mat, const double *vec, - const double alpha, int dim) { - cudaD_add_vec2(Gr, Bl, mat, vec, alpha, dim); +inline void cuda_min_mat_cols(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d) { + cudaD_min_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_scale_diag_packed(int Gr, int Bl, double* mat, double value, - int dim) { - cudaD_scale_diag_packed(Gr, Bl, mat, value, dim); +inline void cuda_min_mat_cols(int Gr, int Bl, float* result, 
const float* mat, + const MatrixDim d) { + cudaF_min_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_scale(dim3 Gr, dim3 Bl, double *mat, double value, - MatrixDim d) { - cudaD_scale(Gr, Bl, mat, value, d); +inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, + const double *scale, MatrixDim d) { + cudaD_mul_cols_vec(Gr, Bl, mat, scale, d); } -inline void cuda_apply_log(dim3 Gr, dim3 Bl, double *mat, MatrixDim d) { - cudaD_apply_log(Gr, Bl, mat, d); +inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, + MatrixDim d) { + cudaF_mul_cols_vec(Gr, Bl, mat, scale, d); } inline void cuda_mul_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, int src_stride) { cudaD_mul_elements(Gr, Bl, mat, A, dst_d, src_stride); } -inline void cuda_div_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, +inline void cuda_mul_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, int src_stride) { - cudaD_div_elements(Gr, Bl, mat, A, dst_d, src_stride); -} -inline void cuda_max(dim3 Gr, dim3 Bl, double *mat, const double *A, - MatrixDim dst_d, int src_stride) { - cudaD_max(Gr, Bl, mat, A, dst_d, src_stride); -} -inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, - const double *scale, MatrixDim d) { - cudaD_mul_cols_vec(Gr, Bl, mat, scale, d); -} -inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, - const double *scale, MatrixDim d) { - cudaD_mul_rows_vec(Gr, Bl, mat, scale, d); + cudaF_mul_elements(Gr, Bl, mat, A, dst_d, src_stride); } inline void cuda_mul_rows_group_mat(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size) { cudaD_mul_rows_group_mat(Gr, Bl, y, x, d, src_stride, group_size); } - -inline void cuda_diff_group_pnorm(dim3 Gr, dim3 Bl, double *id, - const double *iv, const double *ov, - const double* od, MatrixDim id_dim, - int iv_stride, int ov_stride, int od_stride, - int group_size, double power) { - cudaD_diff_group_pnorm(Gr, Bl, id, iv, ov, od, id_dim, iv_stride, ov_stride, - od_stride, group_size, power); +inline void cuda_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, + int group_size) { + cudaF_mul_rows_group_mat(Gr, Bl, y, x, d, src_stride, group_size); } -inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, - const double *x1, const double *x2, - MatrixDim y_dim, int x1_stride, - int x2_stride, int group_size) { - cudaD_calc_group_max_deriv(Gr, Bl, y, x1, x2, y_dim, x1_stride, x2_stride, - group_size); +inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, + const double *scale, MatrixDim d) { + cudaD_mul_rows_vec(Gr, Bl, mat, scale, d); } -inline void cuda_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, - double *dst, MatrixDim d, int src_stride, - int A_trans) { - cudaD_add_mat(Gr, Bl, alpha, src, dst, d, src_stride, A_trans); +inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, + MatrixDim d) { + cudaF_mul_rows_vec(Gr, Bl, mat, scale, d); } -inline void cuda_add_mat_blocks(dim3 Gr, dim3 Bl, double alpha, - const double *src, int32_cuda num_row_blocks, - int32_cuda num_col_blocks, double *dst, - MatrixDim d, int src_stride, int A_trans) { - cudaD_add_mat_blocks(Gr, Bl, alpha, src, num_row_blocks, num_col_blocks, dst, - d, src_stride, A_trans); +inline void cuda_normalize_per_row(size_t Gr, size_t Bl, double *y, + int y_stride, const double *x, MatrixDim x_d, + double target_rms, bool add_log_stddev) { + cudaD_normalize_per_row(Gr, Bl, y, 
y_stride, x, x_d, target_rms, + add_log_stddev); } -inline void cuda_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, - const double *B, const double *C, - double *dst, MatrixDim d, int stride_a, - int stride_b, int stride_c) { - cudaD_set_mat_mat_div_mat(Gr, Bl, A, B, C, dst, d, stride_a, stride_b, - stride_c); +inline void cuda_normalize_per_row(size_t Gr, size_t Bl, float *y, int y_stride, + const float *x, MatrixDim x_d, + float target_rms, bool add_log_stddev) { + cudaF_normalize_per_row(Gr, Bl, y, y_stride, x, x_d, target_rms, + add_log_stddev); } -inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, double alpha, - const double *col, double beta, double *dst, - MatrixDim d) { - cudaD_add_vec_to_cols(Gr, Bl, alpha, col, beta, dst, d); +inline void cuda_one(int Gr, int Bl, double* x, int dim) { + cudaD_one(Gr, Bl, x, dim); } -inline void cuda_add_vec_to_rows(dim3 Gr, dim3 Bl, double alpha, - const double *row, double beta, double *dst, - MatrixDim d) { - cudaD_add_vec_to_rows(Gr, Bl, alpha, row, beta, dst, d); +inline void cuda_one(int Gr, int Bl, float* x, int dim) { + cudaF_one(Gr, Bl, x, dim); } -inline void cuda_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, - const double* T, MatrixDim tdim, double *S, - MatrixDim sdim) { - cudaD_sy_add_tr2(Gr, Bl, alpha, beta, T, tdim, S, sdim); +inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride, const double *a, + const double *b) { + cudaD_parametric_relu(Gr, Bl, y, x, d, src_stride, a, b); } -inline void cuda_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat, - MatrixDim mat_dim, const double *mat2, - int mat2_row_stride, int mat2_col_stride, - const double *vec, double beta) { - cudaD_add_mat_diag_vec(Gr, Bl, alpha, mat, mat_dim, mat2, mat2_row_stride, - mat2_col_stride, vec, beta); +inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride, const float *a, + const float *b) { + cudaF_parametric_relu(Gr, Bl, y, x, d, src_stride, a, b); } -inline void cuda_add_mat_mat_elements(dim3 Gr, dim3 Bl, double *data, - const double *srcA_data, - const double *srcB_data, MatrixDim dim, - int srcA_stride, int srcB_stride, - double alpha, double beta) { - cudaD_add_mat_mat_elements(Gr, Bl, data, srcA_data, srcB_data, dim, - srcA_stride, srcB_stride, alpha, beta); +inline void cuda_randomize(dim3 Gr, dim3 Bl, double *y, const double *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in) { + cudaD_randomize(Gr, Bl, y, x, copy_from, d_out, d_in); } - -/* - * CuVector - */ - -inline void cuda_max_mat_cols(int Gr, int Bl, double* result, const double* mat, - const MatrixDim d) { - cudaD_max_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_randomize(dim3 Gr, dim3 Bl, float *y, const float *x, + const int32_cuda *copy_from, MatrixDim d_out, + MatrixDim d_in) { + cudaF_randomize(Gr, Bl, y, x, copy_from, d_out, d_in); } -inline void cuda_min_mat_cols(int Gr, int Bl, double* result, const double* mat, - const MatrixDim d) { - cudaD_min_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, double *wei, double *grad, + double l1, double lr, MatrixDim d, + int stride_grad) { + cudaD_regularize_l1(Gr, Bl, wei, grad, l1, lr, d, stride_grad); } -inline void cuda_sum_mat_cols(int Gr, int Bl, double* result, const double* mat, - const MatrixDim d) { - cudaD_sum_mat_cols(Gr, Bl, result, mat, d); +inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, float *wei, float *grad, + float l1, float lr, 
MatrixDim d, + int stride_grad) { + cudaF_regularize_l1(Gr, Bl, wei, grad, l1, lr, d, stride_grad); } inline void cuda_replace_value(int Gr, int Bl, double *v, int dim, double orig, double changed) { cudaD_replace_value(Gr, Bl, v, dim, orig, changed); } -inline void cuda_div_rows_vec(dim3 Gr, dim3 Bl, double *mat, - const double *vec_div, MatrixDim d) { - cudaD_div_rows_vec(Gr, Bl, mat, vec_div, d); +inline void cuda_replace_value(int Gr, int Bl, float *v, int dim, float orig, + float changed) { + cudaF_replace_value(Gr, Bl, v, dim, orig, changed); +} +inline cublasStatus_t cuda_scal(cublasHandle_t handle, int n, double alpha, + double *x, int incx) { + return cublasDscal_v2(handle, n, &alpha, x, incx); +} +inline cublasStatus_t cuda_scal(cublasHandle_t handle, int n, float alpha, + float *x, int incx) { + return cublasSscal_v2(handle, n, &alpha, x, incx); +} +inline void cuda_scale_diag_packed(int Gr, int Bl, double* mat, double value, + int dim) { + cudaD_scale_diag_packed(Gr, Bl, mat, value, dim); +} +inline void cuda_scale_diag_packed(int Gr, int Bl, float* mat, float value, + int dim) { + cudaF_scale_diag_packed(Gr, Bl, mat, value, dim); +} +inline void cuda_scale(dim3 Gr, dim3 Bl, double *mat, double value, + MatrixDim d) { + cudaD_scale(Gr, Bl, mat, value, d); +} +inline void cuda_scale(dim3 Gr, dim3 Bl, float *mat, float value, MatrixDim d) { + cudaF_scale(Gr, Bl, mat, value, d); } inline void cuda_set_bias_params(int Gr, int Bl, double* v, const double* a, double param_1, double param_2, double param_3, int* flag, int dim) { cudaD_set_bias_params(Gr, Bl, v, a, param_1, param_2, param_3, flag, dim); } -inline void cuda_vec_mul_elements(int Gr, int Bl, double* v, const double* a, - int dim) { - cudaD_vec_mul_elements(Gr, Bl, v, a, dim); +inline void cuda_set_bias_params(int Gr, int Bl, float* v, const float* a, + float param_1, float param_2, float param_3, + int* flag, int dim) { + cudaF_set_bias_params(Gr, Bl, v, a, param_1, param_2, param_3, flag, dim); } -inline void cuda_vec_soft_max(int Gr, int Bl, double* v, int dim) { - cudaD_vec_soft_max(Gr, Bl, v, dim); +inline void cuda_set_const(dim3 Gr, dim3 Bl, double *mat, double value, + MatrixDim d) { + cudaD_set_const(Gr, Bl, mat, value, d); } -inline void cuda_vec_min(int Gr, int Bl, const double* v, double* value, - int dim, int inc) { - cudaD_vec_min(Gr, Bl, v, value, dim, inc); +inline void cuda_set_const(dim3 Gr, dim3 Bl, float *mat, float value, + MatrixDim d) { + cudaF_set_const(Gr, Bl, mat, value, d); } -inline void cuda_vec_max(int Gr, int Bl, const double* v, double* value, - int dim, int inc) { - cudaD_vec_max(Gr, Bl, v, value, dim, inc); +inline void cuda_set_diag(int Gr, int Bl, double* mat, double value, + MatrixDim d) { + cudaD_set_diag(Gr, Bl, mat, value, d); } -inline void cuda_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const double* A, - const double* B, MatrixDim dA, - int B_stride, double* value) { - cudaD_trace_mat_mat_trans(Gr, Bl, A, B, dA, B_stride, value); +inline void cuda_set_diag(int Gr, int Bl, float* mat, float value, + MatrixDim d) { + cudaF_set_diag(Gr, Bl, mat, value, d); } -inline void cuda_trace_mat_mat(dim3 Gr, dim3 Bl, const double* A, - const double* B, MatrixDim dA, int B_stride, - double* value) { - cudaD_trace_mat_mat(Gr, Bl, A, B, dA, B_stride, value); +inline void cuda_set_diag_packed(int Gr, int Bl, double* mat, double value, + int dim) { + cudaD_set_diag_packed(Gr, Bl, mat, value, dim); } -inline void cuda_add_diag_mat_mat_MNT(int Gr, int Bl, const double alpha, - const double* M, const 
MatrixDim dim_M, - const double* N, const int stride_N, - const double beta, double* v) { - cudaD_add_diag_mat_mat_MNT(Gr, Bl, alpha, M, dim_M, N, stride_N, beta, v); +inline void cuda_set_diag_packed(int Gr, int Bl, float* mat, float value, + int dim) { + cudaF_set_diag_packed(Gr, Bl, mat, value, dim); } -inline void cuda_add_diag_mat_mat_MTN(dim3 Gr, dim3 Bl, const double alpha, - const double* M, const int stride_M, - const double* N, const MatrixDim dim_N, - const double beta, double* v) { - cudaD_add_diag_mat_mat_MTN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +inline void cuda_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, + const double *B, const double *C, + double *dst, MatrixDim d, int stride_a, + int stride_b, int stride_c) { + cudaD_set_mat_mat_div_mat(Gr, Bl, A, B, C, dst, d, stride_a, stride_b, + stride_c); } -inline void cuda_add_diag_mat_mat_MN(dim3 Gr, dim3 Bl, const double alpha, - const double* M, const int stride_M, - const double* N, const MatrixDim dim_N, - const double beta, double* v) { - cudaD_add_diag_mat_mat_MN(Gr, Bl, alpha, M, stride_M, N, dim_N, beta, v); +inline void cuda_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, + const float *B, const float *C, float *dst, + MatrixDim d, int stride_a, int stride_b, + int stride_c) { + cudaF_set_mat_mat_div_mat(Gr, Bl, A, B, C, dst, d, stride_a, stride_b, + stride_c); } -inline void cuda_add_vec_vec(int Gr, int Bl, double alpha, double* v, - const double* x, const double* y, double beta, - int dim) { - cudaD_add_vec_vec(Gr, Bl, alpha, v, x, y, beta, dim); +inline void cuda_set_zero_above_diag(dim3 Gr, dim3 Bl, double* mat, + MatrixDim d) { + cudaD_set_zero_above_diag(Gr, Bl, mat, d); } -inline void cuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, - const double* mat, MatrixDim dmat, - int dim) { - cudaD_copy_col_from_mat_df(Gr, Bl, v, col, mat, dmat, dim); +inline void cuda_set_zero_above_diag(dim3 Gr, dim3 Bl, float* mat, + MatrixDim d) { + cudaF_set_zero_above_diag(Gr, Bl, mat, d); } -inline void cuda_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, - const double* mat, MatrixDim dmat, - int dim) { - cudaD_copy_col_from_mat_fd(Gr, Bl, v, col, mat, dmat, dim); +inline void cuda_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride) { + cudaD_sigmoid(Gr, Bl, y, x, d, src_stride); } -inline void cuda_vec_sum(int Gr, int Bl, double* v, double* value, int dim, - int inc) { - cudaD_vec_sum(Gr, Bl, v, value, dim, inc); +inline void cuda_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride) { + cudaF_sigmoid(Gr, Bl, y, x, d, src_stride); } -inline void cuda_vec_copy_diag_from_packed(int Gr, int Bl, double *dst, - const double *src, int dim) { - cudaD_vec_copy_diag_from_packed(Gr, Bl, dst, src, dim); +inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, + MatrixDim d, int src_stride) { + cudaD_soft_hinge(Gr, Bl, y, x, d, src_stride); } -inline void cuda_vec_apply_floor(int Gr, int Bl, double* v, double floor_val, - float* num, int dim) { - cudaD_vec_apply_floor(Gr, Bl, v, floor_val, num, dim); +inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, + MatrixDim d, int src_stride) { + cudaF_soft_hinge(Gr, Bl, y, x, d, src_stride); } -inline void cuda_vec_apply_ceiling(int Gr, int Bl, double* v, double floor_val, - float* num, int dim) { - cudaD_vec_apply_ceiling(Gr, Bl, v, floor_val, num, dim); -} -inline void cuda_vec_apply_exp(int Gr, int Bl, double* v, int dim) { - cudaD_vec_apply_exp(Gr, Bl, 
v, dim); +inline void cuda_softmax_reduce(size_t Gr, size_t Bl, double *y, + const double *x, MatrixDim d, int src_stride) { + cudaD_softmax_reduce(Gr, Bl, y, x, d, src_stride); } -inline void cuda_vec_apply_log(int Gr, int Bl, double* v, double* flag, - int dim) { - cudaD_vec_apply_log(Gr, Bl, v, flag, dim); +inline void cuda_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, + MatrixDim d, int src_stride) { + cudaF_softmax_reduce(Gr, Bl, y, x, d, src_stride); } -inline void cuda_invert_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim d) { - cudaD_invert_elements(Gr, Bl, data, d); +inline void cuda_splice(dim3 Gr, dim3 Bl, double *y, const double *x, + const int32_cuda *off, MatrixDim d_out, + MatrixDim d_in) { + cudaD_splice(Gr, Bl, y, x, off, d_out, d_in); } -// B_trans nonzero if B transposed. -inline void cuda_add_mat_blockmat(dim3 Gr, dim3 Bl, double *data, MatrixDim d, - const double *Adata, int A_num_rows, - int A_num_cols, int A_row_stride, - int A_col_stride, - const CuBlockMatrixData *B_cu_data, - int B_num_blocks, double alpha, double beta, - int B_trans) { - cudaD_add_mat_blockmat(Gr, Bl, data, d, Adata, A_num_rows, A_num_cols, - A_row_stride, A_col_stride, B_cu_data, B_num_blocks, - alpha, beta, B_trans); +inline void cuda_splice(dim3 Gr, dim3 Bl, float *y, const float *x, + const int32_cuda *off, MatrixDim d_out, + MatrixDim d_in) { + cudaF_splice(Gr, Bl, y, x, off, d_out, d_in); } -inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, - CuBlockMatrixData *B_cu_data, int num_blocks, - const double *C_data, int C_num_cols, - int C_row_stride, int C_col_stride, - const double *D_data, int D_row_stride, - int D_col_stride, double alpha, - double beta) { - cudaD_block_add_mat_mat(Gr, Bl, B_cu_data, num_blocks, C_data, C_num_cols, - C_row_stride, C_col_stride, D_data, D_row_stride, - D_col_stride, alpha, beta); +inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, + MatrixDim dim, const double *src_data, + MatrixDim src_dim, + const Int32Pair *indices) { + cudaD_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices); } - -/* - * cu:: - */ -inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride) { - cudaD_soft_hinge(Gr, Bl, y, x, d, src_stride); +inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim, + const float *src_data, MatrixDim src_dim, + const Int32Pair *indices) { + cudaF_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices); } -inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size, - double power) { - cudaD_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); +inline void cuda_sum_mat_cols(int Gr, int Bl, double* result, const double* mat, + const MatrixDim d) { + cudaD_sum_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_group_spec_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size, - double power) { - cudaD_group_spec_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); +inline void cuda_sum_mat_cols(int Gr, int Bl, float* result, const float* mat, + const MatrixDim d) { + cudaF_sum_mat_cols(Gr, Bl, result, mat, d); } -inline void cuda_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, int group_size) { - cudaD_group_max(Gr, Bl, y, x, d, src_stride, group_size); +inline void cuda_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, + const double* T, MatrixDim tdim, double *S, + 
MatrixDim sdim) { + cudaD_sy_add_tr2(Gr, Bl, alpha, beta, T, tdim, S, sdim); } -inline void cuda_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride) { - cudaD_sigmoid(Gr, Bl, y, x, d, src_stride); +inline void cuda_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, + const float* T, MatrixDim tdim, float *S, + MatrixDim sdim) { + cudaF_sy_add_tr2(Gr, Bl, alpha, beta, T, tdim, S, sdim); } -inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, - const double *y, MatrixDim d, int e_stride, - int y_stride) { - cudaD_diff_sigmoid(Gr, Bl, eout, e, y, d, e_stride, y_stride); +inline void cuda_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_in) { + cudaD_take_lower(Gr, Bl, x, y, d_in); } -inline void cuda_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, - int src_stride) { - cudaD_tanh(Gr, Bl, y, x, d, src_stride); +inline void cuda_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_in) { + cudaF_take_lower(Gr, Bl, x, y, d_in); } -inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, - const double *y, MatrixDim d, int e_stride, - int y_stride) { - cudaD_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); +inline void cuda_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_in) { + cudaD_take_mean(Gr, Bl, x, y, d_in); } -inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, - const double *a, const double *b) { - cudaD_parametric_relu(Gr,Bl,y,x,d,src_stride,a,b); +inline void cuda_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_in) { + cudaF_take_mean(Gr, Bl, x, y, d_in); } -inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, double *eout, - const double *e, const double *y, - MatrixDim d, int e_stride, int y_stride, - const double *a, const double *b) { - cudaD_diff_parametric_relu(Gr,Bl,eout,e,y,d,e_stride,y_stride,a,b); +inline void cuda_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, + MatrixDim d_in) { + cudaD_take_upper(Gr, Bl, x, y, d_in); } -inline void cuda_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride) { - cudaD_heaviside(Gr, Bl, y, x, d, src_stride); +inline void cuda_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, + MatrixDim d_in) { + cudaF_take_upper(Gr, Bl, x, y, d_in); } -inline void cuda_softmax_reduce(size_t Gr, size_t Bl, double *y, - const double *x, MatrixDim d, int src_stride) { - cudaD_softmax_reduce(Gr, Bl, y, x, d, src_stride); +inline void cuda_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, + int src_stride) { + cudaD_tanh(Gr, Bl, y, x, d, src_stride); } -inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, double *y, - const double *x, MatrixDim y_dim, - int x_stride) { - cudaD_log_softmax_reduce(Gr, Bl, y, x, y_dim, x_stride); +inline void cuda_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, + int src_stride) { + cudaF_tanh(Gr, Bl, y, x, d, src_stride); } -inline void cuda_normalize_per_row(size_t Gr, size_t Bl, double *y, - int y_stride, const double *x, MatrixDim x_d, - double target_rms, bool add_log_stddev) { - cudaD_normalize_per_row(Gr, Bl, y, y_stride, x, x_d, target_rms, - add_log_stddev); +inline void cuda_trace(int Gr, int Bl, double* mat, double* value, int dim) { + cudaD_trace(Gr, Bl, mat, value, dim); } - -inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, double *wei, double *grad, - double l1, double lr, MatrixDim d, - int 
stride_grad) { - cudaD_regularize_l1(Gr, Bl, wei, grad, l1, lr, d, stride_grad); +inline void cuda_trace(int Gr, int Bl, float* mat, float* value, int dim) { + cudaF_trace(Gr, Bl, mat, value, dim); } -inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const double *mat, - double *vec_val, int32_cuda *vec_id, - MatrixDim d) { - cudaD_find_row_max_id(Gr, Bl, mat, vec_val, vec_id, d); +inline void cuda_trace_mat_mat(dim3 Gr, dim3 Bl, const double* A, + const double* B, MatrixDim dA, int B_stride, + double* value) { + cudaD_trace_mat_mat(Gr, Bl, A, B, dA, B_stride, value); } -inline void cuda_diff_xent(dim3 Gr, dim3 Bl, const int32_cuda *vec_tgt, - double *mat_net_out, double *vec_log_post, - MatrixDim d) { - cudaD_diff_xent(Gr, Bl, vec_tgt, mat_net_out, vec_log_post, d); +inline void cuda_trace_mat_mat(dim3 Gr, dim3 Bl, const float* A, const float* B, + MatrixDim dA, int B_stride, float* value) { + cudaF_trace_mat_mat(Gr, Bl, A, B, dA, B_stride, value); } -inline void cuda_diff_softmax(dim3 Gr, dim3 Bl, double* x, const MatrixDim dim, - const double* value, const int value_stride, - const double* diff, const int diff_stride) { - cudaD_diff_softmax(Gr, Bl, x, dim, value, value_stride, diff, diff_stride); +inline void cuda_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const double* A, + const double* B, MatrixDim dA, + int B_stride, double* value) { + cudaD_trace_mat_mat_trans(Gr, Bl, A, B, dA, B_stride, value); } -inline void cuda_diff_log_softmax(dim3 Gr, dim3 Bl, - const MatrixDim in_deriv_dim, - const double* out_value, - const int out_value_stride, - const double* out_deriv, - const int out_deriv_stride, - double* in_deriv) { - cudaD_diff_log_softmax(Gr, Bl, in_deriv_dim, out_value, out_value_stride, - out_deriv, out_deriv_stride, in_deriv); +inline void cuda_trace_mat_mat_trans(dim3 Gr, dim3 Bl, const float* A, + const float* B, MatrixDim dA, int B_stride, + float* value) { + cudaF_trace_mat_mat_trans(Gr, Bl, A, B, dA, B_stride, value); } -inline void cuda_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, - MatrixDim d_out, const double *v_in) { - cudaD_copy_rows_from_vec(Gr, Bl, mat_out, d_out, v_in); +inline void cuda_trace_mat_smat(dim3 Gr, dim3 Bl, const double* mat_in, + const MatrixElement* smat_in, + MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, + double* trace_vec_out) { + cudaD_trace_mat_smat(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, + trace_vec_out); } - -inline void cuda_randomize(dim3 Gr, dim3 Bl, double *y, const double *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in) { - cudaD_randomize(Gr, Bl, y, x, copy_from, d_out, d_in); +inline void cuda_trace_mat_smat(dim3 Gr, dim3 Bl, const float* mat_in, + const MatrixElement* smat_in, + MatrixDim mat_d_in, MatrixIndexT_cuda smat_d_in, + float* trace_vec_out) { + cudaF_trace_mat_smat(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, + trace_vec_out); } -inline void cuda_splice(dim3 Gr, dim3 Bl, double *y, const double *x, - const int32_cuda *off, MatrixDim d_out, - MatrixDim d_in) { - cudaD_splice(Gr, Bl, y, x, off, d_out, d_in); +inline void cuda_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const double* mat_in, + const MatrixElement* smat_in, + MatrixDim mat_d_in, + MatrixIndexT_cuda smat_d_in, + double* trace_vec_out) { + cudaD_trace_mat_smat_trans(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, + trace_vec_out); } -inline void cuda_one(int Gr, int Bl, double* x, int dim) { - cudaD_one(Gr, Bl, x, dim); +inline void cuda_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const float* mat_in, + const MatrixElement* smat_in, + 
MatrixDim mat_d_in, + MatrixIndexT_cuda smat_d_in, + float* trace_vec_out) { + cudaF_trace_mat_smat_trans(Gr, Bl, mat_in, smat_in, mat_d_in, smat_d_in, + trace_vec_out); } -inline void cuda_copy(dim3 Gr, dim3 Bl, double *y, const double *x, - const int32_cuda *copy_from, MatrixDim d_out, - MatrixDim d_in) { - cudaD_copy(Gr, Bl, y, x, copy_from, d_out, d_in); +inline void cuda_vec_apply_ceiling(int Gr, int Bl, double* v, double floor_val, + float* num, int dim) { + cudaD_vec_apply_ceiling(Gr, Bl, v, floor_val, num, dim); } -inline void cuda_copy_from_sp(dim3 Gr, dim3 Bl, const double* x, double* y, - MatrixDim d_out) { - cudaD_copy_from_sp(Gr, Bl, x, y, d_out); +inline void cuda_vec_apply_ceiling(int Gr, int Bl, float* v, float floor_val, + float* num, int dim) { + cudaF_vec_apply_ceiling(Gr, Bl, v, floor_val, num, dim); } -inline void cuda_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, - MatrixDim d_in) { - cudaD_take_lower(Gr, Bl, x, y, d_in); +inline void cuda_vec_apply_exp(int Gr, int Bl, double* v, int dim) { + cudaD_vec_apply_exp(Gr, Bl, v, dim); } -inline void cuda_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, - MatrixDim d_in) { - cudaD_take_upper(Gr, Bl, x, y, d_in); +inline void cuda_vec_apply_exp(int Gr, int Bl, float* v, int dim) { + cudaF_vec_apply_exp(Gr, Bl, v, dim); } -inline void cuda_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, - MatrixDim d_in) { - cudaD_take_mean(Gr, Bl, x, y, d_in); +inline void cuda_vec_apply_floor(int Gr, int Bl, double* v, double floor_val, + float* num, int dim) { + cudaD_vec_apply_floor(Gr, Bl, v, floor_val, num, dim); } -inline void cuda_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, - MatrixDim dim, double alpha, - MatrixElement* x, - int num_elements) { - cudaD_matrix_add_elements(Gr, Bl, data, dim, alpha, x, num_elements); +inline void cuda_vec_apply_floor(int Gr, int Bl, float* v, float floor_val, + float* num, int dim) { + cudaF_vec_apply_floor(Gr, Bl, v, floor_val, num, dim); } -inline void cuda_matrix_add_indexed_values(dim3 Gr, dim3 Bl, MatrixDim dim, - double alpha, - const Int32Pair* indices, - const double* x, int s, - double* data) { - cudaD_matrix_add_indexed_values(Gr, Bl, dim, alpha, indices, x, s, data); +inline void cuda_vec_apply_log(int Gr, int Bl, double* v, double* flag, + int dim) { + cudaD_vec_apply_log(Gr, Bl, v, flag, dim); } -inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement* x, - int32 size, const double* z, MatrixDim d, - double* z2, MatrixDim d2, double* t) { - cudaD_comp_obj_deriv(Gr, Bl, x, size, z, d, z2, d2, t); +inline void cuda_vec_apply_log(int Gr, int Bl, float* v, float* flag, int dim) { + cudaF_vec_apply_log(Gr, Bl, v, flag, dim); } -inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, - MatrixDim dim, const double *src_data, - MatrixDim src_dim, - const Int32Pair *indices) { - cudaD_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices); +inline void cuda_vec_copy_diag_from_packed(int Gr, int Bl, double *dst, + const double *src, int dim) { + cudaD_vec_copy_diag_from_packed(Gr, Bl, dst, src, dim); } -inline void cuda_add_row_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, - const double *src_data, MatrixDim src_dim, - const Int32Pair *indexes) { - cudaD_add_row_ranges(Gr, Bl, data, dim, src_data, src_dim, indexes); +inline void cuda_vec_copy_diag_from_packed(int Gr, int Bl, float *dst, + const float *src, int dim) { + cudaF_vec_copy_diag_from_packed(Gr, Bl, dst, src, dim); } -inline void cuda_matrix_lookup(dim3 Gr, dim3 Bl, 
const double *data, - MatrixDim dim, const Int32Pair *indices, - int indices_size, double *output) { - cudaD_matrix_lookup(Gr, Bl, data, dim, indices, indices_size, output); +inline void cuda_vec_max(int Gr, int Bl, const double* v, double* value, + int dim, int inc) { + cudaD_vec_max(Gr, Bl, v, value, dim, inc); } - -inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, - const double *mat2, double *mask, - MatrixDim mat1_dim, int mat2_stride, - int mask_stride) { - cudaD_equal_element_mask(Gr, Bl, mat1, mat2, mask, mat1_dim, mat2_stride, - mask_stride); +inline void cuda_vec_max(int Gr, int Bl, const float* v, float* value, int dim, + int inc) { + cudaF_vec_max(Gr, Bl, v, value, dim, inc); } - -// Also include some template-friendly wrappers of cublas functions: -inline cublasStatus_t cuda_axpy(cublasHandle_t handle, int n, float alpha, - const float *x, int incx, float *y, int incy) { - return cublasSaxpy_v2(handle, n, &alpha, x, incx, y, incy); +inline void cuda_vec_min(int Gr, int Bl, const double* v, double* value, + int dim, int inc) { + cudaD_vec_min(Gr, Bl, v, value, dim, inc); } -inline cublasStatus_t cuda_axpy(cublasHandle_t handle, int n, double alpha, - const double *x, int incx, double *y, - int incy) { - return cublasDaxpy_v2(handle, n, &alpha, x, incx, y, incy); +inline void cuda_vec_min(int Gr, int Bl, const float* v, float* value, int dim, + int inc) { + cudaF_vec_min(Gr, Bl, v, value, dim, inc); } -inline cublasStatus_t cuda_scal(cublasHandle_t handle, int n, float alpha, - float *x, int incx) { - return cublasSscal_v2(handle, n, &alpha, x, incx); +inline void cuda_vec_mul_elements(int Gr, int Bl, double* v, const double* a, + int dim) { + cudaD_vec_mul_elements(Gr, Bl, v, a, dim); } -inline cublasStatus_t cuda_scal(cublasHandle_t handle, int n, double alpha, - double *x, int incx) { - return cublasDscal_v2(handle, n, &alpha, x, incx); +inline void cuda_vec_mul_elements(int Gr, int Bl, float* v, const float* a, + int dim) { + cudaF_vec_mul_elements(Gr, Bl, v, a, dim); } - -inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, - const int in_stride, const double* params, - const int params_stride, - const int out_stride, const int cell_dim, - const int num_rows, double* out) { - cudaD_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); +inline void cuda_vec_soft_max(int Gr, int Bl, double* v, int dim) { + cudaD_vec_soft_max(Gr, Bl, v, dim); } -inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, - const int in_stride, const float* params, - const int params_stride, - const int out_stride, const int cell_dim, - const int num_rows, float* out) { - cudaF_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); +inline void cuda_vec_soft_max(int Gr, int Bl, float* v, int dim) { + cudaF_vec_soft_max(Gr, Bl, v, dim); } -inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, - const int num_rows, const double* input, - const int input_stride, - const double* params, - const int params_stride, - const double* output_deriv, - const int output_deriv_stride, - const double* deriv_sum_in, - const int deriv_sum_in_stride, - const double* self_repair_config, - double count, double* input_deriv, - const int input_deriv_stride, - double* params_deriv, - const int params_deriv_stride, - double* value_sum_out, - const int value_sum_out_stride, - double* deriv_sum_out, - const int deriv_sum_out_stride, - double* 
self_repair_sum_out, - const int self_repair_sum_out_stride) { - cudaD_diff_lstm_nonlinearity(Gr, Bl, cell_dim, num_rows, input, input_stride, - params, params_stride, output_deriv, - output_deriv_stride, deriv_sum_in, - deriv_sum_in_stride, self_repair_config, count, - input_deriv, input_deriv_stride, params_deriv, - params_deriv_stride, value_sum_out, - value_sum_out_stride, deriv_sum_out, - deriv_sum_out_stride, self_repair_sum_out, - self_repair_sum_out_stride); +inline void cuda_vec_sum(int Gr, int Bl, double* v, double* value, int dim, + int inc) { + cudaD_vec_sum(Gr, Bl, v, value, dim, inc); } -inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, - const int num_rows, const float* input, - const int input_stride, - const float* params, - const int params_stride, - const float* output_deriv, - const int output_deriv_stride, - const double* deriv_sum_in, - const int deriv_sum_in_stride, - const float* self_repair_config, - double count, float* input_deriv, - const int input_deriv_stride, - float* params_deriv, - const int params_deriv_stride, - double* value_sum_out, - const int value_sum_out_stride, - double* deriv_sum_out, - const int deriv_sum_out_stride, - float* self_repair_sum_out, - const int self_repair_sum_out_stride) { - cudaF_diff_lstm_nonlinearity(Gr, Bl, cell_dim, num_rows, input, input_stride, - params, params_stride, output_deriv, - output_deriv_stride, deriv_sum_in, - deriv_sum_in_stride, self_repair_config, count, - input_deriv, input_deriv_stride, params_deriv, - params_deriv_stride, value_sum_out, - value_sum_out_stride, deriv_sum_out, - deriv_sum_out_stride, self_repair_sum_out, - self_repair_sum_out_stride); +inline void cuda_vec_sum(int Gr, int Bl, float* v, float* value, int dim, + int inc) { + cudaF_vec_sum(Gr, Bl, v, value, dim, inc); } } // namespace kaldi diff --git a/src/cudamatrix/cu-math-test.cc b/src/cudamatrix/cu-math-test.cc index c7a01cf9aa6..daf5c708465 100644 --- a/src/cudamatrix/cu-math-test.cc +++ b/src/cudamatrix/cu-math-test.cc @@ -144,7 +144,8 @@ static void UnitTestCuMathComputeLstmNonlinearity() { for (int i = 0; i < 3; i++) { int32 num_rows = 1 + Rand() % 100; int32 cell_dim = 1 + Rand() % 2000; - Matrix Hinput(num_rows, 5 * cell_dim); + int32 dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); + Matrix Hinput(num_rows, 5 * cell_dim + dropout_dim); Matrix Hparams(3, cell_dim); Matrix Houtput(num_rows, 2 * cell_dim); Hinput.SetRandn(); @@ -161,11 +162,12 @@ static void UnitTestCuMathComputeLstmNonlinearity() { AssertEqual(Houtput, HDoutput); } - for (int i = 16; i <= 2048; i *= 2) { + for (int i = 16; i <= 1024; i *= 2) { BaseFloat time_in_secs = 0.025; int32 num_rows = i; int32 cell_dim = i; - CuMatrix input(num_rows, 5 * cell_dim); + int32 dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); + CuMatrix input(num_rows, 5 * cell_dim + dropout_dim); CuMatrix params(3, cell_dim); CuMatrix output(num_rows, 2 * cell_dim); input.SetRandn(); @@ -180,6 +182,8 @@ static void UnitTestCuMathComputeLstmNonlinearity() { KALDI_LOG << "For ComputeLstmNonlinearity" << (sizeof(Real)==8 ? "" : "") << ", for dim = " << i << ", speed was " << gflops << " gigaflops"; + if (tim.Elapsed() > 0.05) + break; } } @@ -188,7 +192,8 @@ void UnitTestLstmNonlinearity() { // problem dimensions. int32 num_rows = RandInt(5, 20), - cell_dim = RandInt(2, 200); + cell_dim = RandInt(2, 200), + dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); // Pick the (input or params block), and output block, for which we'll // spot-check the derivative values. 
This will give us test failures @@ -205,7 +210,7 @@ void UnitTestLstmNonlinearity() { test_params = -1; - CuMatrix input(num_rows, cell_dim * 5), + CuMatrix input(num_rows, cell_dim * 5 + dropout_dim), params(3, cell_dim), output_deriv(num_rows, cell_dim * 2); input.SetRandn(); @@ -228,7 +233,7 @@ void UnitTestLstmNonlinearity() { CuVector self_repair_config(10.0); // leave at zero... we don't really test this here. CuMatrix self_repair_sum(5, cell_dim), - input_deriv(num_rows, 5 * cell_dim), + input_deriv(num_rows, 5 * cell_dim + dropout_dim), params_deriv(3, cell_dim); double count_in = 0.0; @@ -247,7 +252,7 @@ void UnitTestLstmNonlinearity() { measured_objf_change(test_dim); for (int32 i = 0; i < test_dim; i++) { - CuMatrix delta_input(num_rows, 5 * cell_dim), + CuMatrix delta_input(num_rows, 5 * cell_dim + dropout_dim), delta_params(3, cell_dim); if (test_input >= 0) { delta_input.ColRange(test_input * cell_dim, cell_dim).SetRandn(); @@ -258,12 +263,9 @@ void UnitTestLstmNonlinearity() { delta_params.Scale(delta); } - - predicted_objf_change(i) = TraceMatMat(delta_input, input_deriv, kTrans) + TraceMatMat(delta_params, params_deriv, kTrans); - CuMatrix perturbed_input(input); perturbed_input.AddMat(1.0, delta_input); @@ -278,7 +280,9 @@ void UnitTestLstmNonlinearity() { measured_objf_change(i) = objf_change; } KALDI_LOG << "LSTM nonlinearity test: num_rows=" << num_rows - << ", cell_dim=" << cell_dim << ", test_input=" << test_input + << ", cell_dim=" << cell_dim + << ", dropout_dim=" << dropout_dim + << ", test_input=" << test_input << ", test_params=" << test_params << ", test_output=" << test_output << ", predicted_objf_change=" << predicted_objf_change @@ -294,16 +298,17 @@ template static void UnitTestBackpropLstmNonlinearity() { for (int i = 0; i < 3; i++) { int32 num_rows = 1 + Rand() % 200; - int32 cell_dim = 1 + Rand() % 2000; + int32 cell_dim = 1 + Rand() % 2000, + dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); // KALDI_LOG << num_rows << ", " << cell_dim; - Matrix hinput(num_rows, 5 * cell_dim); + Matrix hinput(num_rows, 5 * cell_dim + dropout_dim); Matrix hparams(3, cell_dim); Matrix houtput_deriv(num_rows, 2 * cell_dim); Matrix hderiv_sum_in(5, cell_dim); Vector hself_repair_config(10); double count_in; - Matrix hinput_deriv(num_rows, 5 * cell_dim); + Matrix hinput_deriv(num_rows, 5 * cell_dim + dropout_dim); Matrix hparams_deriv(3, cell_dim); Matrix hvalue_sum_out(5, cell_dim); Matrix hderiv_sum_out(5, cell_dim); @@ -407,15 +412,16 @@ static void UnitTestBackpropLstmNonlinearity() { BaseFloat time_in_secs = 0.025; int32 num_rows = i; int32 cell_dim = i; + int32 dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); - CuMatrix input(num_rows, 5 * cell_dim); + CuMatrix input(num_rows, 5 * cell_dim + dropout_dim); CuMatrix params(3, cell_dim); CuMatrix output_deriv(num_rows, 2 * cell_dim); CuMatrix deriv_sum_in(5, cell_dim); CuVector self_repair_config(10); double count_in; - CuMatrix input_deriv(num_rows, 5 * cell_dim); + CuMatrix input_deriv(num_rows, 5 * cell_dim + dropout_dim); CuMatrix params_deriv(3, cell_dim); CuMatrix value_sum_out(5, cell_dim); CuMatrix deriv_sum_out(5, cell_dim); @@ -441,6 +447,8 @@ static void UnitTestBackpropLstmNonlinearity() { KALDI_LOG << "For BackpropLstmNonlinearity" << (sizeof(Real) == 8 ? 
"" : "") << ", for dim = " << i << ", speed was " << gflops << " gigaflops"; + if (tim.Elapsed() > 0.05) + break; } } @@ -506,12 +514,115 @@ static void UnitTestCuMathNormalizePerRow() { BaseFloat gflops = ((BaseFloat) dim * dim * iter) / (tim.Elapsed() * 1.0e+09); - KALDI_LOG << "For CuMatrix::NormalizePerRow" + KALDI_LOG << "For CuMath::NormalizePerRow" << (sizeof(Real)==8?"":"") << ", for dim = " << dim << ", speed was " << gflops << " gigaflops."; + if (tim.Elapsed() > 0.05) + break; } } +template +static void UnitTestCuDiffNormalizePerRow() { + for (int32 i = 0; i < 2; i++) { + int row = 10 + Rand() % 40; + int col = 10 + Rand() % 50; + + Matrix Hi(row, col); + Matrix Ho(row, col + 1); + Matrix Hid(row, col); + Matrix Hod(row, col + 1); + Hi.SetRandn(); + Hod.SetRandn(); + Hi.Scale(5.0); + + CuMatrix Di(row, col); + CuMatrix Do(row, col + 1); + CuMatrix Did(row, col); + CuMatrix Dod(row, col + 1); + Di.CopyFromMat(Hi); + Dod.CopyFromMat(Hod); + + Real target_rms = 0.3456; + bool add_log_stddev = true; + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 + + //gpu + cu::DiffNormalizePerRow(Di, Dod, target_rms, add_log_stddev, &Did); + + //cpu + { + MatrixBase* in_deriv = &Hid; + MatrixBase& out_deriv(Hod); + MatrixBase& in_value(Hi); + + const SubMatrix out_deriv_no_log(out_deriv, 0, out_deriv.NumRows(), + 0, in_value.NumCols()); + Vector dot_products(out_deriv.NumRows()); + dot_products.AddDiagMatMat(1.0, out_deriv_no_log, kNoTrans, in_value, + kTrans, 0.0); + Vector in_norm(in_value.NumRows()); + Real d_scaled = (in_value.NumCols() * target_rms * target_rms); + in_norm.AddDiagMat2(1.0, in_value, kNoTrans, 0.0); + if (add_log_stddev) { + Vector log_stddev_deriv(in_norm), // log_stddev deriv as dF/dy .* (x^T x)^-1 + out_deriv_for_stddev(out_deriv.NumRows(), kUndefined); + // f = log(sqrt(max(epsi, x^T x / D))) + // df/dx = epsi^2 * D < x^T x ? (1/(x^T x)) * x : 0. + // we don't compute this exactly below for the case when x^2 x is very + // small, but we do make sure that the deriv isn't infinity when the input + // is zero. 
+ log_stddev_deriv.ApplyFloor(in_value.NumCols() * kSquaredNormFloor); + log_stddev_deriv.ApplyPow(-1.0); + out_deriv_for_stddev.CopyColFromMat(out_deriv, + (out_deriv.NumCols() - 1)); + log_stddev_deriv.MulElements(out_deriv_for_stddev); + if (in_deriv) + in_deriv->AddDiagVecMat(1.0, log_stddev_deriv, in_value, kNoTrans, + 1.0); + } + in_norm.Scale(1.0 / d_scaled); + in_norm.ApplyFloor(kSquaredNormFloor); + in_norm.ApplyPow(-0.5); + if (in_deriv) { + if (in_deriv->Data() != out_deriv_no_log.Data()) + in_deriv->AddDiagVecMat(1.0, in_norm, out_deriv_no_log, kNoTrans, + 1.0); + else + in_deriv->MulRowsVec(in_norm); + in_norm.ReplaceValue(1.0 / sqrt(kSquaredNormFloor), 0.0); + in_norm.ApplyPow(3.0); + dot_products.MulElements(in_norm); + + in_deriv->AddDiagVecMat(-1.0 / d_scaled, dot_products, in_value, + kNoTrans, 1.0); + } + + Matrix Hid2(Did); + AssertEqual(Hid, Hid2, 0.00001); + } + } + + for (int dim = 16; dim <= 1024; dim *= 2) { + BaseFloat time_in_secs = 0.025; + CuMatrix id(dim, dim), iv(dim, dim), od(dim, dim + 1); + iv.SetRandn(); + od.SetRandn(); + Timer tim; + int32 iter = 0; + for (; tim.Elapsed() < time_in_secs; iter++) { + cu::DiffNormalizePerRow(iv, od, Real(0.456), true, &id); + } + BaseFloat fdim = dim; + BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09); + KALDI_LOG << "For CuMath::DiffNormalizePerRow" + << (sizeof(Real)==8?"":"") + << ", for dim = " << dim << ", speed was " << gflops + << " gigaflops."; + } +} + + template void CudaMathUnitTest() { #if HAVE_CUDA == 1 @@ -525,14 +636,16 @@ template void CudaMathUnitTest() { UnitTestLstmNonlinearity(); UnitTestBackpropLstmNonlinearity(); UnitTestCuMathNormalizePerRow(); + UnitTestCuDiffNormalizePerRow(); } } // namespace kaldi int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -556,8 +669,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-math.cc b/src/cudamatrix/cu-math.cc index 047e808ae03..a9cd9efcfce 100644 --- a/src/cudamatrix/cu-math.cc +++ b/src/cudamatrix/cu-math.cc @@ -245,7 +245,7 @@ void Randomize(const CuMatrixBase &src, template void NormalizePerRow(const CuMatrixBase& in, const Real target_rms, const bool add_log_stddev, CuMatrixBase* out) { - const Real kSquaredNormFloor = 1.35525271560688e-20; // 2^-66 + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 if (add_log_stddev) { KALDI_ASSERT(in.NumRows() == out->NumRows()); KALDI_ASSERT(in.NumCols() + 1 == out->NumCols()); @@ -291,6 +291,100 @@ void NormalizePerRow(const CuMatrixBase& in, const double target_rms, const bool add_log_stddev, CuMatrixBase* out); +// A note on the derivative of NormalizeComponent... +// let both row_in and row_out be vectors of dimension D. +// Let p = row_in^T row_in / (D * target_rms^2), and let +// f = 1.0 / sqrt(max(kSquaredNormFloor, p)), and we compute row_out as: +// row_out = f row_in. +// Suppose we have a quantity deriv_out which is the derivative +// of the objective function w.r.t. row_out. We want to compute +// deriv_in which is the derivative of the objective function w.r.t. +// row_in. Let the objective function be F. 
One term is obvious: we have +// deriv_in = f deriv_out + .... +// next we have to take into account the derivative that gets back-propagated +// through f. Obviously, dF/df = deriv_out^T row_in. +// And df/dp = (p <= kSquaredNormFloor ? 0.0 : -0.5 p^{-1.5}) = (f == 1.0 / sqrt(kSquaredNormFloor) ? 0.0 : -0.5 f^3), +// and dp/d(row_in) = 2/(D * target_rms^2) row_in. [it's vector_valued]. +// So this term in dF/d(row_in) equals: +// dF/df df/dp dp/d(row_in) = 2/(D * target_rms^2) (f == 1.0 / sqrt(kSquaredNormFloor) ? 0.0 : -0.5 f^3) (deriv_out^T row_in) row_in +// So +// deriv_in = f deriv_out + (f == 1.0 ? 0.0 : -f^3 / (D * target_rms^2) ) (deriv_out^T row_in) row_in +// if add_log_stddev_ is true, the deriv_in has another term as +// dF/dx_i = dF/df . df/dx_i => df/dx_i = x_i/(x^T x) +template<typename Real> +void DiffNormalizePerRow(const CuMatrixBase<Real> &in_value, + const CuMatrixBase<Real> &out_deriv, + const Real target_rms, const bool add_log_stddev, + CuMatrixBase<Real>* in_deriv) { + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 +#if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + size_t dimBlock = CU1DBLOCK; + size_t dimGrid = in_deriv->NumRows(); + cuda_diff_normalize_per_row(dimGrid, dimBlock, in_deriv->Data(), + in_deriv->Stride(), in_value.Data(), + in_value.Dim(), out_deriv.Data(), + out_deriv.Stride(), target_rms, add_log_stddev); + CU_SAFE_CALL(cudaGetLastError()); + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else +#endif + { + const CuSubMatrix<Real> out_deriv_no_log(out_deriv, 0, out_deriv.NumRows(), + 0, in_value.NumCols()); + CuVector<Real> dot_products(out_deriv.NumRows()); + dot_products.AddDiagMatMat(1.0, out_deriv_no_log, kNoTrans, in_value, + kTrans, 0.0); + CuVector<Real> in_norm(in_value.NumRows()); + Real d_scaled = (in_value.NumCols() * target_rms * target_rms); + in_norm.AddDiagMat2(1.0, in_value, kNoTrans, 0.0); + + if (add_log_stddev) { + CuVector<Real> log_stddev_deriv(in_norm), // log_stddev deriv as dF/dy .* (x^T x)^-1 + out_deriv_for_stddev(out_deriv.NumRows(), kUndefined); + // f = log(sqrt(max(epsi, x^T x / D))) + // df/dx = epsi^2 * D < x^T x ? (1/(x^T x)) * x : 0. + // we don't compute this exactly below for the case when x^T x is very + // small, but we do make sure that the deriv isn't infinity when the input + // is zero.
+ log_stddev_deriv.ApplyFloor(in_value.NumCols() * kSquaredNormFloor); + log_stddev_deriv.ApplyPow(-1.0); + out_deriv_for_stddev.CopyColFromMat(out_deriv, (out_deriv.NumCols() - 1)); + log_stddev_deriv.MulElements(out_deriv_for_stddev); + if (in_deriv) + in_deriv->AddDiagVecMat(1.0, log_stddev_deriv, in_value, kNoTrans, 1.0); + } + in_norm.Scale(1.0 / d_scaled); + in_norm.ApplyFloor(kSquaredNormFloor); + in_norm.ApplyPow(-0.5); + if (in_deriv) { + if (in_deriv->Data() != out_deriv_no_log.Data()) + in_deriv->AddDiagVecMat(1.0, in_norm, out_deriv_no_log, kNoTrans, 1.0); + else + in_deriv->MulRowsVec(in_norm); + in_norm.ReplaceValue(1.0 / sqrt(kSquaredNormFloor), 0.0); + in_norm.ApplyPow(3.0); + dot_products.MulElements(in_norm); + + in_deriv->AddDiagVecMat(-1.0 / d_scaled, dot_products, in_value, kNoTrans, + 1.0); + } + } +} + +template +void DiffNormalizePerRow(const CuMatrixBase &in_value, + const CuMatrixBase &out_deriv, + const float target_rms, const bool add_log_stddev, + CuMatrixBase* in_deriv); +template +void DiffNormalizePerRow(const CuMatrixBase &in_value, + const CuMatrixBase &out_deriv, + const double target_rms, const bool add_log_stddev, + CuMatrixBase* in_deriv); + + // not calling this Sigmoid to reduce the chance of future collisions. template static inline Real ScalarSigmoid(Real a) { @@ -317,10 +411,11 @@ template void CpuComputeLstmNonlinearity(const MatrixBase &input_mat, const MatrixBase ¶ms_mat, MatrixBase *output) { - int32 num_rows = input_mat.NumRows(); - int32 cell_dim = input_mat.NumCols() / 5; + int32 num_rows = input_mat.NumRows(), + input_cols = input_mat.NumCols(), + cell_dim = input_cols / 5; + KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3); KALDI_ASSERT(output->NumRows() == num_rows); - KALDI_ASSERT(input_mat.NumCols() % 5 == 0); KALDI_ASSERT(params_mat.NumRows() == 3); KALDI_ASSERT(params_mat.NumCols() == cell_dim); KALDI_ASSERT(output->NumCols() == 2 * cell_dim); @@ -330,6 +425,11 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input_mat, int32 params_stride = params_mat.Stride(); for (int32 r = 0; r < num_rows; r++) { const Real *input_row = input_mat.RowData(r); + // i_scale and f_scale relate to dropout, they will normally be 1.0. + Real i_scale = (input_cols == cell_dim*5 ? 1.0:input_row[cell_dim*5]), + f_scale = (input_cols == cell_dim*5 ? 1.0:input_row[cell_dim*5 + 1]), + o_scale = (input_cols == cell_dim*5 ? 
1.0:input_row[cell_dim*5 + 2]); + Real *output_row = output_mat.RowData(r); for (int32 c = 0; c < cell_dim; c++) { Real i_part = input_row[c]; @@ -342,9 +442,9 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input_mat, Real w_oc = params_data[c + params_stride * 2]; Real i_t = ScalarSigmoid(i_part + w_ic * c_prev); Real f_t = ScalarSigmoid(f_part + w_fc * c_prev); - Real c_t = f_t * c_prev + i_t * ScalarTanh(c_part); + Real c_t = f_t * f_scale * c_prev + i_t * i_scale * ScalarTanh(c_part); Real o_t = ScalarSigmoid(o_part + w_oc * c_t); - Real m_t = o_t * ScalarTanh(c_t); + Real m_t = o_t * o_scale * ScalarTanh(c_t); output_row[c] = c_t; output_row[c + cell_dim] = m_t; } @@ -355,10 +455,11 @@ template void ComputeLstmNonlinearity(const CuMatrixBase &input, const CuMatrixBase ¶ms, CuMatrixBase *output) { - int32 num_rows = input.NumRows(); - int32 cell_dim = input.NumCols() / 5; + int32 num_rows = input.NumRows(), + input_cols = input.NumCols(), + cell_dim = input_cols / 5; + KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3); KALDI_ASSERT(output->NumRows() == num_rows); - KALDI_ASSERT(input.NumCols() % 5 == 0); KALDI_ASSERT(params.NumRows() == 3); KALDI_ASSERT(params.NumCols() == cell_dim); KALDI_ASSERT(output->NumCols() == 2 * cell_dim); @@ -367,6 +468,8 @@ void ComputeLstmNonlinearity(const CuMatrixBase &input, if (CuDevice::Instantiate().Enabled()) { Timer tim; + int have_dropout_mask = (input_cols == (cell_dim * 5) + 3); + // Each thread block is working on 1 row of the data. // It's best that cell dim is a multiple fo CU1DBLOCK dim3 dimBlock(CU1DBLOCK); @@ -374,7 +477,7 @@ void ComputeLstmNonlinearity(const CuMatrixBase &input, cuda_lstm_nonlinearity(dimGrid, dimBlock, input.Data(), input.Stride(), params.Data(), params.Stride(), output->Stride(), - cell_dim, num_rows, output->Data()); + cell_dim, have_dropout_mask, num_rows, output->Data()); CU_SAFE_CALL(cudaGetLastError()); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); @@ -414,10 +517,12 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, MatrixBase *value_sum_out, MatrixBase *deriv_sum_out, MatrixBase *self_repair_sum_out) { - int32 num_rows = input.NumRows(); - int32 cell_dim = input.NumCols() / 5; + int32 num_rows = input.NumRows(), + input_cols = input + .NumCols(), + cell_dim = input.NumCols() / 5; // Check dimensions. - KALDI_ASSERT(input.NumCols() % 5 == 0); + KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3); KALDI_ASSERT(params.NumRows() == 3); KALDI_ASSERT(params.NumCols() == cell_dim); KALDI_ASSERT(output_deriv.NumRows() == num_rows); @@ -481,15 +586,15 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, // Sigmoid(i_t_input), Sigmoid(f_t_input), // Tanh(c_part), Sigmoid(o_t_input), Tanh(c_t) Real i_t_self_repair = ( - deriv_sum_in(0, c) / count < sr_config(0) ? sr_config(5) : 0.0); + deriv_sum_in_mat(0, c) / count < sr_config(0) ? sr_config(5) : 0.0); Real f_t_self_repair = ( - deriv_sum_in(1, c) / count < sr_config(1) ? sr_config(6) : 0.0); + deriv_sum_in_mat(1, c) / count < sr_config(1) ? sr_config(6) : 0.0); Real c_part_self_repair = ( - deriv_sum_in(2, c) / count < sr_config(2) ? sr_config(7) : 0.0); + deriv_sum_in_mat(2, c) / count < sr_config(2) ? sr_config(7) : 0.0); Real o_t_self_repair = ( - deriv_sum_in(3, c) / count < sr_config(3) ? sr_config(8) : 0.0); + deriv_sum_in_mat(3, c) / count < sr_config(3) ? sr_config(8) : 0.0); Real c_t_self_repair = ( - deriv_sum_in(4, c) / count < sr_config(4) ? 
sr_config(9) : 0.0); + deriv_sum_in_mat(4, c) / count < sr_config(4) ? sr_config(9) : 0.0); // Note on how we add self-repair for sigmoids/tanh's. If self-repair // is activated for this unit, then... // For sigmoids we'd add -self_repair_scale * (2 * sigmoid(x) - 1.0) @@ -512,6 +617,14 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, c_part = input_mat(r, c + 2 * cell_dim), o_part = input_mat(r, c + 3 * cell_dim), c_prev = input_mat(r, c + 4 * cell_dim); + + Real i_scale = (input_cols == cell_dim * 5 ? 1.0 : + input_mat(r, cell_dim * 5)), + f_scale = (input_cols == cell_dim * 5 ? 1.0 : + input_mat(r, cell_dim * 5 + 1)), + o_scale = (input_cols == cell_dim * 5 ? 1.0 : + input_mat(r, cell_dim * 5 + 2)); + // For greater clarity, we give some of the quantities in the // forward equations their own names. Real i_t_input = i_part + w_ic * c_prev, @@ -519,7 +632,7 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, f_t_input = f_part + w_fc * c_prev, f_t = ScalarSigmoid(f_t_input), tanh_c_part = ScalarTanh(c_part), - c_t = f_t * c_prev + i_t * tanh_c_part, + c_t = f_t * f_scale * c_prev + i_t * i_scale * tanh_c_part, o_t_input = o_part + w_oc * c_t, o_t = ScalarSigmoid(o_t_input), tanh_c_t = ScalarTanh(c_t); @@ -551,25 +664,25 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, // comes directly from the output of this function. Real dc_t_out = output_deriv_mat(r, c); Real dm_t = output_deriv_mat(r, c + cell_dim); - Real dtanh_c_t = o_t * dm_t; - Real do_t = tanh_c_t * dm_t; + Real dtanh_c_t = o_t * o_scale * dm_t; + Real do_t = o_scale * tanh_c_t * dm_t; Real do_t_input = (o_t * (1.0F - o_t) * do_t - (2.0F * o_t - 1.0F) * o_t_self_repair); Real dc_t = ((1.0F - tanh_c_t * tanh_c_t) * dtanh_c_t + dc_t_out + do_t_input * w_oc) - tanh_c_t * c_t_self_repair; - Real dtanh_c_part = i_t * dc_t; - Real df_t = dc_t * c_prev; - Real df_t_input = (df_t * f_t * (1.0F - f_t) - - (2.0F * f_t - 1.0F) * f_t_self_repair); - Real di_t = dc_t * tanh_c_part; - Real di_t_input = (di_t * i_t * (1.0F - i_t) - - (2.0F * i_t - 1.0F) * i_t_self_repair); + Real dtanh_c_part = i_t * i_scale * dc_t; + Real df_t = dc_t * f_scale * c_prev; + Real df_t_input = ((df_t * f_t * (1.0F - f_t) + - (2.0F * f_t - 1.0F) * f_t_self_repair)); + Real di_t = dc_t * i_scale * tanh_c_part; + Real di_t_input = ((di_t * i_t * (1.0F - i_t) + - (2.0F * i_t - 1.0F) * i_t_self_repair)); w_ic_deriv_sum += c_prev * di_t_input; w_fc_deriv_sum += c_prev * df_t_input; w_oc_deriv_sum += c_t * do_t_input; - Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * dc_t; + Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * f_scale * dc_t; Real do_part = do_t_input; Real dc_part = ((1.0F - tanh_c_part * tanh_c_part) * dtanh_c_part - tanh_c_part * c_part_self_repair); @@ -605,7 +718,7 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, // deriv_sum_out and deriv_sum_in might point to the same memory. for (int32 i = 0; i < 5; i++) (*self_repair_sum_out_mat)(i, c) = - (deriv_sum_in(i, c) / count < sr_config(i) ? num_rows : 0); + (deriv_sum_in_mat(i, c) / count < sr_config(i) ? 
num_rows : 0); (*deriv_sum_out_mat)(0, c) += i_t_deriv_sum; (*deriv_sum_out_mat)(1, c) += f_t_deriv_sum; @@ -630,10 +743,11 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, CuMatrixBase *value_sum_out, CuMatrixBase *deriv_sum_out, CuMatrixBase *self_repair_sum_out) { - int32 num_rows = input.NumRows(); - int32 cell_dim = input.NumCols() / 5; + int32 num_rows = input.NumRows(), + cell_dim = input.NumCols() / 5, + input_cols = input.NumCols(); // Check dimensions. - KALDI_ASSERT(input.NumCols() % 5 == 0); + KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim*5) + 3); KALDI_ASSERT(params.NumRows() == 3); KALDI_ASSERT(params.NumCols() == cell_dim); KALDI_ASSERT(output_deriv.NumRows() == num_rows); @@ -668,6 +782,7 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, // Each thread block is working on 1 row of the data. // It's best that cell dim is a multiple fo CU1DBLOCK + int have_dropout_mask = (input_cols == (cell_dim * 5) + 3); // Use 2D block (8x32 threads) as we need to compute column sum. // Use 1D grid to cover the data matrix width `cell_dim`. @@ -681,7 +796,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, dim3 dimGrid(n_blocks(cell_dim, dimBlock.x)); if (input_deriv == NULL) { if (params_deriv == NULL) { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -699,7 +815,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, 0); } else { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -717,7 +834,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, } } else { if (params_deriv == NULL) { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -727,7 +845,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, NULL, 0, NULL, 0, NULL, 0, NULL, 0); } else { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), diff --git a/src/cudamatrix/cu-math.h b/src/cudamatrix/cu-math.h index 9952ca5b9d2..757449b6d4e 100644 --- a/src/cudamatrix/cu-math.h +++ b/src/cudamatrix/cu-math.h @@ -88,6 +88,9 @@ void Group2norm(const CuMatrixBase &src, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + This function will also accept input of dimension N by 5C + 3, + and the three final elements will be used as scaling factors + on i_t, f_t and o_t (useful as per-frame dropout masks). @param [in] params A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely w_{ic}, w_{fc} and w_{oc}. 
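The N by 5C + 3 input layout described above is easiest to see in scalar form. Below is a minimal sketch restating the per-row equations of the patched CpuComputeLstmNonlinearity; the name LstmRowForward and the plain std::vector interface are hypothetical, and Sigmoid stands in for Kaldi's ScalarSigmoid:

```cpp
#include <cmath>
#include <vector>

static inline float Sigmoid(float a) { return 1.0f / (1.0f + std::exp(-a)); }

// One row of the LSTM nonlinearity with an optional per-frame dropout mask.
// 'input' holds (i_part, f_part, c_part, o_part, c_prev), each of size
// cell_dim, optionally followed by 3 mask values (i_scale, f_scale, o_scale).
void LstmRowForward(const std::vector<float> &input,
                    const std::vector<float> &w_ic,  // diagonal params, row 0
                    const std::vector<float> &w_fc,  // diagonal params, row 1
                    const std::vector<float> &w_oc,  // diagonal params, row 2
                    int cell_dim, bool have_dropout_mask,
                    std::vector<float> *c_t_out,     // output, size cell_dim
                    std::vector<float> *m_t_out) {   // output, size cell_dim
  float i_scale = have_dropout_mask ? input[5 * cell_dim] : 1.0f;
  float f_scale = have_dropout_mask ? input[5 * cell_dim + 1] : 1.0f;
  float o_scale = have_dropout_mask ? input[5 * cell_dim + 2] : 1.0f;
  for (int c = 0; c < cell_dim; c++) {
    float i_part = input[c], f_part = input[c + cell_dim],
          c_part = input[c + 2 * cell_dim], o_part = input[c + 3 * cell_dim],
          c_prev = input[c + 4 * cell_dim];
    float i_t = Sigmoid(i_part + w_ic[c] * c_prev);
    float f_t = Sigmoid(f_part + w_fc[c] * c_prev);
    // The mask multiplies i_t, f_t and o_t where they are *used*, so the
    // equations reduce to the unmasked ones when all three scales are 1.0.
    float c_t = f_t * f_scale * c_prev + i_t * i_scale * std::tanh(c_part);
    float o_t = Sigmoid(o_part + w_oc[c] * c_t);
    float m_t = o_t * o_scale * std::tanh(c_t);
    (*c_t_out)[c] = c_t;
    (*m_t_out)[c] = m_t;
  }
}
```

Note that c_t (computed with the masked i_t and f_t) still feeds the output gate's sigmoid unscaled; o_scale only enters the final product m_t, matching the equations in the hunk below.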
@@ -101,7 +104,14 @@ void Group2norm(const CuMatrixBase &src, o_t = Sigmoid(o_part + w_{oc}*c_t) m_t = o_t * Tanh(c_t) - + Note on dropout: if the dropout mask is provided, let the + mask values be i_t_mask, f_t_mask and o_t_mask (for each + matrix row, these are scalars while i_t, f_t and o_t are of + dimension C, because this is 'per-frame' dropout as described in + http://www.danielpovey.com/files/2017_interspeech_dropout.pdf). + Then the modification to the equations above consists of + replacing 'i_t' with 'i_t_mask * i_t' in the RHS of the equations + above, and the same type of change for f_t and o_t. */ template void ComputeLstmNonlinearity(const CuMatrixBase &input, @@ -134,6 +144,9 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + This function will also accept input of dimension N by 5C + 3, + and the three final elements will be interpreted as scaling factors + on i_t, f_t and o_t (useful as per-frame dropout masks). @param [in] params The same as in ComputeLstmNonlinearity(). A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely @@ -165,9 +178,13 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input, May be NULL; if not, this function writes, to this location, the backpropagated derivative of the objective function w.r.t. the 'input' matrix. This matrix should - have the same dimension as 'input' i.e. N by 5C. In - addition to the regular backpropagated derivative, the - output will include small values relating to 'self-repair'. + have the same dimension as 'input'. In addition to the + regular backpropagated derivative, the output will include + small values relating to 'self-repair'. If the input + is of column-dimension 5C + 3 (i.e. we are using dropout + masks), the derivatives w.r.t. the dropout masks will not + be set; they will retain their value prior to this + function call. @param [out] params_deriv May be NULL; if not, this is where this function *writes* [not adds] the backpropagated derivative of the objective @@ -196,23 +213,6 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input, processed outside this function into self-repair stats for diagnostics. */ -/// Normalize nonlinearity modifies the vector of activations -/// by scaling it so that the root-mean-square equals 1.0. -/// -/// The output y_i = scale * x_i, -/// and we want to RMS value of the y_i to equal target_rms, -/// so y^t y = D * target_rms^2 (if y is one row of the input). -/// we need to have scale = 1.0 / sqrt(x^t x / (D * target_rms^2)). -/// there is also flooring involved, to avoid division-by-zero -/// problems. It's important for the backprop, that the floor's -/// square root is exactly representable as float. -/// If add_log_stddev_ is true, log(max(epsi, sqrt(x^t x / D))) -/// is an extra dimension of the output. -template -void NormalizePerRow(const CuMatrixBase& in, const Real target_rms, - const bool add_log_stddev, CuMatrixBase* out); - - template void BackpropLstmNonlinearity(const CuMatrixBase &input, @@ -241,6 +241,49 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, MatrixBase *deriv_sum_out, MatrixBase *self_repair_sum_out); +/// Normalize nonlinearity modifies the vector of activations +/// by scaling it so that the root-mean-square equals 1.0. 
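The scaling just summarized, and the derivative worked out in the ASCII note that follows (also repeated in cu-math.cc above), read more compactly in display form. Writing x = row_in, y = row_out, D for the row dimension, t = target_rms and p_0 = kSquaredNormFloor, and using an Iverson bracket [.] for the floor condition:

$$
p = \frac{x^{\top}x}{D\,t^{2}}, \qquad
f = \frac{1}{\sqrt{\max(p_{0},\,p)}}, \qquad
y = f\,x,
$$

$$
\frac{\partial F}{\partial x}
  = f\,\frac{\partial F}{\partial y}
  \;-\; [\,p > p_{0}\,]\,\frac{f^{3}}{D\,t^{2}}
      \left(\left(\frac{\partial F}{\partial y}\right)^{\!\top} x\right) x .
$$

When add_log_stddev is true, the extra output column $y_{D+1} = \log\max(\epsilon, \sqrt{x^{\top}x/D})$ contributes the further term $\frac{\partial F}{\partial y_{D+1}}\,\frac{x}{x^{\top}x}$, which is what the df/dx_i line at the end of the note states.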
+/// +/// The output y_i = scale * x_i, +/// and we want the RMS value of the y_i to equal target_rms, +/// so y^t y = D * target_rms^2 (if y is one row of the input). +/// we need to have scale = 1.0 / sqrt(x^t x / (D * target_rms^2)). +/// there is also flooring involved, to avoid division-by-zero +/// problems. It's important for the backprop that the floor's +/// square root is exactly representable as float. +/// If add_log_stddev_ is true, log(max(epsi, sqrt(x^t x / D))) +/// is an extra dimension of the output. +template<typename Real> +void NormalizePerRow(const CuMatrixBase<Real>& in, const Real target_rms, + const bool add_log_stddev, CuMatrixBase<Real>* out); + +// A note on the derivative of NormalizeComponent... +// let both row_in and row_out be vectors of dimension D. +// Let p = row_in^T row_in / (D * target_rms^2), and let +// f = 1.0 / sqrt(max(kSquaredNormFloor, p)), and we compute row_out as: +// row_out = f row_in. +// Suppose we have a quantity deriv_out which is the derivative +// of the objective function w.r.t. row_out. We want to compute +// deriv_in which is the derivative of the objective function w.r.t. +// row_in. Let the objective function be F. One term is obvious: we have +// deriv_in = f deriv_out + .... +// next we have to take into account the derivative that gets back-propagated +// through f. Obviously, dF/df = deriv_out^T row_in. +// And df/dp = (p <= kSquaredNormFloor ? 0.0 : -0.5 p^{-1.5}) = (f == 1.0 / sqrt(kSquaredNormFloor) ? 0.0 : -0.5 f^3), +// and dp/d(row_in) = 2/(D * target_rms^2) row_in. [it's vector_valued]. +// So this term in dF/d(row_in) equals: +// dF/df df/dp dp/d(row_in) = 2/(D * target_rms^2) (f == 1.0 / sqrt(kSquaredNormFloor) ? 0.0 : -0.5 f^3) (deriv_out^T row_in) row_in +// So +// deriv_in = f deriv_out + (f == 1.0 ? 0.0 : -f^3 / (D * target_rms^2) ) (deriv_out^T row_in) row_in +// if add_log_stddev_ is true, the deriv_in has another term as +// dF/dx_i = dF/df .
df/dx_i => df/dx_i = x_i/(x^T x) +template +void DiffNormalizePerRow(const CuMatrixBase &in_value, + const CuMatrixBase &out_deriv, + const Real target_rms, const bool add_log_stddev, + CuMatrixBase* in_deriv); + + } // namespace cu } // namespace kaldi diff --git a/src/cudamatrix/cu-matrix-speed-test.cc b/src/cudamatrix/cu-matrix-speed-test.cc index 12b274dc4dc..2c7106d45bf 100644 --- a/src/cudamatrix/cu-matrix-speed-test.cc +++ b/src/cudamatrix/cu-matrix-speed-test.cc @@ -164,8 +164,8 @@ template void TestCuMatrixTransposeCross(int32 dim) { AssertEqual(ref, Mf); } -template void TestCuMatrixAddMat(int32 dim, - int32 num_row_blocks, int32 num_col_blocks) { +template void TestCuMatrixAddMat(int32 dim, int32 num_row_blocks, + int32 num_col_blocks) { BaseFloat time_in_secs = 0.025; CuMatrix A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks); A.SetRandn(); @@ -181,14 +181,15 @@ template void TestCuMatrixAddMat(int32 dim, } BaseFloat fdim = dim; BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter) - / (tim.Elapsed() * 1.0e+09); + / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMat" << NameOf() << ", for dim = " - << dim << "numRowBlocks = "<< num_row_blocks << "numColBlocks = " - << num_col_blocks << ", speed was " << gflops << " gigaflops."; + << dim << "numRowBlocks = "<< num_row_blocks << "numColBlocks = " + << num_col_blocks << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixAddMatBlocks(int32 dim, - int32 num_row_blocks, int32 num_col_blocks) { + int32 num_row_blocks, + int32 num_col_blocks) { BaseFloat time_in_secs = 0.025; CuMatrix A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks); A.SetRandn(); @@ -200,10 +201,10 @@ template void TestCuMatrixAddMatBlocks(int32 dim, } BaseFloat fdim = dim; BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter) - / (tim.Elapsed() * 1.0e+09); + / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMatBlocks" << NameOf() << ", for dim = " - << dim << ", numRowBlocks = "<< num_row_blocks << ", numColBlocks = " - << num_col_blocks << ", speed was " << gflops << " gigaflops."; + << dim << ", numRowBlocks = "<< num_row_blocks << ", numColBlocks = " + << num_col_blocks << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixMatMat(int32 dim) { @@ -235,18 +236,18 @@ template void TestCuMatrixMatMatBatched(int32 dim, int32 batchCou a[i]->SetRandn(); b[i]->SetRandn(); A.push_back(new CuSubMatrix(*(a[i]), 0, a[i]->NumRows(), 0, - a[i]->NumCols())); + a[i]->NumCols())); B.push_back(new CuSubMatrix(*(b[i]), 0, b[i]->NumRows(), 0, - b[i]->NumCols())); + b[i]->NumCols())); C.push_back(new CuSubMatrix(*(c[i]), 0, c[i]->NumRows(), 0, - c[i]->NumCols())); + c[i]->NumCols())); } BaseFloat time_in_secs = 0.025; Timer tim; int32 iter = 0; for (;tim.Elapsed() < time_in_secs; iter++) { AddMatMatBatched(static_cast(1.0), C, A, kNoTrans, B, kNoTrans, - static_cast(0.0)); + static_cast(0.0)); } for (int32 i = 0; i< batchCount; i++) { delete a[i]; delete b[i]; delete c[i]; @@ -256,7 +257,7 @@ template void TestCuMatrixMatMatBatched(int32 dim, int32 batchCou BaseFloat fdim = dim; BaseFloat gflops = (fdim * fdim * fdim * iter * batchCount) / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMatMatBatched" << NameOf() << ", for dim = " << dim - << ", batchSize = " << batchCount << ", speed was " << gflops << " gigaflops."; + << ", batchSize = " << batchCount << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixAddDiagVecMat(int32 dim, 
MatrixTransposeType trans) { @@ -997,7 +998,7 @@ template void CudaMatrixSpeedTest() { TestCuMatrixAddMatBlocks(sizes[s], 3, 3); for (int32 s = 0; s < ns; s++) TestCuMatrixMatMat(sizes[s]); - for (int32 s = 0; s < ns; s++) + for (int32 s = 0; s + 1 < ns; s++) TestCuMatrixMatMatBatched(sizes[s], 10); for (int32 s = 0; s < ns; s++) { TestCuMatrixAddDiagVecMat(sizes[s], kNoTrans); @@ -1049,7 +1050,7 @@ template void CudaMatrixSpeedTest() { TestCuMatrixCopyUpperToLower(sizes[s]); for (int32 s = 0; s < ns; s++) TestCuMatrixSetZeroAboveDiag(sizes[s]); - for (int32 s = 0; s < ns; s++) + for (int32 s = 0; s + 2 < ns; s++) TestCuMatrixLookup(sizes[s]); for (int32 s = 0; s < ns; s++) TestCuMatrixCopyRows1(sizes[s]); @@ -1084,8 +1085,9 @@ template void CudaMatrixSpeedTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else @@ -1102,9 +1104,9 @@ int main() { #else kaldi::CudaMatrixSpeedTest(); #endif - } #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index da587e450e3..2157c97156f 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -376,6 +376,31 @@ static void UnitTestCuMatrixCopyRowsFromVec() { } +template +static void UnitTestCuMatrixCopyColsFromVec() { + for (int32 p = 0; p < 2; p++) { + int32 num_rows = 100 + Rand() % 255; + int32 num_cols = 100 + Rand() % 200; + + int32 vec_dim; + if (p % 2 == 0) vec_dim = num_rows; + else vec_dim = num_cols * num_rows; + + CuVector cu_vec(vec_dim); + cu_vec.SetRandn(); + Vector vec(cu_vec); + + CuMatrix cu_mat(num_rows, num_cols); + cu_mat.CopyColsFromVec(cu_vec); + Matrix mat(num_rows, num_cols); + mat.CopyColsFromVec(vec); + + Matrix mat2(cu_mat); + AssertEqual(mat, mat2); + } +} + + template static void UnitTestCuMatrixCopyRows() { for (int32 p = 0; p < 2; p++) { @@ -845,6 +870,27 @@ static void UnitTestCuMatrixMax() { AssertEqual(Ha,Ha2); } +template +static void UnitTestCuMatrixMin() { + Matrix Ha(100,100); + Matrix Hb(100,100); + Ha.SetRandn(); + Hb.SetRandn(); + + CuMatrix Da(100,100); + CuMatrix Db(100,100); + Da.CopyFromMat(Ha); + Db.CopyFromMat(Hb); + + Da.Min(Db); + Ha.Min(Hb); + + Matrix Ha2(100,100); + Da.CopyToMat(&Ha2); + + AssertEqual(Ha, Ha2); +} + template @@ -1435,13 +1481,13 @@ static void UnitTestCuMatrixAddMatMatBatched() { Ha[i]->SetRandn(); Hb[i]->SetRandn(); HA.push_back(new SubMatrix(*(Ha[i]), 0, Ha[i]->NumRows(), 0, - Ha[i]->NumCols())); + Ha[i]->NumCols())); HB.push_back(new SubMatrix(*(Hb[i]), 0, Hb[i]->NumRows(), 0, - Hb[i]->NumCols())); + Hb[i]->NumCols())); HC1.push_back(new SubMatrix(*(Hc1[i]), 0, Hc1[i]->NumRows(), 0, - Hc1[i]->NumCols())); + Hc1[i]->NumCols())); HC2.push_back(new SubMatrix(*(Hc2[i]), 0, Hc2[i]->NumRows(), 0, - Hc2[i]->NumCols())); + Hc2[i]->NumCols())); // first create a CuMatrix intance and then creat a CuSubMatrix instance from that Da[i] = new CuMatrix(200, 100); @@ -1451,19 +1497,19 @@ static void UnitTestCuMatrixAddMatMatBatched() { Da[i]->CopyFromMat(*(Ha[i])); Db[i]->CopyFromMat(*(Hb[i])); DA.push_back(new CuSubMatrix(*(Da[i]), 0, Da[i]->NumRows(), 0, - Da[i]->NumCols())); + Da[i]->NumCols())); DB.push_back(new CuSubMatrix(*(Db[i]), 0, Db[i]->NumRows(), 0, - Db[i]->NumCols())); + Db[i]->NumCols())); 
DC1.push_back(new CuSubMatrix(*(Dc1[i]), 0, Dc1[i]->NumRows(), 0, - Dc1[i]->NumCols())); + Dc1[i]->NumCols())); DC2.push_back(new CuSubMatrix(*(Dc2[i]), 0, Dc2[i]->NumRows(), 0, - Dc2[i]->NumCols())); + Dc2[i]->NumCols())); } AddMatMatBatched(static_cast(0.5f), DC1, DA, kNoTrans, DB, kNoTrans, - static_cast(0.0f)); + static_cast(0.0f)); AddMatMatBatched(static_cast(0.5f), DC2, DA, kTrans, DB, kTrans, - static_cast(0.0f)); + static_cast(0.0f)); // used to store results from DC1 and DC2 for equality check Matrix Hca1(200,200); @@ -1574,7 +1620,7 @@ static void UnitTestCuMatrixAddMatTp() { template static void UnitTestCuMatrixTranspose() { - for (int32 i = 1; i < 10; i++) { + for (int32 i = 1; i < 2; i++) { MatrixIndexT dimM = 5 * i + Rand() % 10, dimN = dimM; if (i % 2 == 0) dimN += 5; @@ -1582,8 +1628,11 @@ static void UnitTestCuMatrixTranspose() { CuMatrix A(dimM, dimN); A.SetRandn(); CuMatrix B(A, kTrans); - A.Transpose(); - AssertEqual(A, B); + + Matrix hA(A); + Matrix hB(B); + hB.Transpose(); + AssertEqual(hA, hB); } } @@ -2592,6 +2641,7 @@ template void CudaMatrixUnitTest() { UnitTestCuMatrixMulElements(); UnitTestCuMatrixDivElements(); UnitTestCuMatrixMax(); + UnitTestCuMatrixMin(); UnitTestCuMatrixMulColsVec(); UnitTestCuMatrixMulRowsVec(); UnitTestCuMatrixDivRowsVec(); @@ -2615,6 +2665,7 @@ template void CudaMatrixUnitTest() { UnitTestCuMatrixSumColumnRanges(); UnitTestCuMatrixCopyRows(); UnitTestCuMatrixCopyRowsFromVec(); + UnitTestCuMatrixCopyColsFromVec(); UnitTestCuMatrixCopyToRows(); UnitTestCuMatrixAddRows(); UnitTestCuMatrixAddToRows(); @@ -2678,8 +2729,9 @@ template void CudaMatrixUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -2689,7 +2741,6 @@ int main() { kaldi::CudaMatrixUnitTest(); - #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CudaMatrixUnitTest(); @@ -2704,9 +2755,9 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } - SetVerboseLevel(4); #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', + SetVerboseLevel(4); CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index f16b7f0bf52..cfa570233c3 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -721,6 +721,31 @@ void CuMatrixBase::Max(const CuMatrixBase& A) { } +template +void CuMatrixBase::Min(const CuMatrixBase& A) { + #if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + + KALDI_ASSERT(num_cols_ == A.NumCols()); + KALDI_ASSERT(num_rows_ == A.NumRows()); + + dim3 dimGrid, dimBlock; + GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(), + &dimGrid, &dimBlock); + + cuda_min(dimGrid, dimBlock, data_, A.data_, Dim(), A.Stride()); + CU_SAFE_CALL(cudaGetLastError()); + + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else + #endif + { + Mat().Min(A.Mat()); + } +} + + template void CuMatrixBase::MulColsVec(const CuVectorBase &scale) { #if HAVE_CUDA == 1 @@ -942,7 +967,7 @@ void CuMatrixBase::AddMat(Real alpha, const CuMatrixBase& A, template void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, - MatrixTransposeType transA) { + MatrixTransposeType transA) { if (num_rows_ == 0 || num_cols_ == 0) return; int32 num_row_blocks, 
num_col_blocks; if (transA == kNoTrans) { @@ -961,8 +986,8 @@ void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(), &dimGrid, &dimBlock); cuda_add_mat_blocks(dimGrid, dimBlock, alpha, A.data_, num_row_blocks, - num_col_blocks, data_, Dim(), A.Stride(), - (transA == kTrans ? 1 : 0)); + num_col_blocks, data_, Dim(), A.Stride(), + (transA == kTrans ? 1 : 0)); CU_SAFE_CALL(cudaGetLastError()); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); @@ -980,7 +1005,7 @@ void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, for (int32 i = 0; i < num_row_blocks; i++) { for (int32 j = 0; j < num_col_blocks; j++) { Mat().AddMat(alpha, SubMatrix(A.Mat(), i * nr, nr, j * nc, nc), - transA); + transA); } } } @@ -1097,10 +1122,10 @@ void CuMatrixBase::AddMatMat( if (CuDevice::Instantiate().Enabled()) { Timer tim; CU_SAFE_CALL(cublas_gemm(GetCublasHandle(), - (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - m, n, k, alpha, B.data_, B.Stride(), - A.data_, A.Stride(), beta, data_, Stride())); + (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + m, n, k, alpha, B.data_, B.Stride(), + A.data_, A.Stride(), beta, data_, Stride())); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -1151,8 +1176,8 @@ void CuMatrixBase::SymAddMat2( cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, - num_rows_, A_other_dim, alpha, A.Data(), - A.Stride(), beta, this->data_, this->stride_)); + num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), + beta, this->data_, this->stride_)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -1977,9 +2002,10 @@ double TraceMatMat(const CuMatrixBase &A, template void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta) { + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const Real beta) { KALDI_ASSERT(A.size() == B.size() && B.size() == C.size()); int32 size = A.size(); @@ -2032,11 +2058,12 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), - (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - m, n, k, alpha, device_b_array, B[0]->Stride(), - device_a_array, A[0]->Stride(), beta, - device_c_array, C[0]->Stride(), size)); + (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + (transA==kTrans? 
CUBLAS_OP_T:CUBLAS_OP_N), + m, n, k, alpha, device_b_array, + B[0]->Stride(), device_a_array, + A[0]->Stride(), beta, device_c_array, + C[0]->Stride(), size)); CuDevice::Instantiate().Free(device_abc_array); delete[] host_abc_array; @@ -2053,15 +2080,17 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, template void AddMatMatBatched(const float alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const float beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const float beta); template void AddMatMatBatched(const double alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const double beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const double beta); template void CuMatrixBase::CopyRowsFromVec(const CuVectorBase &v) { @@ -2131,6 +2160,43 @@ void CuMatrixBase::CopyRowsFromVec(const VectorBase &v) { } } +template +void CuMatrixBase::CopyColsFromVec(const CuVectorBase &rv) { +#if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + if (rv.Dim() == num_rows_ * num_cols_) { + // treat rv as a matrix of the size (num_cols x num_rows_) + // and use transposed copy to fill *this + // see CuMatrixBase::CopyFromMat() for more detail of the impl + MatrixDim rv_dim = { num_cols_, num_rows_, num_rows_ }; + const int32 warpSize = 32; + dim3 dimBlock(warpSize, CU1DBLOCK / warpSize); + dim3 dimGrid(n_blocks(rv_dim.cols, warpSize), + n_blocks(rv_dim.rows, warpSize)); + cuda_copy_from_mat_trans(dimGrid, dimBlock, data_, rv.Data(), Dim(), + rv_dim); + CU_SAFE_CALL(cudaGetLastError()); + } else if (rv.Dim() == num_rows_) { + // use 2D block (8x32) and large enough grid to cover matrix *this + // dimBlock.x need to be at least warpSize for coalesced memory access. + const int32 warpSize = 32; + dim3 dimBlock(warpSize, CU1DBLOCK / warpSize); + dim3 dimGrid(n_blocks(num_cols_, dimBlock.x), + n_blocks(num_rows_, dimBlock.y)); + cuda_copy_cols_from_vec(dimGrid, dimBlock, Data(), Dim(), rv.Data()); + CU_SAFE_CALL(cudaGetLastError()); + } else { + KALDI_ERR<< "Wrong sized arguments"; + } + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else +#endif + { + Mat().CopyColsFromVec(rv.Vec()); + } +} + template void CuMatrixBase::CopyColFromVec(const CuVectorBase &v, @@ -2797,7 +2863,7 @@ void CuMatrix::Transpose() { return; // Copy and swap for all cases. // No need for a separate kernel of squared matrix in-place transpose. - // It has the same posible peak performance as copy transpose, + // It has the same possible peak performance as copy_transpose, // if allocate/deallocate overhead can be ignored. CuMatrix tmp(*this, kTrans); this->Swap(&tmp); diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h index 38a6c25071b..0a4c4b0669e 100644 --- a/src/cudamatrix/cu-matrix.h +++ b/src/cudamatrix/cu-matrix.h @@ -51,9 +51,11 @@ Real TraceMatMat(const CuMatrixBase &A, const CuMatrixBase &B, /// C[i] = alpha * A[i](^T)*B[i](^T) + beta * C[i]. 
template void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, + const Real beta); /** * Matrix for CUDA computing. @@ -94,7 +96,6 @@ class CuMatrixBase { /// Copies column r from column indexes[r] of src. /// As a special case, if indexes[i] == -1, sets column i to zero /// indexes.size() must equal this->NumCols(), - /// all elements of "reorder" must be in [-1, src.NumCols()-1], /// and src.NumRows() must equal this.NumRows() void CopyCols(const CuMatrixBase &src, const CuArray &indexes); @@ -103,14 +104,12 @@ class CuMatrixBase { /// Add column indices[r] of src to column r. /// As a special case, if indexes[i] == -1, skip column i /// indices.size() must equal this->NumCols(), - /// all elements of "reorder" must be in [-1, src.NumCols()-1], /// and src.NumRows() must equal this.NumRows() void AddCols(const CuMatrixBase &src, const CuArray &indices); /// Copies row r from row indexes[r] of src. - /// As a special case, if indexes[i] < 0, sets row i to zero - /// "reorder".size() must equal this->NumRows(), and + /// As a special case, if indexes[i] < 0, sets row i to zero. /// src.NumCols() must equal this.NumCols() void CopyRows(const CuMatrixBase &src, const CuArray &indexes); @@ -134,9 +133,7 @@ class CuMatrixBase { /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). /// If indexes[r] < 0, does not add anything. - /// "reorder".size() must equal this->NumRows(), - /// all elements of "reorder" must be in [0, src.NumRows()-1], - /// and src.NumCols() must equal this.NumCols() + /// src.NumCols() must equal this.NumCols() void AddRows(Real alpha, const CuMatrixBase &src, const CuArray &indexes); @@ -182,10 +179,13 @@ class CuMatrixBase { const CuSparseMatrix &B, MatrixTransposeType trans); - friend void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta); + friend void AddMatMatBatched(const Real alpha, + std::vector* > &C, + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, + const Real beta); /// Adds "value" to the diagonal elements of the matrix. The matrix /// *this does not have to be square. @@ -247,6 +247,11 @@ class CuMatrixBase { /// Version of CopyRowsFromVec() that takes a CPU-based vector. void CopyRowsFromVec(const VectorBase &v); + /// Copies vector into matrix, column-by-column. + /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows(); + /// this has two modes of operation. + void CopyColsFromVec(const CuVectorBase &v); + /// Copy vector into specific column of matrix. void CopyColFromVec(const CuVectorBase &v, const MatrixIndexT col); @@ -400,6 +405,8 @@ class CuMatrixBase { void DivElements(const CuMatrixBase &A); /// Do, elementwise, *this = max(*this, A). void Max(const CuMatrixBase &A); + /// Do, elementwise, *this = min(*this, A). 
+ void Min(const CuMatrixBase &A); /// scale i'th column by scale[i] void MulColsVec(const CuVectorBase &scale); /// scale i'th row by scale[i] diff --git a/src/cudamatrix/cu-rand-speed-test.cc b/src/cudamatrix/cu-rand-speed-test.cc index cf07301cb63..3c33b780a12 100644 --- a/src/cudamatrix/cu-rand-speed-test.cc +++ b/src/cudamatrix/cu-rand-speed-test.cc @@ -56,64 +56,166 @@ std::string MeanVariance(const CuMatrixBase& m) { return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); } +template +std::string MeanVariance(const CuVectorBase& v) { + std::ostringstream os; + Real mean = v.Sum() / v.Dim(); + CuVector tmp(v); + tmp.Add(-mean); + tmp.ApplyPow(2.0); + Real var = tmp.Sum() / tmp.Dim(); + return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); +} + + template -void CuRandUniformMatrixSpeedTest() { +void CuRandUniformMatrixSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuMatrix m(249,2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandUniform(&m); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s, " << MeanVariance(m); + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&m2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; } template -void CuRandGaussianMatrixSpeedTest() { +void CuRandUniformMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuMatrix m(249,2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(dynamic_cast*>(&m)); + } + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(dynamic_cast*>(&m2)); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; +} + +template +void CuRandGaussianMatrixSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&m); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s, " << MeanVariance(m); + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(&m2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. 
" + << "(debug " << MeanVariance(m) << ")"; } template -void CuRandGaussianVectorSpeedTest() { +void CuRandGaussianMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuVector v(2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(dynamic_cast*>(&m)); + } + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(dynamic_cast*>(&m2)); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; +} + +template +void CuRandUniformVectorSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuVector v(2011, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&v); + } + CuVector v2(2048, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&v2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(v) << ")"; +} + +template +void CuRandGaussianVectorSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuVector v(2011, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&v); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s"; + CuVector v2(2048, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(&v2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. 
" + << "(debug " << MeanVariance(v) << ")"; } } // namespace kaldi int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 iter = 10; // Be quick on CPU, #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { // NO for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().SetDebugStrideMode(true); - if (loop == 0) + if ( loop == 0) CuDevice::Instantiate().SelectGpuId("no"); - else + else { CuDevice::Instantiate().SelectGpuId("yes"); + iter = 400; // GPUs are faster, + } #endif - kaldi::CuRandUniformMatrixSpeedTest(); - kaldi::CuRandGaussianMatrixSpeedTest(); - kaldi::CuRandGaussianVectorSpeedTest(); + Timer t; + kaldi::CuRandUniformMatrixSpeedTest(iter); + kaldi::CuRandUniformMatrixBaseSpeedTest(iter); + kaldi::CuRandUniformVectorSpeedTest(iter); + kaldi::CuRandGaussianMatrixSpeedTest(iter); + kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); + kaldi::CuRandGaussianVectorSpeedTest(iter); fprintf(stderr, "---\n"); - kaldi::CuRandUniformMatrixSpeedTest(); - kaldi::CuRandGaussianMatrixSpeedTest(); - kaldi::CuRandGaussianVectorSpeedTest(); - fprintf(stderr, "\n"); - } - + kaldi::CuRandUniformMatrixSpeedTest(iter); + kaldi::CuRandUniformMatrixBaseSpeedTest(iter); + kaldi::CuRandUniformVectorSpeedTest(iter); + kaldi::CuRandGaussianMatrixSpeedTest(iter); + kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); + kaldi::CuRandGaussianVectorSpeedTest(iter); + fprintf(stderr, "--- ELAPSED %fs.\n\n", t.Elapsed()); #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-rand.cc b/src/cudamatrix/cu-rand.cc index 6506896b10b..82a9e5b7057 100644 --- a/src/cudamatrix/cu-rand.cc +++ b/src/cudamatrix/cu-rand.cc @@ -1,6 +1,6 @@ // cudamatrix/cu-rand.cc -// Copyright 2016 Brno University of Technology (author Karel Vesely) +// Copyright 2016-2017 Brno University of Technology (author Karel Vesely) // See ../../COPYING for clarification regarding multiple authors // @@ -21,18 +21,50 @@ namespace kaldi { +#if HAVE_CUDA == 1 +/// Wrappers of curand functions to interface both float and double as 1 function, + +/// Wrapper of curandGenerateUniform(), curandGenerateUniformDouble(), +template +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, Real *ptr, size_t num); +// +template<> +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, float *ptr, size_t num) { + return curandGenerateUniform(gen, ptr, num); +} template<> -void CuRand::RandUniform(CuMatrixBase *tgt) { +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, double *ptr, size_t num) { + return curandGenerateUniformDouble(gen, ptr, num); +} + +/// Wrapper of curandGenerateNormal(), curandGenerateNormalDouble(), +template +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, Real *ptr, size_t num); +// +template<> +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, float *ptr, size_t num) { + return curandGenerateNormal(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/); +} +template<> +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, double *ptr, size_t num) { + return curandGenerateNormalDouble(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/); +} +/// End of wrappers. 
+#endif + + +template +void CuRand::RandUniform(CuMatrixBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, // so we should not use it to generate random numbers over whole stride. - CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); - // We need even number of `elements', or it crahes! - // (possibly touching 1 element after array, into the padding of memory alignment), - size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateUniform(gen_, tmp.Data(), tmp_elems_even)); + CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); + CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tmp.Data(), tmp.NumRows() * tmp.Stride())); tgt->CopyFromMat(tmp); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -42,19 +74,13 @@ void CuRand::RandUniform(CuMatrixBase *tgt) { } } -template<> -void CuRand::RandUniform(CuMatrixBase *tgt) { +template +void CuRand::RandUniform(CuMatrix *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, - // so we should not use it to generate random numbers over whole stride. - CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); - // We need even number of `elements', or it crahes! - // (possibly touching 1 element after array, into the padding of memory alignment), - size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateUniformDouble(gen_, tmp.Data(), tmp_elems_even)); - tgt->CopyFromMat(tmp); + // Here we don't need to use 'tmp' matrix, + CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), tgt->NumRows() * tgt->Stride())); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -63,40 +89,34 @@ void CuRand::RandUniform(CuMatrixBase *tgt) { } } -template<> -void CuRand::RandGaussian(CuMatrixBase *tgt) { +template +void CuRand::RandUniform(CuVectorBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, - // so we should not use it to generate random numbers over whole stride. - CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); - // We need even number of `elements', or it crahes! - // (possibly touching 1 element after array, into the padding of memory alignment), - size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateNormal(gen_, tmp.Data(), tmp_elems_even, 0.0, 1.0)); - tgt->CopyFromMat(tmp); + CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), tgt->Dim())); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif { - tgt->Mat().SetRandn(); + tgt->Vec().SetRandUniform(); } } -template<> -void CuRand::RandGaussian(CuMatrixBase *tgt) { +template +void CuRand::RandGaussian(CuMatrixBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, // so we should not use it to generate random numbers over whole stride. - CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); - // We need even number of `elements', or it crahes! 
- // (possibly touching 1 element after array, into the padding of memory alignment), - size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateNormalDouble(gen_, tmp.Data(), tmp_elems_even, 0.0, 1.0)); - tgt->CopyFromMat(tmp); + // Also, we ensure to have 'even' number of elements for calling 'curand' + // by possibly adding one column. Even number of elements is required by + // curandGenerateUniform(), curandGenerateUniformDouble(). + MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, + CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined); + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride())); + tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -105,28 +125,47 @@ void CuRand::RandGaussian(CuMatrixBase *tgt) { } } -template<> -void CuRand::RandGaussian(CuVectorBase *tgt) { +template +void CuRand::RandGaussian(CuMatrix *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - MatrixIndexT dim_even = (1 + (tgt->Dim() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateNormal(gen_, tgt->Data(), dim_even, 0.0, 1.0)); + // Here we don't need to use 'tmp' matrix, if the number of elements is even, + MatrixIndexT num_elements = tgt->NumRows() * tgt->Stride(); + if (0 == (num_elements % 2)) { + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), num_elements)); + } else { + // We use 'tmp' matrix with one column added, this guarantees 'even' number of elements. + MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, + CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined); + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride())); + tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); + } CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif { - tgt->Vec().SetRandn(); + tgt->Mat().SetRandn(); } } -template<> -void CuRand::RandGaussian(CuVectorBase *tgt) { +template +void CuRand::RandGaussian(CuVectorBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - MatrixIndexT dim_even = (1 + (tgt->Dim() - 1) / 2) * 2; - CU_SAFE_CALL(curandGenerateNormalDouble(gen_, tgt->Data(), dim_even, 0.0, 1.0)); + // To ensure 'even' number of elements, we use 'tmp' vector of even length. + // Even number of elements is required by 'curand' functions: + // curandGenerateUniform(), curandGenerateUniformDouble(). 
+ MatrixIndexT num_elements = tgt->Dim(); + if (0 == (num_elements % 2)) { + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), tgt->Dim())); + } else { + MatrixIndexT dim_even = tgt->Dim() + (tgt->Dim() % 2); // + 0 or 1, + CuVector tmp(dim_even, kUndefined); + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.Dim())); + tgt->CopyFromVec(tmp.Range(0,tgt->Dim())); + } CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif diff --git a/src/cudamatrix/cu-rand.h b/src/cudamatrix/cu-rand.h index 60587391edd..2c8204b6b5f 100644 --- a/src/cudamatrix/cu-rand.h +++ b/src/cudamatrix/cu-rand.h @@ -68,8 +68,11 @@ class CuRand { /// Fill with uniform [0..1] floats, void RandUniform(CuMatrixBase *tgt); + void RandUniform(CuMatrix *tgt); + void RandUniform(CuVectorBase *tgt); /// Fill with Normal random numbers, void RandGaussian(CuMatrixBase *tgt); + void RandGaussian(CuMatrix *tgt); void RandGaussian(CuVectorBase *tgt); /// align probabilities to discrete 0/1 states (use uniform sampling), diff --git a/src/cudamatrix/cu-sp-matrix-speed-test.cc b/src/cudamatrix/cu-sp-matrix-speed-test.cc index 455bf58608f..ded4baed49b 100644 --- a/src/cudamatrix/cu-sp-matrix-speed-test.cc +++ b/src/cudamatrix/cu-sp-matrix-speed-test.cc @@ -146,5 +146,5 @@ int main() { #if HAVE_CUDA == 1 CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-sp-matrix-test.cc b/src/cudamatrix/cu-sp-matrix-test.cc index 3e3991afc81..c0f1119acea 100644 --- a/src/cudamatrix/cu-sp-matrix-test.cc +++ b/src/cudamatrix/cu-sp-matrix-test.cc @@ -363,9 +363,9 @@ template void CudaSpMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -394,8 +394,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-sparse-matrix-test.cc b/src/cudamatrix/cu-sparse-matrix-test.cc index 8f885815c72..6514ddbfa87 100644 --- a/src/cudamatrix/cu-sparse-matrix-test.cc +++ b/src/cudamatrix/cu-sparse-matrix-test.cc @@ -25,6 +25,8 @@ #include "util/common-utils.h" #include "cudamatrix/cu-matrix-lib.h" +using namespace kaldi; + namespace kaldi { template @@ -185,19 +187,20 @@ void CudaSparseMatrixUnitTest() { int main() { - for (kaldi::int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 - kaldi::CuDevice::Instantiate().SetDebugStrideMode(true); + for (; loop < 2; loop++) { + CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) - kaldi::CuDevice::Instantiate().SelectGpuId("no"); + CuDevice::Instantiate().SelectGpuId("no"); else - kaldi::CuDevice::Instantiate().SelectGpuId("yes"); + CuDevice::Instantiate().SelectGpuId("yes"); #endif kaldi::CudaSparseMatrixUnitTest(); #if HAVE_CUDA == 1 - if (kaldi::CuDevice::Instantiate().DoublePrecisionSupported()) { + if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CudaSparseMatrixUnitTest(); } else { KALDI_WARN << "Double precision not supported"; @@ -210,10 +213,10 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } - kaldi::SetVerboseLevel(4); + 
SetVerboseLevel(4); #if HAVE_CUDA == 1 - kaldi::CuDevice::Instantiate().PrintProfile(); + } + CuDevice::Instantiate().PrintProfile(); #endif return 0; } diff --git a/src/cudamatrix/cu-sparse-matrix.h b/src/cudamatrix/cu-sparse-matrix.h index 1298ee5ea5f..4da74871bac 100644 --- a/src/cudamatrix/cu-sparse-matrix.h +++ b/src/cudamatrix/cu-sparse-matrix.h @@ -121,10 +121,6 @@ class CuSparseMatrix { ~CuSparseMatrix() { } - // Use the CuMatrix::CopyFromSmat() function to copy from this to - // CuMatrix. - // Also see CuMatrix::AddSmat(). - protected: // The following two functions should only be called if we did not compile // with CUDA or could not get a CUDA card; in that case the contents are diff --git a/src/cudamatrix/cu-test.cc b/src/cudamatrix/cu-test.cc index c27e2b64691..66b62f097c9 100644 --- a/src/cudamatrix/cu-test.cc +++ b/src/cudamatrix/cu-test.cc @@ -575,9 +575,8 @@ static void CuMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else @@ -593,9 +592,8 @@ int main() { { kaldi::CuMatrixUnitTest(); } - } - #if HAVE_CUDA == 1 + } kaldi::CuDevice::Instantiate().PrintProfile(); #endif diff --git a/src/cudamatrix/cu-tp-matrix-test.cc b/src/cudamatrix/cu-tp-matrix-test.cc index 675cd19a56c..f5018aef6b7 100644 --- a/src/cudamatrix/cu-tp-matrix-test.cc +++ b/src/cudamatrix/cu-tp-matrix-test.cc @@ -187,9 +187,9 @@ template void CudaTpMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -211,8 +211,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-vector-speed-test.cc b/src/cudamatrix/cu-vector-speed-test.cc index 81f6f2bf14d..cf3f126937f 100644 --- a/src/cudamatrix/cu-vector-speed-test.cc +++ b/src/cudamatrix/cu-vector-speed-test.cc @@ -319,6 +319,6 @@ int main() { #else kaldi::CudaVectorSpeedTest(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-vector-test.cc b/src/cudamatrix/cu-vector-test.cc index a17a7baa930..6537bab70c6 100644 --- a/src/cudamatrix/cu-vector-test.cc +++ b/src/cudamatrix/cu-vector-test.cc @@ -755,9 +755,10 @@ int main(int argc, char *argv[]) { po.PrintUsage(); exit(1); } - - for (int32 loop = 0; loop < 2; loop++) { + + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -765,7 +766,6 @@ int main(int argc, char *argv[]) { CuDevice::Instantiate().SelectGpuId(use_gpu); #endif - kaldi::CuVectorUnitTest(); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { @@ -781,8 +781,8 @@ int main(int argc, char *argv[]) { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index c91a49ca2e4..b825b9c0a6e 100644 --- a/src/cudamatrix/cu-vector.cc +++ 
b/src/cudamatrix/cu-vector.cc @@ -50,7 +50,7 @@ Real VecVec(const CuVectorBase &a, if (CuDevice::Instantiate().Enabled()) { Timer tim; CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), - 1, &result)); + 1, &result)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -255,6 +255,14 @@ void CuVectorBase::SetRandn() { tmp.RandGaussian(this); } +template +void CuVectorBase::SetRandUniform() { + if (dim_ == 0) return; + CuRand tmp; + tmp.RandUniform(this); +} + + template Real CuVectorBase::Sum() const { @@ -444,9 +452,9 @@ void CuVectorBase::AddMatVec(const Real alpha, // Everything is backwards in CuBlas. We need to reverse rows, columns, // transpose-ness. CU_SAFE_CALL(cublas_gemv(GetCublasHandle(), - (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), - M.NumCols(), M.NumRows(), alpha, M.Data(), - M.Stride(), v.Data(), 1, beta, data_, 1)); + (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), + M.NumCols(), M.NumRows(), alpha, M.Data(), + M.Stride(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -471,7 +479,7 @@ void CuVectorBase::AddSpVec(const Real alpha, // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but // in CUBLAS, for some stupid reason, everything is reversed. CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), - alpha, M.Data(), v.Data(), 1, beta, data_, 1)); + alpha, M.Data(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -636,7 +644,7 @@ void CuVectorBase::MulTp(const CuTpMatrix &M, const MatrixTransposeT if (dim_ == 0) return; Timer tim; cublas_tpmv(GetCublasHandle(), (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), - M.NumRows(), M.Data(), data_, 1); + M.NumRows(), M.Data(), data_, 1); CuDevice::Instantiate().AccuProfile("CuVectorBase::MulTp", tim.Elapsed()); } else #endif @@ -1081,7 +1089,7 @@ void CuVectorBase::CopyDiagFromMat(const CuMatrix &M) { KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); Timer tim; CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, - data_, 1)); + data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -1165,19 +1173,25 @@ void CuVectorBase::AddRowSumMat(Real alpha, const CuMatrixBase &mat, } - template -void CuVectorBase::AddColSumMat(Real alpha, - const CuMatrixBase &mat, +void CuVectorBase::AddColSumMat(Real alpha, const CuMatrixBase &mat, Real beta) { - KALDI_ASSERT(mat.NumRows() == Dim()); - - CuVector ones(mat.NumCols()); - ones.Set(1.0); - this->AddMatVec(alpha, mat, kNoTrans, ones, beta); -} +#if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + KALDI_ASSERT(mat.NumRows() == Dim()); + cuda_add_col_sum_mat(mat.NumRows(), CU1DBLOCK, Data(), mat.Data(), + mat.Dim(), alpha, beta); + CU_SAFE_CALL(cudaGetLastError()); + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else +#endif + { + Vec().AddColSumMat(alpha, mat.Mat(), beta); + } +} template void CuVectorBase::InvertElements() { diff --git a/src/cudamatrix/cu-vector.h b/src/cudamatrix/cu-vector.h index cff5270e6cf..53641556669 100644 --- a/src/cudamatrix/cu-vector.h +++ b/src/cudamatrix/cu-vector.h @@ -125,7 +125,9 @@ class CuVectorBase { MatrixIndexT ApplyCeiling(Real ceiling_val); void ApplyPow(Real power); Real Sum() const; + void SetRandn(); + void SetRandUniform(); CuSubVector Range(const MatrixIndexT o, const MatrixIndexT l) { return CuSubVector(*this, o, l); diff --git 
a/src/cudamatrix/cublas-wrappers.h b/src/cudamatrix/cublas-wrappers.h index 69a591240a5..b8ea7c8b2c6 100644 --- a/src/cudamatrix/cublas-wrappers.h +++ b/src/cudamatrix/cublas-wrappers.h @@ -25,79 +25,89 @@ namespace kaldi { #if HAVE_CUDA == 1 -inline cublasStatus_t cublas_gemm(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n,int k, float alpha, - const float *A, int lda, const float *B, int ldb, float beta, - float *C, int ldc) { +inline cublasStatus_t cublas_gemm( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n,int k, float alpha, + const float *A, int lda, const float *B, int ldb, float beta, + float *C, int ldc) { return cublasSgemm_v2(handle,transa,transb,m,n,k,&alpha,A,lda,B,ldb,&beta,C,ldc); } -inline cublasStatus_t cublas_gemm(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n,int k, double alpha, - const double *A, int lda, const double *B, int ldb, double beta, - double *C, int ldc) { +inline cublasStatus_t cublas_gemm( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n,int k, double alpha, + const double *A, int lda, const double *B, int ldb, double beta, + double *C, int ldc) { return cublasDgemm_v2(handle,transa,transb,m,n,k,&alpha,A,lda,B,ldb,&beta,C,ldc); } -inline cublasStatus_t cublas_ger(cublasHandle_t handle, int m, int n, float alpha, - const float *x, int incx, const float *y, int incy, float *A, int lda ) { +inline cublasStatus_t cublas_ger( + cublasHandle_t handle, int m, int n, float alpha, + const float *x, int incx, const float *y, int incy, float *A, int lda ) { return cublasSger_v2(handle,m,n,&alpha,x,incx,y,incy,A,lda); } inline cublasStatus_t cublas_ger(cublasHandle_t handle, int m, int n, double alpha, const double *x, int incx, const double *y, int incy, double *A, int lda ) { return cublasDger_v2(handle,m,n,&alpha,x,incx,y,incy,A,lda); } -inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, float alpha, - const float *A[], int lda, const float *B[], int ldb, float beta, - float *C[], int ldc, int batchCount) { +inline cublasStatus_t cublas_gemmBatched( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, float alpha, + const float *A[], int lda, const float *B[], int ldb, float beta, + float *C[], int ldc, int batchCount) { return cublasSgemmBatched(handle, transa, transb, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, batchCount); } -inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, double alpha, - const double *A[], int lda, const double *B[], int ldb, double beta, - double *C[], int ldc, int batchCount) { +inline cublasStatus_t cublas_gemmBatched( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, double alpha, + const double *A[], int lda, const double *B[], int ldb, double beta, + double *C[], int ldc, int batchCount) { return cublasDgemmBatched(handle, transa, transb, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, batchCount); } -inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, float alpha, - const float* A, int lda, float* B, int ldb) { +inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, + float alpha, const float* A, int lda, + float* B, int ldb) { return 
cublasStrsm_v2(handle,CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,m,n,&alpha,A,lda,B,ldb); } -inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, double alpha, - const double* A, int lda, double* B, int ldb) { +inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, + double alpha, const double* A, int lda, + double* B, int ldb) { return cublasDtrsm_v2(handle,CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,m,n,&alpha,A,lda,B,ldb); } -inline cublasStatus_t cublas_syrk(cublasHandle_t handle, cublasFillMode_t uplo, - cublasOperation_t trans, int n, int k, float alpha, - const float *A, int lda, float beta, float *C, int ldc) { +inline cublasStatus_t cublas_syrk( + cublasHandle_t handle, cublasFillMode_t uplo, + cublasOperation_t trans, int n, int k, float alpha, + const float *A, int lda, float beta, float *C, int ldc) { return cublasSsyrk_v2(handle,uplo,trans,n,k,&alpha,A,lda,&beta,C,ldc); } -inline cublasStatus_t cublas_syrk(cublasHandle_t handle, cublasFillMode_t uplo, - cublasOperation_t trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, int ldc) { +inline cublasStatus_t cublas_syrk( + cublasHandle_t handle, cublasFillMode_t uplo, + cublasOperation_t trans, int n, int k, double alpha, + const double *A, int lda, double beta, double *C, int ldc) { return cublasDsyrk_v2(handle,uplo,trans,n,k,&alpha,A,lda,&beta,C,ldc); } inline cublasStatus_t cublas_dot(cublasHandle_t handle, int n, const float *x, - int incx, const float *y, int incy, float *result) { + int incx, const float *y, int incy, + float *result) { return cublasSdot_v2(handle, n, x, incx, y, incy, result); } inline cublasStatus_t cublas_dot(cublasHandle_t handle, int n, const double *x, - int incx, const double *y, int incy, double *result) { + int incx, const double *y, int incy, + double *result) { return cublasDdot_v2(handle, n, x, incx, y, incy, result); } inline cublasStatus_t cublas_asum(cublasHandle_t handle, int n, const float* x, - int incx, float *result) { + int incx, float *result) { return cublasSasum_v2(handle, n, x, incx, result); } inline cublasStatus_t cublas_asum(cublasHandle_t handle, int n, const double* x, - int incx, double *result) { + int incx, double *result) { return cublasDasum_v2(handle, n, x, incx, result); } inline cublasStatus_t cublas_nrm2(cublasHandle_t handle, int n, const float* x, - int incx, float *result) { + int incx, float *result) { return cublasSnrm2_v2(handle, n, x, incx, result); - } inline cublasStatus_t cublas_nrm2(cublasHandle_t handle, int n, const double* x, - int incx, double *result) { + int incx, double *result) { return cublasDnrm2_v2(handle, n, x, incx, result); } inline cudaError_t cublas_copy(cublasHandle_t handle, int n, const float* x, @@ -115,49 +125,53 @@ inline cudaError_t cublas_copy(cublasHandle_t handle, int n, const double* x, return cudaGetLastError(); } inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, const float* x, - int incx, float* y, int incy) { + int incx, float* y, int incy) { return cublasScopy_v2(handle,n,x,incx,y,incy); } inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, const double* x, - int incx, double* y, int incy) { + int incx, double* y, int incy) { return cublasDcopy_v2(handle,n,x,incx,y,incy); } inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, float alpha, - float* mat, int incx) { + float* mat, int incx) { return cublasSscal_v2(handle, n, &alpha, mat, incx); } inline cublasStatus_t 
cublas_scal(cublasHandle_t handle, int n, double alpha, - double* mat, int incx) { + double* mat, int incx) { return cublasDscal_v2(handle, n, &alpha, mat, incx); } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, int n, float alpha, - const float* x, int incx, float* y, int incy) { + const float* x, int incx, float* y, int incy) { return cublasSaxpy_v2(handle, n, &alpha, x, incx, y, incy); } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, int n, double alpha, - const double* x, int incx, double* y, int incy) { + const double* x, int incx, double* y, int incy) { return cublasDaxpy_v2(handle, n, &alpha, x, incx, y, incy); } -inline cublasStatus_t cublas_gemv(cublasHandle_t handle, cublasOperation_t trans, - int m, int n, float alpha, const float* A, int lda, const float* x, - int incx, float beta, float* y, int incy) { +inline cublasStatus_t cublas_gemv( + cublasHandle_t handle, cublasOperation_t trans, + int m, int n, float alpha, const float* A, int lda, const float* x, + int incx, float beta, float* y, int incy) { return cublasSgemv_v2(handle,trans,m,n,&alpha,A,lda,x,incx,&beta,y,incy); } -inline cublasStatus_t cublas_gemv(cublasHandle_t handle, cublasOperation_t trans, - int m, int n, double alpha, const double* A, int lda, const double* x, - int incx, double beta, double* y, int incy) { +inline cublasStatus_t cublas_gemv( + cublasHandle_t handle, cublasOperation_t trans, + int m, int n, double alpha, const double* A, int lda, const double* x, + int incx, double beta, double* y, int incy) { return cublasDgemv_v2(handle,trans,m,n,&alpha,A,lda,x,incx,&beta,y,incy); } -inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, - int n, float alpha, const float *AP, const float *x, int incx, - float beta, float *y, int incy) { +inline cublasStatus_t cublas_spmv( + cublasHandle_t handle, cublasFillMode_t uplo, + int n, float alpha, const float *AP, const float *x, int incx, + float beta, float *y, int incy) { return cublasSspmv_v2(handle, uplo, n, &alpha, AP, x, incx, &beta, y, incy); } -inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, - int n, double alpha, const double *AP, const double *x, int incx, - double beta, double *y, int incy) { +inline cublasStatus_t cublas_spmv( + cublasHandle_t handle, cublasFillMode_t uplo, + int n, double alpha, const double *AP, const double *x, int incx, + double beta, double *y, int incy) { return cublasDspmv_v2(handle, uplo, n, &alpha, AP, x, incx, &beta, y, incy); } @@ -167,20 +181,22 @@ inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, // row-by-row, but CUDA views the same layout as upper-triangular, // column-by-column. 
inline cublasStatus_t cublas_tpmv(cublasHandle_t handle, cublasOperation_t trans, - int n, const float* Ap, float* x, int incx) { + int n, const float* Ap, float* x, int incx) { return cublasStpmv_v2(handle, CUBLAS_FILL_MODE_UPPER, trans, CUBLAS_DIAG_NON_UNIT, n, Ap, x, incx); } inline cublasStatus_t cublas_tpmv(cublasHandle_t handle, cublasOperation_t trans, - int n, const double* Ap, double* x,int incx) { + int n, const double* Ap, double* x,int incx) { return cublasDtpmv_v2(handle, CUBLAS_FILL_MODE_UPPER, trans, CUBLAS_DIAG_NON_UNIT, n, Ap, x, incx); } inline cublasStatus_t cublas_spr(cublasHandle_t handle, cublasFillMode_t uplo, - int n, float alpha, const float *x, int incx, float *AP) { + int n, float alpha, const float *x, int incx, + float *AP) { return cublasSspr_v2(handle, uplo, n, &alpha, x, incx, AP); } inline cublasStatus_t cublas_spr(cublasHandle_t handle, cublasFillMode_t uplo, - int n, double alpha, const double *x, int incx, double *AP) { + int n, double alpha, const double *x, int incx, + double *AP) { return cublasDspr_v2(handle, uplo, n, &alpha, x, incx, AP); } diff --git a/src/decoder/Makefile b/src/decoder/Makefile index fe489d1cb3f..93db701cb7a 100644 --- a/src/decoder/Makefile +++ b/src/decoder/Makefile @@ -11,7 +11,7 @@ OBJFILES = training-graph-compiler.o lattice-simple-decoder.o lattice-faster-dec LIBNAME = kaldi-decoder -ADDLIBS = ../lat/kaldi-lat.a ../sgmm/kaldi-sgmm.a ../hmm/kaldi-hmm.a \ +ADDLIBS = ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../thread/kaldi-thread.a \ ../matrix/kaldi-matrix.a ../base/kaldi-base.a diff --git a/src/decoder/nbest-decoder.h b/src/decoder/nbest-decoder.h index 8db071d6591..daecc84e7b2 100644 --- a/src/decoder/nbest-decoder.h +++ b/src/decoder/nbest-decoder.h @@ -179,7 +179,7 @@ class NBestDecoder { continue; // skip that token } LatticeWeight path_w(lmscore, amscore); - CompactLatticeWeight path_weight(path_w, vector()); + CompactLatticeWeight path_weight(path_w, std::vector()); std::vector arcs_reverse; // reverse order output arcs // outer loop for word tokens @@ -230,8 +230,8 @@ class NBestDecoder { // ShortestPath(fst, &fst_one); // ConvertLattice(fst_one, fst_out, true); // return true; - // } - + // } + private: // TokenStore is a store of linked tokens with its own allocator @@ -388,7 +388,7 @@ class NBestDecoder { return tok2; } } - + inline bool CombineN(Elem *head, Token *new_tok) { // n-best version if (!new_tok) return false; Elem *e = head; @@ -435,7 +435,7 @@ class NBestDecoder { } inline Token* Advance(Token *source, Arc &arc, int32 frame, BaseFloat cutoff) { - // compute new weight + // compute new weight Weight w = Times(source->c, arc.weight); Weight amscore = Weight::One(); if (arc.ilabel > 0) { // emitting arc @@ -446,7 +446,7 @@ class NBestDecoder { if (w.Value() > cutoff) { // prune return NULL; } - // create new token + // create new token Token *tok; if (arc.olabel > 0) { // create new token // find or create corresponding Token @@ -593,10 +593,10 @@ class NBestDecoder { // KALDI_ASSERT(state == tok->arc_.nextstate); for (fst::ArcIterator > aiter(fst_, state); !aiter.Done(); aiter.Next()) { - // for all a in A(state) + // for all a in A(state) Arc arc = aiter.Value(); if (arc.ilabel != 0) { // propagate only emitting - Token *new_tok = + Token *new_tok = token_store_.Advance(tok, arc, frame, next_weight_cutoff); if (new_tok) { Elem *e_found = toks_.Find(arc.nextstate); @@ -637,7 +637,7 @@ class NBestDecoder { 
queue_.erase(queue_.begin()); Elem *elem = toks_.Find(state); // would segfault if state not // in toks_ but this can't happen. - + // we have to pop all tokens with the same state // this may create some unneccessary repetitions, since only the new token // needs to be forwarded, but I don't know yet how to solve this diff --git a/src/doc/Kaldi.pptx b/src/doc/Kaldi.pptx old mode 100755 new mode 100644 diff --git a/src/doc/KaldiMatrix.pptx b/src/doc/KaldiMatrix.pptx old mode 100755 new mode 100644 diff --git a/src/doc/KaldiModels.pptx b/src/doc/KaldiModels.pptx old mode 100755 new mode 100644 diff --git a/src/doc/KaldiScripts.pptx b/src/doc/KaldiScripts.pptx old mode 100755 new mode 100644 diff --git a/src/doc/README b/src/doc/README index 566f0d0bf64..27de5defc9f 100644 --- a/src/doc/README +++ b/src/doc/README @@ -1,3 +1,9 @@ +#!/bin/bash + +if [ $0 != "doc/README" ]; then + echo "$0: this should be run from one level up (in src/)." + exit 1 +fi #This directory contains some of the source for the Doxygen documentation (the #code itself, and its comments, is the rest of the source). Doxygen will create @@ -6,7 +12,7 @@ #not work, search for "Kaldi main page" online and you will hopefully get a #version of the documentation. -# Note: I generally run this file by typing ". doc/README" from src/, +# Note: I generally run this file by typing "doc/README" from src/, # but this relies on having dsa encryption set up with Sourceforge. # instructions (from Vassil Panayotov) on how to do this: # type @@ -20,6 +26,12 @@ # cd to src/ doc/make_tools.sh + +echo "$0: running doc/get_version_info.sh" +doc/get_version_info.sh +echo "$0: done" + + doxygen cp doc/*.pptx html/; # get the style sheet in the html/ directory. @@ -29,7 +41,6 @@ doxygen -w html header.html footer.html stylesheet.css rm header.html footer.html mv stylesheet.css html/ - if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then cp ../misc/logo/KaldiIco.png html/favicon.ico tar -czf html.tar.gz html @@ -61,4 +72,3 @@ fi # moved the header.html to doc/ and edited it to include the following snippet, # and added it to the repo. # - diff --git a/src/doc/dependencies.dox b/src/doc/dependencies.dox index bff6983e0d6..63d2658b726 100644 --- a/src/doc/dependencies.dox +++ b/src/doc/dependencies.dox @@ -34,9 +34,10 @@ and you can reserve these on the queue by adding some extra option to qsub. See \ref queue for more information. - We have started a separate project called Kluster that shows you - how to create such a cluster on Amazon's EC2; MIT's StarCluster is a larger and better-supported project that provides the same functionality. Most of the scripts should be suitable for a locally hosted cluster based on Debian or @@ -51,9 +52,8 @@ course it will be slower, and you may have to reduce the number of jobs used in some of the example scripts to avoid exhausting your machine's memory. - Kaldi is best tested on Debian and Red Hat Linux, but will run on any - Linux distribution, or on Cygwin or Mac OsX. We are working on FreeBSD - installation scripts. + Kaldi is best tested on Debian and Red Hat Linux, but will run on any + Linux distribution, or on Cygwin or Mac OsX. Kaldi's scripts have been written in such a way that if you replace SGE with a similar mechanism with different syntax (such as Tork), it should be @@ -68,7 +68,6 @@ \section dependencies_packages Software packages required - The following is a non-exhaustive list of some of the packages you need in order to install Kaldi. 
 The full list is not important since
 the installation scripts will tell you what you are missing.
@@ -98,6 +97,11 @@
     (the corresponding packages are automake and libtool).
   - Note: some of the example scripts now use SRILM; we make it easy to install
     that, although you still have to register online to download it.
+  - SRILM: some of the example scripts use this.  It's generally a better
+    and more complete language modeling toolkit than IRSTLM; the only drawback
+    is the license, which is not free for commercial use.  You have to
+    enter your name on the download page to download it, so the installation
+    script requires some human interaction.
   - sph2pipe: this is for converting sph format files into other formats such
     as wav.  It's needed for the example scripts that use LDC data.
   - sclite: this is for scoring and is not necessary as we have our own, simple
@@ -109,5 +113,9 @@
   - CLAPACK, the linear algebra library (we download the headers).  This is
     useful only on systems where you don't have ATLAS and are instead compiling
     with CLAPACK.
+  - OpenBLAS: this is an alternative to ATLAS or CLAPACK.  The scripts don't
+    use it by default but we provide installation scripts so you can install
+    it if you want to compare it against ATLAS (it's more actively
+    maintained than ATLAS).
 */
diff --git a/src/doc/dnn3.dox b/src/doc/dnn3.dox
index b6dbea42fd6..547707d417b 100644
--- a/src/doc/dnn3.dox
+++ b/src/doc/dnn3.dox
@@ -40,7 +40,7 @@ namespace kaldi {
    - \subpage dnn3_code_data_types
    - \subpage dnn3_code_compilation
    - \subpage dnn3_code_optimization
-   - [documentation on scripts to come]
+   - \subpage dnn3_scripts_context
 
 */
diff --git a/src/doc/dnn3_code_compilation.dox b/src/doc/dnn3_code_compilation.dox
index 59844a5d488..f536bb07449 100644
--- a/src/doc/dnn3_code_compilation.dox
+++ b/src/doc/dnn3_code_compilation.dox
@@ -917,6 +917,21 @@ as an optimization.  In this case the associated \ref NnetComputation::debug_inf
 will correspond to the debug information of one of the matrices that we merged.
 
+\subsection dnn3_compile_compiler_shortcut Shortcut compilation
+
+A feature available from Kaldi version 5.1 is 'shortcut' compilation (enabled
+by default).  This is done only when the ComputationRequest has a suitably
+regular structure; this basically means that there are more than two different
+"n" indexes in the computation; that they are numbered consecutively from
+zero; and that for each "n" index, the requested set of "t" and "x" indexes is
+the same and in a regular order.  What the shortcut compilation does is reduce
+the computation request down to just two distinct "n" indexes (zero and one),
+compile the mini-request, and then expand the resulting compilation-- basically,
+it extrapolates the compiled computation to what it would have been if
+the entire original computation request had been supplied.  Shortcut
+compilation significantly cuts down compilation time.
+
+
 - Up: \ref dnn3
 - Previous: \ref dnn3_code_data_types
 - Next: \ref dnn3_code_optimization
diff --git a/src/doc/dnn3_code_optimization.dox b/src/doc/dnn3_code_optimization.dox
index 89a61a7fc32..accf2f50793 100644
--- a/src/doc/dnn3_code_optimization.dox
+++ b/src/doc/dnn3_code_optimization.dox
@@ -319,7 +319,7 @@ struct NnetOptimizeConfig {
 };
 \endverbatim
 The top-level call to the optimization code is just a function call.
-We show the code for this function below:
+We show some partial code for this function below:
 \verbatim
 void Optimize(const NnetOptimizeConfig &config,
               const Nnet &nnet,
diff --git a/src/doc/dnn3_scripts_context.dox b/src/doc/dnn3_scripts_context.dox
new file mode 100644
index 00000000000..884e8c79f51
--- /dev/null
+++ b/src/doc/dnn3_scripts_context.dox
@@ -0,0 +1,246 @@
+// doc/dnn3_scripts_context.dox
+
+
+// Copyright 2015   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+namespace kaldi {
+namespace nnet3 {
+
+/**
+  \page dnn3_scripts_context Context and chunk-size in the "nnet3" setup
+
+  \section dnn3_scripts_context_intro Introduction
+
+  This page discusses certain issues of terminology in the nnet3 setup
+  about chunk sizes for decoding and training, and left and right context.
+  This will be helpful in understanding some of the scripts.  At the current
+  time we don't have any 'overview' documentation of nnet3 from a scripting
+  perspective, so this will have to stand as an isolated piece of
+  documentation.
+
+  \section dnn3_scripts_context_basics The basics
+
+  If you have read the previous documentation available for \ref dnn3, you will
+  realize that the "nnet3" setup supports more than simple feedforward
+  DNNs.  It can be used for time delay neural networks (TDNNs) where temporal
+  splicing (frame splicing) is done at internal layers of the network; and also
+  for recurrent topologies (RNNs, LSTMs, BLSTMs, etc.).  So nnet3
+  "knows about" the time axis.  Below we establish some terminology.
+
+  \subsection dnn3_scripts_context_basics_context Left and right context
+
+  Suppose we want a network to compute an output for a specific time index;
+  to be concrete, say time t = 154.  If the network does frame splicing
+  internally (or anything else nontrivial with the 't' indexes), it may not be
+  able to compute this output without seeing a range of input frames.  For
+  example, it may be impossible to compute the output without seeing the range
+  of 't' values from t = 150 through t = 157.  In this case (glossing over
+  details), we'd say that the network has a \b left-context of 4 and a
+  \b right-context of 3; a toy sketch of how this accumulates across layers is
+  given below.  The actual computation of the context is a bit more complex as
+  it has to take into account special cases such as where the behavior for odd
+  and even 't' values differs (cf. Round() descriptors in
+  \ref dnn3_dt_nnet_descriptor_config).
+
+  There are cases with recurrent topologies where, in addition to the
+  "required" left and right context, we want to give the training or the
+  decoding "extra" context.  For such topologies, the network can make use
+  of context beyond the required context.
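+
+  To make the accumulation concrete, here is a minimal illustrative sketch
+  (this is not code from the nnet3 compiler, and the per-layer splice offsets
+  are invented for the example) of how per-layer splicing adds up to the
+  model's total left and right context:
+  \verbatim
+  #include <algorithm>
+  #include <iostream>
+  #include <vector>
+
+  int main() {
+    // Each inner vector holds the 't' offsets one layer splices together;
+    // e.g. {-1, 0, 1} means a layer sees the previous, current and next frame.
+    std::vector<std::vector<int> > layer_offsets;
+    int l1[] = {-2, -1, 0, 1, 2}, l2[] = {-1, 0, 1}, l3[] = {-1, 0};
+    layer_offsets.push_back(std::vector<int>(l1, l1 + 5));
+    layer_offsets.push_back(std::vector<int>(l2, l2 + 3));
+    layer_offsets.push_back(std::vector<int>(l3, l3 + 2));
+    int left_context = 0, right_context = 0;
+    for (size_t i = 0; i < layer_offsets.size(); i++) {
+      left_context -= *std::min_element(layer_offsets[i].begin(),
+                                        layer_offsets[i].end());
+      right_context += *std::max_element(layer_offsets[i].begin(),
+                                         layer_offsets[i].end());
+    }
+    // Prints: left-context 4, right-context 3 -- matching the example above,
+    // where computing the output at t = 154 needs inputs t = 150 .. 157.
+    std::cout << "left-context " << left_context
+              << ", right-context " << right_context << std::endl;
+    return 0;
+  }
+  \endverbatim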
+ In the scripts you'll generally see variables called + \b extra-left-context and \b extra-right-context, which mean + "the amount of context that we're going to provide in addition to what is required". + + In some circumstances the names \b left-context and + \b right-context simply mean the total left and right context that we're + adding to the chunks, i.e. the sums of the model left/right context and the + extra left/right context. So in some circumstances you may have to work out + from the context whether a variable refers to the model left/right context + or the left/right context of the chunks of data. + + In Kaldi version 5.0 and earlier the left and right context in the chunks + of data was not affected by whether the chunks were at the + beginning or end of the utterance; at the ends we padded the input with copies of the + first or last frame. This meant that for recurrent topologies, we might end up + padding the start or end of the utterance with a lot of frames (up to 40 or so). + This is wasteful and rather strange. + In versions 5.1 and later, you can specify configuration values \b extra-left-context-initial and + \b extra-right-context-final that allow the start/end of the utterance to have a different + amount of context. If you specify these values, you would normally specify them both to be 0 + (i.e. no extra context). However, for backward compatibility with older setups, they + generally default to -1 (meaning, just copy the default extra-left-context and extra-right-context). + + + \subsection dnn3_scripts_context_basics_chunk Chunk size + + The \b chunk-size is the number of (output) frames for each chunk of data + that we evaluate in training or decoding. In the get_egs.sh script + and train_dnn.py it is also referred to as \b frames-per-eg (in some contexts, + this is not the same as the chunk size; see below). In decoding we call this + the \b frames-per-chunk. + + \subsubsection dnn3_scripts_context_basics_chunk_dnn Non-recurrent, non-chain case + + For the very simplest types of networks, such as feedforward networks or TDNNs + trained with the cross-entropy objective function, we randomize the entire + dataset at the frame level and we just train on one frame at a time. In order + for the training jobs to mostly do sequential I/O, we aim to pre-randomize the + data at the frame level. However, when you consider that we might easily + require 10 frames each of left and right context, and we have to write this out, + we could easily be increasing the amount of data by a factor of 20 or so when we + generate the training examples. To solve this problem we include labels for + a range of time values, controlled by \b frames-per-eg (normally 8), and include + enough left/right context that we can train on any of those 8 frames. Then + when we train the model, any given training job will pick one of those 8 frames to + train on.
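+
+ A rough illustration of the saving (with invented context values of 10
+ frames on each side; Python):
+\verbatim
+left_context, right_context = 10, 10
+
+def storage_blowup(frames_per_eg):
+    # input frames stored per supervised output frame
+    return float(frames_per_eg + left_context + right_context) / frames_per_eg
+
+print(storage_blowup(1))  # 21.0: one label per example, ~20x blow-up
+print(storage_blowup(8))  # 3.5: frames-per-eg=8 keeps the blow-up modest
+\endverbatim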
+ + \subsubsection dnn3_scripts_context_basics_chunk_rnn Recurrent or chain case + + In models that are RNNs or LSTMs or are \ref chain, we always train on fairly large + chunks (generally in the range 40 to 150 frames). This is referred to as the + \b chunk-size. When we decode, we also generally evaluate the neural net on fairly + large chunks of data (like 30, 50 or 100 frames). This is usually referred to + as the \b frames-per-chunk. For recurrent networks we tend to + make sure that the \b chunk-size/\b frames-per-chunk + and the \b extra-left-context and \b extra-right-context are about the same in + training and decoding, because this generally gives the best results (although + sometimes it's best to make the extra-context values slightly larger in decoding). + One might expect that at decoding time longer context would always be better, but + this does not always seem to be the case (however, see \ref dnn3_scripts_context_looped + below, where we mention a way around this). + + + \subsubsection dnn3_scripts_context_basics_chunk_subsampling Interaction of chunk size with frame-subsampling-factor + + In cases where there is frame-subsampling at the output (like the chain model), + the chunk-size is still measured in multiples of 't', and we make sure (via + rounding up in the code) that it's a multiple of the frame-subsampling factor. + Bear in mind that if the \b chunk-size is 90 and the \b frame-subsampling-factor + is 3, then we're only evaluating 30 distinct output indexes for each chunk of + 90 frames (e.g. t=0, t=3 ... t=87). + + \subsection dnn3_scripts_context_basics_variable Variable chunk size + + Variable chunk size is something used in training that is only available in Kaldi version + 5.1 or later. This is a mechanism to allow fairly large chunks while avoiding + the loss of data due to files that are not exact multiples of the chunk size. + Instead of specifying the chunk size as (say) 150, we might specify the chunk + size as a comma-separated list like 150,120,90,75, and the commands that generate the + training examples are allowed to create chunks of any of those sizes. The + first chunk size specified is referred to as the primary chunk size, and is + "special" in that for any given utterance, we are allowed to pick at most two chunks of + non-primary sizes; the remaining chunks must be of the primary chunk size. + This restriction makes it easier to work out the optimal split of a file of + a given length into chunks, and allows us to bias the chunk-generation to + chunks of a certain length. + + + \subsection dnn3_scripts_context_basics_minibatch Minibatch size + + The program nnet3-merge-egs merges individual training examples into + minibatches containing many different examples (each original example + gets a different 'n' index). The \b minibatch-size is the desired + size of minibatch, by which we mean the number of examples (frames or + sequences) that we combine into one (for example, minibatch-size=128). + When the chunk sizes + are variable (and taking into account that the context may be different + at the start/end of utterances if we set the \b extra-left-context-initial + and \b extra-right-context-final), it's important to ensure that only + ``similar'' examples are merged into minibatches; this prevents expensive + recompilation from happening on every single minibatch. + + In Kaldi version + 5.1 and later, nnet3-merge-egs only merges together chunks of the same + structure (i.e. the same chunk-size and left and right context). + It keeps reading chunks from the input until it finds that + for some structure of input, there are \b minibatch-size examples ready + to merge into one. In Kaldi versions prior to 5.1 we generally discarded + the "odd-numbered" examples that couldn't be fit into a normal-sized + minibatch, but this becomes problematic now that there are many different + chunk-sizes (we'd discard too much data). A sketch of this merging + policy is shown below.
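+
+ The following is an illustration of that merging policy (a Python sketch,
+ not the actual nnet3-merge-egs code; the example attributes are invented):
+\verbatim
+def merge_stream(examples, minibatch_size):
+    pending = {}  # structure -> examples not yet merged
+    for eg in examples:
+        key = (eg.chunk_size, eg.left_context, eg.right_context)
+        pending.setdefault(key, []).append(eg)
+        if len(pending[key]) == minibatch_size:
+            yield pending.pop(key)  # merge these into one minibatch
+    # Whatever remains in 'pending' is the leftover ('odd-numbered')
+    # examples; version 5.1 and later emit these in smaller minibatches
+    # rather than discarding them.
+\endverbatim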
+ + \subsubsection dnn3_scripts_context_basics_minibatch_variable Variable minibatch size + + In Kaldi 5.1 and later, + the --minibatch-size option accepts a more general string that allows the user more + control than just having a fixed minibatch size. For example, you can specify --minibatch-size=64,128 and + for each type of example it will try to accumulate batches of the + largest specified size (128) and output + them, until it reaches the end of the input; then it will output + a minibatch of size 64 if there are >= 64 egs left. Ranges are also + supported, e.g. --minibatch-size=1:64 means to output minibatches of size 64 + until the end of the input, then output all remaining examples as a single + minibatch. You may also specify different rules for examples of different + sizes (run nnet3-merge-egs without arguments for details of this); this can be useful + to stay within GPU memory limits. + + \section dnn3_scripts_context_looped Looped decoding + + Looped decoding in nnet3 is another feature that is new in Kaldi version 5.1. + It is applicable to forward-recurrent neural networks such as RNNs and LSTMs + (but not to BLSTMs). It allows us to re-use hidden-state activations from + previously-computed chunks. This allows us to have effectively unlimited left + context. The reason why it's called ``looped decoding'' relates to the way + it's implemented: we create a computation whose last statement is a 'goto' + that jumps to somewhere in the middle, so effectively it has a loop like + 'while(1)'. (Note: the computations have statements that request user input or + provide output, so the loop doesn't cause the computation to run indefinitely when called; + it will stop when an I/O operation is reached). Looped computation is intended to solve two problems: wasteful + computation, and latency. Suppose we trained our LSTMs with 40 frames of left + context and a chunk-size of 100. Without looped computation, we'd probably + want to decode with chunks of size about 100 and we'd left-pad the input with around 40 + frames. But this takes about 40\% extra computation; and the chunk size of 1 + second would be a problem for latency/responsiveness in a real-time + application. With looped computation, we can choose any chunk size that's + convenient, because the effective left context is infinite; and the chunk size + doesn't affect the computed output any more. + + However, there is a slight problem with what we sketched out above. In + practice, we've found for LSTMs that decoding works best with about the same + chunk sizes and context as we trained with. That is, adding more context than + we trained on is not helpful. Our theory about why this happens is that + as the context gets longer we reach parts of activation space that were unreachable + before. The maximum value of the cells \f$c_t\f$ in LSTMs rises linearly with + the number of frames we've seen. Following this theory, we made a modification + to LSTMs that seems to fix the problem. We scale the \f$c_t\f$ in the LSTM equations + by a value slightly less than one in the recurrence (for example, 0.9). + This puts a bound on the maximum hidden activations and makes them + increase less dramatically with increasing recurrence time. It's specified + as a configuration value in the LSTM components in the "xconfig" configuration files + with the "decay-time" value, e.g. "decay-time=20". This doesn't seem to + degrade the Word Error Rates, and it removes the discrepancy between regular + and looped decoding (i.e. it makes the networks tolerant to longer context than + was seen in training).
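+
+ To make the modification concrete, here is a minimal sketch of the scaled
+ cell recurrence (illustrative Python, not Kaldi's implementation; the exact
+ relationship between the decay-time value and the scale is simplified here):
+\verbatim
+def lstm_cell_update(c_prev, i, f, g, decay_time=20.0):
+    # i, f, g: input gate, forget gate and cell candidate at time t.
+    # A scale slightly less than one bounds how far the cell value can
+    # grow as the number of recurrence steps increases.
+    scale = 1.0 - 1.0 / decay_time  # e.g. 0.95 for decay-time=20
+    return f * (scale * c_prev) + i * g
+\endverbatim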
+ + The script steps/nnet3/decode_looped.sh (only available from Kaldi version 5.1) + takes only two chunk- or context-related configuration values: + \b frames-per-chunk (which only affects the speed/latency tradeoff and not + results), and \b extra-left-context-initial, which should be set to + match the training condition (generally this will be zero, in up-to-date + scripts). + + + At the time of writing, we have not yet created a program similar to + online2-wav-nnet3-latgen-faster that uses the looped decoder; that is + on our TODO list (it's not inherently difficult). + + + - Up: \ref dnn3 + - Previous: \ref dnn3_code_optimization + +*/ + +} +} diff --git a/src/doc/examples.dox b/src/doc/examples.dox old mode 100755 new mode 100644 diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh new file mode 100755 index 00000000000..568e53c88dd --- /dev/null +++ b/src/doc/get_version_info.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +# Note: this script assumes that it's part of a git repository where +# the official kaldi repo is a remote named 'upstream', as shown +# here: +# git remote -vv | grep upstream +# upstream git@github.com:kaldi-asr/kaldi.git (fetch) +# upstream git@github.com:kaldi-asr/kaldi.git (push) +# Since Dan is going to be the one running this script and that's +# how he does it, this should work fine. + + + +# the tuples are: + +if [ "$0" != "doc/get_version_info.sh" ] || [ $# -ne 0 ]; then + echo "$0: you should run this script without arguments, from the src/ directory." + echo "... It generates 5.0.html, 5.1.html, and so on." + exit 1; +fi + +if ! git fetch upstream; then + echo "$0: command 'git fetch upstream' failed" + exit 1 +fi + + +# echo "fooXXabcYYbar" | perl -ane ' if (m/XX(.+)YY/) { $a=$`;$x=$1;$y=$'\''; $x =~ s/a/b/g; print "${a}XX${x}YY${y}"; } else {print;}' + +# Note: when you add new tuples here you'll want to add new +# \htmlinclude directives in versions.dox. +# the tuples will generally be of the form: "x.x master yyyyyy" +# where yyyyy is the result of git log -1 src/.version done on +# that version of Kaldi (we only update the .version file when +# the major/minor version number changes). +for tuple in "5.0 master c160a9883" "5.1 master 2145519961"; do + major_minor_number=$(echo $tuple | awk '{print $1}') # e.g. 5.0 + branch=$(echo $tuple | awk '{print $2}') # e.g. 'master', or '5.1' (it's a branch name) + first_commit=$(echo $tuple | awk '{print $3}') + + + + tempfile=$(mktemp /tmp/temp.XXXXXX) + echo "$0: for version=$major_minor_number, writing git output to $tempfile" + + patch_number=0 + # git rev-list --reverse $first_commit..$branch lists the revisions from + # $first_commit to $branch... --boundary causes it to include $first_commit + # in the range, but with a dash (-) included for the first commit, so we + # use a sed command to get rid of that. + for rev in $(git rev-list --reverse $first_commit..$branch --boundary | sed s/-//); do + # %h is abbrev. commit hash, %H is long commit hash, %cd is the commit date, + # %s is the one-line log message; x09 is tab. + # so we're printing "<patch-number> <short-hash> <long-hash> <date> <subject>" + # we'll later parse this and generate HTML.
+ pretty_str="${patch_number}%x09%h%x09%H%x09%cd%x09%s"; + git log --date=short --pretty="$pretty_str" -1 $rev + patch_number=$[patch_number+1] + done > $tempfile + + htmlfile=doc/$major_minor_number.html + echo "$0: for version=$major_minor_number, processing $tempfile to $htmlfile" + + cat $tempfile | perl -e ' + ($major_minor_number) = @ARGV; + while (<STDIN>) { + if (! m/^(\S+)\t(\S+)\t(\S+)\t(\S+)\t(.+)/) { + die "Could not parse line $_ in git output"; + } else { + $patch_number = $1; $short_commit = $2; $long_commit = $3; + $commit_date = $4; $commit_subject = $5; + if ($commit_subject =~ m/\(#(\d+)\)\s*$/) { + $pull_request_number = $1; + $pre_match = $`; # part before what was matched. + $pre_match =~ s/</&lt;/g; $pre_match =~ s/>/&gt;/g; + # if commit subject line ends with e.g. (#1302), which will + # be a pull request; create a href to github for that. + $commit_subject = $pre_match . + "<a href=\"https://github.com/kaldi-asr/kaldi/pull/$pull_request_number\">(#$pull_request_number)</a>"; + } else { + $commit_subject =~ s/</&lt;/g; $commit_subject =~ s/>/&gt;/g; + } + $commit_href = + "<a href=\"https://github.com/kaldi-asr/kaldi/commit/$long_commit\">$short_commit</a>"; + $line = "$major_minor_number.$patch_number $commit_href $commit_date $commit_subject <br>\n"; + print $line; + } + print "<br>
\n"; + } ' "$major_minor_number" >$htmlfile || exit 1 + echo "$0: generated file $htmlfile with $(wc -l <$htmlfile) lines" + # you might want to comment the command below if you are debugging the script. + rm $tempfile +done diff --git a/src/doc/graph_recipe_test.dox b/src/doc/graph_recipe_test.dox index 860b91a157c..ead544416bd 100644 --- a/src/doc/graph_recipe_test.dox +++ b/src/doc/graph_recipe_test.dox @@ -25,19 +25,19 @@ namespace kaldi { \page graph_recipe_test Decoding-graph creation recipe (test time) Here we explain our normal graph creation approach step by step, along - with certain data-preparation stages that are related to it. + with certain data-preparation stages that are related to it. Most of the details of this approach are not hardcoded into our tools; we are just explaining how it is currently being done. If this section is confusing, the best remedy is probably to read - "Speech Recognition - with Weighted Finite-State Transducers" by Mohri et al. - Be warned: that paper is quite long, and reading it will take at least a + "Speech Recognition + with Weighted Finite-State Transducers" by Mohri et al. + Be warned: that paper is quite long, and reading it will take at least a few hours for those not already familiar with FSTs. Another good resource is the OpenFst website which will provide more context on things like symbol tables. \section graph_symtab Preparing the initial symbol tables - + We need to prepare the OpenFst symbol tables words.txt and phones.txt. These assign integer id's to all the words and phones in our system. Note that OpenFst reserves symbol zero for epsilon. An example of how the @@ -56,7 +56,7 @@ symbol tables look for the WSJ task is: ## tail -2 words.txt }RIGHT-BRACE 123683 #0 123684 -## head data/phones.txt +## head data/phones.txt 0 SIL 1 SPN 2 @@ -65,10 +65,10 @@ AA 4 AA_B 5 \endverbatim The words.txt file contains the single disambiguation symbol "#0" (used for epsilon -on the input of G.fst). This is the last-numbered word in our recipe. Be careful -with this if your +on the input of G.fst). This is the last-numbered word in our recipe. Be careful +with this if your lexicon contains a word "#0". The phones.txt file does not contain disambiguation -symbols but after creating L.fst we will create a file phones_disambig.txt that +symbols but after creating L.fst we will create a file phones_disambig.txt that has the disambiguation symbols in (this is just useful for debugging). \section graph_lexicon Preparing the lexicon L @@ -77,7 +77,7 @@ has the disambiguation symbols in (this is just useful for debugging). Our C++ tools will never interact with this, it will just be used by a script that creates lexicon FST. A small part of our WSJ lexicon is: \verbatim -## head data/lexicon.txt +## head data/lexicon.txt !SIL SIL @@ -93,7 +93,7 @@ they are treated as distinct phones (however, we do handle the tree-building specially for this setup; read about the roots file in \ref tree_building). Notice that we allow words with empty phonetic representations. -This lexicon will be used to create the L.fst used in training (without +This lexicon will be used to create the L.fst used in training (without disambiguation symbols). We also create a lexicon with disambiguation symbols, used in decoding-graph creation. An extract of this file is here: @@ -125,11 +125,11 @@ ZH_S 339 #3 343 \endverbatim The numbers are so high because in this (WSJ) recipe we added -stress and position information to the phones. +stress and position information to the phones. 
Note that the disambiguation symbols used for the empty words (i.e. \<s\> and \</s\>) have to be distinct from those used for the normal -words, so the "normal" disambiguation symbols in this -example start from \#3. +words, so the "normal" disambiguation symbols in this +example start from \#3. The command to convert the lexicon without disambiguation symbols into an FST is: @@ -143,7 +143,7 @@ Here, the script make_lexicon_fst.pl creates the text representation of the FST. The 0.5 is the silence probability (i.e. at the beginning of sentence and after each word, we output silence with probability 0.5; the probability mass assigned to having no silence is -1.0 - 0.5 = 0.5. The rest of the commands in this example +1.0 - 0.5 = 0.5. The rest of the commands in this example relate to converting the FST into compiled form; fstarcsort is necessary because we are going to compose later. @@ -156,9 +156,9 @@ the input symbol is the first phone of that word. It is important both for the efficiency of composition and the effectiveness of minimization that the output symbol should be as early as possible (i.e. at the beginning not the end of the word). At the end of each -word, to handle optional silence, the transition corresponding to -the last phone is in two forms: one to the loop state and one to -the "silence state" which has a transition to the loop state. +word, to handle optional silence, the transition corresponding to +the last phone is in two forms: one to the loop state and one to +the "silence state" which has a transition to the loop state. We don't bother putting optional silence after silence words, which we define as words that have just one phone that is the silence phone. @@ -203,7 +203,7 @@ gunzip -c data_prep/lm.arpa.gz | \ arpa2fst --disambig-symbol=#0 \ --read-symbol-table=data/words.txt - data/G.fst \endverbatim -The last command (fstisstochastic) is a diagnostic step (see \ref fst_algo_stochastic). +The last command (fstisstochastic) is a diagnostic step (see \ref fst_algo_stochastic). In one typical example, it prints out the numbers: \verbatim 9.14233e-05 -0.259833 @@ -211,7 +211,7 @@ In one typical example, it prints out the numbers: The first number is small, so it confirms that there is no state that has the probability mass of its arcs plus final-state significantly less than one. The second number is significant, and this means that there are states that -have "too much" probability mass (the numeric values of the weights in the +have "too much" probability mass (the numeric values of the weights in the FSTs can generally be interpreted as negated log probabilities). Having some states with "too much" probability mass is normal for the FST representations of language models with backoff. During later graph creation steps we will @@ -229,7 +229,8 @@ inside Kaldi processes, not at the script level. \verbatim fsttablecompose data/L_disambig.fst data/G.fst | \ fstdeterminizestar --use-log=true | \ - fstminimizeencoded > somedir/LG.fst + fstminimizeencoded | fstpushspecial | \ + fstarcsort --sort-type=ilabel > somedir/LG.fst \endverbatim There are some small differences from OpenFst's algorithms. We use a more efficient composition algorithm (see \ref fst_algo_composition) @@ -238,16 +239,27 @@ inside Kaldi processes, not at the script level. program fstdeterminizestar. The option --use-log=true asks the program to first cast the FST to the log semiring; this preserves stochasticity (in the log semiring); see \ref fst_algo_stochastic.
- - We do minimization with the program "fstminimizeencoded". This is mostly the + + We do minimization with the program "fstminimizeencoded". This is mostly the same as the version of OpenFst's minimization algorithm that applies to weighted acceptors; the only change relevant here is that - it avoids pushing weights, hence preserving stochasticity (see \ref fst_algo_minimization + it avoids pushing weights, hence preserving stochasticity (see \ref fst_algo_minimization for details). + The program "fstpushspecial" is similar to OpenFst's "fstpush" program, but if + the weights don't sum to one it ensures that all the states "sum to" the same + value (possibly different from one), rather than trying to push the "extra" + weight to the start or end of the graph. This has the advantage that it + can never fail ("fstpush" can fail or loop for a very long time if the FST "sums to" infinity); + it is also much faster. See push-special.cc for more detailed documentation. + + The "fstarcsort" stage sorts the arcs in a way that will help later composition + operations to be fast. + + \section graph_clg Preparing CLG - To get a transducer whose inputs are context-dependent phones, we need to prepare an FST + To get a transducer whose inputs are context-dependent phones, we need to prepare an FST called CLG, which is equivalent to C o L o G, where L and G are the lexicon and grammar and C represents the phonetic context. For a triphone system, the input symbols of C would be of the form a/b/c (i.e. triples of phones), and the output symbols would be single @@ -255,7 +267,7 @@ inside Kaldi processes, not at the script level. windows, and how we generalize to different context sizes. Firstly, we describe how we would create the context FST C if we were to make it by itself and compose normally (our scripts do not actually work this way, for efficiency and scalability - reasons). + reasons). \subsection graph_c Making the context transducer @@ -274,7 +286,7 @@ inside Kaldi processes, not at the script level. this doesn't represent a phone since (assuming P = 1), the central element is \<eps\> which is not a phone. In this case we let the input symbol of the arc be #-1 which is a special symbol we introduce for this purpose (we don't use - epsilon here as the standard recipe does, as it can lead to nondeterminizability + epsilon here as the standard recipe does, as it can lead to nondeterminizability when there are empty words). The end-of-utterance case is a little complicated. The context FST has, on the @@ -283,7 +295,7 @@ inside Kaldi processes, not at the script level. all symbols we need to flush out the last triphone (e.g. a/b/\<eps\>, where \<eps\> represents undefined context). The natural way to do this would be to have a transition with a/b/\<eps\> on its input and \<eps\> on its output, from - the state a/b to a final state (e.g. b/\<eps\> or a special final state). But this is + the state a/b to a final state (e.g. b/\<eps\> or a special final state). But this is inefficient for composition, because if it was not the end of the utterance we would have to explore such transitions before finding them pruned away. Instead we use $ as the end-of-utterance symbol, and make sure it appears once @@ -292,11 +304,11 @@ inside Kaldi processes, not at the script level. avoid the hassle of having to work out how many subsequential symbols to add to LG, we just allow it to accept any number of such symbols at the end of utterance. This is achieved by the function AddSubsequentialLoop() and the command-line program - fstaddsubsequentialloop. + fstaddsubsequentialloop.
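+The idea of AddSubsequentialLoop() can be sketched as follows (an
+illustration in Python over a hypothetical FST interface, not the actual
+Kaldi code):
+\verbatim
+# Allow any number of subsequential symbols '$' at the end of the
+# utterance: add a new final state with a '$' self-loop, reachable from
+# each previously-final state by a '$' arc.
+def add_subsequential_loop(fst, subseq_sym, eps=0):
+    finals = list(fst.final_states())
+    new_final = fst.add_state()
+    fst.set_final(new_final)
+    fst.add_arc(new_final, new_final, ilabel=subseq_sym, olabel=eps)
+    for s in finals:
+        fst.add_arc(s, new_final, ilabel=subseq_sym, olabel=eps)
+\endverbatim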
-If we wanted C on its own, we would first need a list of -disambiguation symbols; and we would also need to work out an unused symbol id we could use +If we wanted C on its own, we would first need a list of +disambiguation symbols; and we would also need to work out an unused symbol id we could use for the subsequential symbol, as follows: \verbatim grep '#' data/phones_disambig.txt | awk '{print $2}' > $dir/disambig_phones.list @@ -313,17 +325,17 @@ The program fstmakecontextfst needs the list of phones, a list of disambiguation and the identity of the subsequential symbol. In addition to C.fst, it writes out the file "ilabels" that interprets the symbols on the left of C.fst (see \ref tree_ilabel). The composition with LG can be done as follows: -\verbatim +\verbatim fstaddsubsequentialloop $subseq_sym $dir/LG.fst | \ fsttablecompose $dir/C.fst - > $dir/CLG.fst \endverbatim For printing out C.fst and anything using the same symbols -that index "ilabels", we can make a suitable symbol table using the following +that index "ilabels", we can make a suitable symbol table using the following command: \verbatim fstmakecontextsyms data/phones.txt $dir/ilabels > $dir/context_syms.txt -\endverbatim -This command knows about the "ilabels" format (\ref tree_ilabel). +\endverbatim +This command knows about the "ilabels" format (\ref tree_ilabel). An example random path through the CLG fst (for Resource Management), printed out with this symbol table, is as follows: \verbatim @@ -338,7 +350,7 @@ out with this symbol table, is as follows: 6 7 ay/z/sil 7 8 z/sil/<eps> 8 -\endverbatim +\endverbatim \subsection graph_compose_c Composing with C dynamically @@ -350,7 +362,7 @@ fstcomposecontext --read-disambig-syms=$dir/disambig_phones.list \ --write-disambig-syms=$dir/disambig_ilabels.list \ $dir/ilabels < $dir/LG.fst >$dir/CLG.fst \endverbatim - If we had different context parameters N and P than the defaults (3 and 1), we + If we had different context parameters N and P than the defaults (3 and 1), we would supply extra options to this program. This program writes the file "ilabels" (see \ref tree_ilabel) which interprets the input symbols of CLG.fst. The first few lines of an ilabels file from the Resource @@ -375,29 +387,29 @@ of epsilon, to ensure determinizability. \subsection graph_change_ilabel Reducing the number of context-dependent input symbols - After creating CLG.fst, there is an optional graph creation stage + After creating CLG.fst, there is an optional graph creation stage that can reduce its size. We use the program make-ilabel-transducer, which works out from the decision tree and the HMM topology information, which subsets of context-dependent phones would - correspond to the same compiled graph and can therefore be merged (we pick + correspond to the same compiled graph and can therefore be merged (we pick an arbitrary element of each subset and convert all context windows to that context window). This is a similar concept to HTK's logical-to-physical mapping. The command is: \verbatim make-ilabel-transducer --write-disambig-syms=$dir/disambig_ilabels_remapped.list \ - $dir/ilabels $tree $model $dir/ilabels.remapped > $dir/ilabel_map.fst + $dir/ilabels $tree $model $dir/ilabels.remapped > $dir/ilabel_map.fst \endverbatim This program requires the tree and the model; it outputs a new ilabel_info object called "ilabels.remapped"; this is in the same format as the original "ilabels" file, but has fewer lines.
The FST "ilabel_map.fst" is composed with CLG.fst and remaps the labels. After doing this we determinize - and minimize so we can immediately realize any size reductions: + and minimize so we can immediately realize any size reductions: \verbatim fstcompose $dir/ilabel_map.fst $dir/CLG.fst | \ fstdeterminizestar --use-log=true | \ fstminimizeencoded > $dir/CLG2.fst \endverbatim - For typical setups this stage does not actually reduce the graph size + For typical setups this stage does not actually reduce the graph size by very much (5\% to 20\% reduction is typical), and in any case it is only the size of intermediate graph-creation stages that we are reducing by this mechanism. But the savings could become significant @@ -421,24 +433,24 @@ of epsilon, to ensure determinizability. sequences of three arcs. H also has self-loops on the initial state for each of the disambiguation symbols (\#-1, \#0, \#1, \#2, \#3 and so on). - The section of script that makes the H transducer (we call it Ha + The section of script that makes the H transducer (we call it Ha because it lacks self-loops at this point), is: -\verbatim +\verbatim make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ --transition-scale=1.0 $dir/ilabels.remapped \ $tree $model > $dir/Ha.fst \endverbatim There is an optional argument to set the transition scale; in our - current training scripts, this scale is 1.0. This scale only + current training scripts, this scale is 1.0. This scale only affects the parts of the transitions that do not relate to self-loop probabilities, and in the normal topology (Bakis model) it has no effect at all; see \ref hmm_scale for more explanation. - In addition to the FST, the program also writes a list of + In addition to the FST, the program also writes a list of disambiguation symbols which must be removed later. - + \section graph_hclg Making HCLG - The first step in making the final graph HCLG is to make the + The first step in making the final graph HCLG is to make the HCLG that lacks self-loops. The command in our current script is as follows: \verbatim @@ -457,15 +469,15 @@ make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ \section graph_selfloops Adding self-loops to HCLG Adding self-loops to HCLG is done by the following command: -\verbatim +\verbatim add-self-loops --self-loop-scale=0.1 \ --reorder=true $model < $dir/HCLGa.fst > $dir/HCLG.fst \endverbatim See \ref hmm_scale for an explanation of how the self-loop-scale of 0.1 - is applied (note that it also affects the non-self-loop probabilities). + is applied (note that it also affects the non-self-loop probabilities). For an explanation of the "reorder" option, see \ref hmm_reorder; the "reorder" option increases decoding speed but is not compatible with - the \ref decoder_kaldi "kaldi decoder". + the \ref decoder_kaldi "kaldi decoder". The add-self-loops program does not just add self-loops; it may also have to duplicate states and add epsilon transitions in order to ensure that the self-loops can be added in a consistent way. This @@ -476,7 +488,7 @@ make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ G.fst, LG.fst, CLG.fst and HCLGa.fst, but not for HCLG.fst. We do not determinize again after the add-self-loops stage; this would fail because we have already removed the disambiguation symbols. 
Anyway, - this would be slow and we believe that there is nothing further to be gained from + this would be slow and we believe that there is nothing further to be gained from determinizing and minimizing at this point. diff --git a/src/doc/hmm.dox b/src/doc/hmm.dox index 5788b95d9c0..c410b1ba5a1 100644 --- a/src/doc/hmm.dox +++ b/src/doc/hmm.dox @@ -61,9 +61,12 @@ namespace kaldi { \endverbatim There is one TopologyEntry in this particular HmmTopology object, and it covers phones 1 through 8 (so in this example there are just eight phones and they all -share the same topology). There are three emitting states; each has a self-loop +share the same topology). There are three emitting states (i.e. states that +have pdfs associated with them and 'emit' feature vectors); each has a self-loop and a transition to the next state. There is also a fourth, non-emitting state, -state 3 (there is no \<Transition\> entry for it) which has no transitions out of it. This is +state 3 (there is no \<Transition\> entry for it) which has no transitions out of it +(implicitly, it connects to the next phone in the sequence). +This is a standard feature of these topology entries; Kaldi treats the first state (state zero) as the start state, and the last state, which should always be nonemitting and have no transitions out of it, has final-probability one. You diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index 69d561e8bff..9afe831ecc4 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -348,8 +348,8 @@ directories. You may find such links in, for example, This script will help you to get decoding results.

Task

-From \c kaldi-trunk/egs/voxforge/local copy the script \c score.sh into -exactly same location in your project (\c kaldi-trunk/egs/digits/local). +From \c kaldi-trunk/egs/voxforge/s5/local copy the script \c score.sh into +a similar location in your project (\c kaldi-trunk/egs/digits/local). \subsection kaldi_for_dummies_srilm SRILM installation @@ -413,13 +413,13 @@ b.) \c path.sh
export KALDI_ROOT=`pwd`/../.. # Setting paths to useful tools -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH # Defining audio data directory (modify it for your installation directory!) export DATA_ROOT="/home/{user}/kaldi-trunk/egs/digits/digits_audio" -# Variable that stores path to MITLM library -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/tools/mitlm-svn/lib +# Enable SRILM +source $KALDI_ROOT/tools/env.sh # Variable needed for proper data sorting export LC_ALL=C @@ -519,7 +519,7 @@ echo "===== MAKING G.fst =====" echo lang=data/lang -cat $local/tmp/lm.arpa | arpa2fst - | fstprint | utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | fstrmepsilon | fstarcsort --sort_type=ilabel > $lang/G.fst +arpa2fst --disambig-symbol=#0 --read-symbol-table=$lang/words.txt $local/tmp/lm.arpa $lang/G.fst echo echo "===== MONO TRAINING =====" diff --git a/src/doc/mainpage.dox b/src/doc/mainpage.dox index 4cc684e85b8..3b21f6174b0 100644 --- a/src/doc/mainpage.dox +++ b/src/doc/mainpage.dox @@ -34,14 +34,15 @@ location. kaldi-asr.org/doc is the definitive location of this documentation. Kaldi's code repository is now located at http://github.com/kaldi-asr/kaldi - - See also the top level of kaldi-asr.org, where + + See also the top level of kaldi-asr.org, where you can download pre-built models.

- - \subpage about + - \subpage about - \subpage other - - \subpage install + - \subpage install + - \subpage versions - \subpage dependencies - \subpage legal - \subpage tutorial @@ -49,26 +50,26 @@ - \subpage examples - \subpage glossary - \subpage data_prep - - \subpage build_setup - - \subpage style - - \subpage history - - \subpage matrix - - \subpage matrixwrap + - \subpage build_setup + - \subpage style + - \subpage history + - \subpage matrix + - \subpage matrixwrap - \subpage cudamatrix - - \subpage io + - \subpage io - \subpage io_tut - - \subpage error - - \subpage parse_options + - \subpage error + - \subpage parse_options - \subpage util - \subpage clustering - \subpage hmm - \subpage tree_internals - \subpage tree_externals - - \subpage graph + - \subpage graph - \subpage graph_recipe_test - \subpage graph_recipe_train - - \subpage fst_algo - - \subpage decoders + - \subpage fst_algo + - \subpage decoders - \subpage lattices - \subpage model - \subpage feat @@ -76,7 +77,7 @@ - \subpage dnn - \ref dnn1 - \ref dnn2 - - \ref dnn3 + - \ref dnn3 - \ref chain - \subpage online_decoding - \subpage kws diff --git a/src/doc/online_decoding.dox b/src/doc/online_decoding.dox index 52be3d38bca..799bfb5895f 100644 --- a/src/doc/online_decoding.dox +++ b/src/doc/online_decoding.dox @@ -410,6 +410,36 @@ utils/mkgraph.sh $lang_own $model_dir $graph_own_dir || exit 1; where $model_dir is the model directory which contains the model "final.mdl" and the tree "tree". We now can use $graph_own_dir/HCLG.fst to replace the old HCLG.fst. + + +\section online_decoding_nnet3 Online decoding with nnet3 models + +Online decoding with nnet3 models is basically the same as with nnet2 +models as described in \ref online_decoding_nnet2. However, there are +some limitations as to the model type you can use. In Kaldi 5.0 and +earlier, online nnet3 decoding does not support recurrent models. +In Kaldi 5.1 and later, online nnet3 decoding supports "forward" +recurrent models such as LSTMs, but not bidirectional ones like BLSTMs. +In addition, online nnet3 decoding with recurrent +models may not give optimal results unless +you use "Kaldi-5.1-style" configuration, including the "decay-time" +option and specifying --extra-left-context-initial 0; see +\ref dnn3_scripts_context for more discussion of these issues. + + +Many of the issues in online nnet3 decoding are the same as in nnet2 +decoding and the command lines are quite similar. For online nnet3 +decoding with Kaldi 5.1 and later, the best example script for online +decoding including model training is, at the +time of writing, egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +(currently this is only available in the 'shortcut' branch, +as Kaldi 5.1 has not yet been merged to master); +and for downloadable models that can be used with online nnet3 decoding, please +see http://kaldi-asr.org/models.html (the first model there, the ASPIRE model, +includes instructions in a README file).
+ + + */ diff --git a/src/doc/transform.dox b/src/doc/transform.dox index 6d487722124..dfeaf6f66d5 100644 --- a/src/doc/transform.dox +++ b/src/doc/transform.dox @@ -31,7 +31,7 @@ namespace kaldi { relate to the commonalities: - \ref transform_apply - \ref transform_perspk - - \ref transform_utt2spk + - \ref transform_utt2spk - \ref transform_compose - \ref transform_weight @@ -49,8 +49,8 @@ namespace kaldi { We next discuss regression class trees and transforms that use them: - \ref transform_regtree - - + + \section transform_apply Applying global linear or affine feature transforms In the case of feature-space transforms and projections that are global, @@ -59,22 +59,22 @@ namespace kaldi { projection is represented as a matrix by which we will left-multiply a feature vector, so the transformed feature is \f$ A x \f$. An affine transform or projection is represented the same way, but we imagine a 1 has been appended to the - feature vector, so the transformed feature is + feature vector, so the transformed feature is \f$ W \left[ \begin{array}{c} x \\ 1 \end{array} \right] \f$ where \f$ W = \left[ A ; b \right] \f$, with A and b being the linear transform and the constant offset. Note that this convention differs from some of the literature, where the 1 may appear as - the first dimension rather than the last. + the first dimension rather than the last. Global transforms and projections are generally written as a type Matrix<BaseFloat> to a single file, and speaker or utterance-specific transforms or projections are stored in a table of such matrices (see \ref io_sec_tables) - indexed by speaker-id or utterance-id. + indexed by speaker-id or utterance-id. Transforms may be applied to features using the program transform-feats. Its syntax is \verbatim transform-feats <transform> <input-feats> <output-feats> -\endverbatim +\endverbatim where <input-feats> is an rspecifier, <output-feats> is a wspecifier, and <transform> may be an rxfilename or an rspecifier (see \ref io_sec_specifiers and \ref io_sec_xfilename). The program will work out whether the transform @@ -83,14 +83,14 @@ namespace kaldi { This program is typically used as part of a pipe. A typical example is: \verbatim - feats="ark:splice-feats scp:data/train.scp ark:- | + feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $dir/0.mat ark:- ark:-|" some-program some-args "$feats" some-other-args ... \endverbatim Here, the file 0.mat contains a single matrix. An example of applying speaker-specific transforms is: \verbatim - feats="ark:add-deltas scp:data/train.scp ark:- | + feats="ark:add-deltas scp:data/train.scp ark:- | transform-feats --utt2spk=ark:data/train.utt2spk ark:$dir/0.trans ark:- ark:-|" some-program some-args "$feats" some-other-args ... \endverbatim @@ -98,33 +98,33 @@ A per-utterance example would be as above but removing the --utt2spk option. In this example, the archive file 0.trans would contain transforms (e.g. CMLLR transforms) indexed by speaker-id, and the file data/train.utt2spk would have lines of the form "utt-id spk-id" (see next section for more explanation). -The program transform-feats does not care how the transformation matrix was +The program transform-feats does not care how the transformation matrix was estimated, it just applies it to the features. After it has been through all the features it prints out the average per-frame log determinant. This can be useful when comparing objective functions (this log determinant would have to be added to the per-frame likelihood printed out by programs like gmm-align, gmm-acc-stats, or gmm-decode-kaldi.
If the linear part A of the transformation (i.e. ignoring the offset term) is not square, -then the program will instead print out the per-frame average of +then the program will instead print out the per-frame average of \f$ \frac{1}{2} \mathbf{logdet} (A A^T) \f$. It refers to this as the pseudo-log-determinant. -This is useful in checking convergence of MLLT estimation where the transformation matrix +This is useful in checking convergence of MLLT estimation where the transformation matrix being applied is the MLLT matrix times an LDA matrix. \section transform_perspk Speaker-independent versus per-speaker versus per-utterance adaptation Programs that estimate transforms are generally set up to do a particular kind of adaptation, i.e. speaker-independent versus (speaker- or utterance-specific). For example, LDA -and MLLT/STC transforms are speaker-independent but fMLLR transforms are speaker- or +and MLLT/STC transforms are speaker-independent but fMLLR transforms are speaker- or utterance-specific. Programs that estimate speaker- or utterance-specific transforms will work in per-utterance mode by default, but in per-speaker mode if the --spk2utt -option is supplied (see below). +option is supplied (see below). One program that can accept either speaker-independent or speaker- or utterance-specific transforms is transform-feats. This program detects whether the first argument (the transform) is an rxfilename (see \ref io_sec_xfilename) or an rspecifier (see \ref io_sec_specifiers). If the former, it treats it as a speaker-independent transform (e.g. a file containing a single matrix). -If the latter, there are two choices. If no --utt2spk option is provided, +If the latter, there are two choices. If no --utt2spk option is provided, it treats the transform as a table of matrices indexed by utterance id. If an --utt2spk option is provided (utt2spk is a table of strings indexed by utterance that contains the string-valued speaker id), then the transforms are assumed to be indexed by speaker id, and the table @@ -133,13 +133,13 @@ provided to the --utt2spk option is used to map each utterance to a speaker id. \section transform_utt2spk Utterance-to-speaker and speaker-to-utterance maps At this point we give a general overview of the --utt2spk and --spk2utt options. - These options are accepted by programs that deal with transformations; they are used when + These options are accepted by programs that deal with transformations; they are used when you are doing per-speaker (as opposed to per-utterance) adaptation. Typically programs that process already-created transforms will need the --utt2spk - option and programs that create the transforms will need the --spk2utt option. + option and programs that create the transforms will need the --spk2utt option. A typical case is that there will be a file called some-directory/utt2spk that looks like: -\verbatim +\verbatim spk1utt1 spk1 spk1utt2 spk1 spk2utt1 spk2 @@ -148,11 +148,11 @@ spk2utt2 spk2 \endverbatim where these strings are just examples, they stand for generic speaker and utterance identifiers; and there will be a file called some-directory/spk2utt that looks like: -\verbatim +\verbatim spk1 spk1utt1 spk1utt2 spk2 spk2utt1 spk2utt2 ... -\endverbatim +\endverbatim and you will supply options that look like --utt2spk=ark:some-directory/utt2spk or --spk2utt=ark:some-directory/spk2utt. 
The 'ark:' prefix is necessary because these files are given as rspecifiers by the Table code, and are interpreted as archives @@ -177,7 +177,7 @@ spk2 spk2utt1 spk2utt2 for more discussion of this issue. \section transform_compose Composing transforms - + Another program that accepts generic transforms is the program compose-transforms. The general syntax is "compose-transforms a b c", and it performs the multiplication c = a b (although this involves a little more than matrix multiplication if a is affine). @@ -197,7 +197,7 @@ spk2 spk2utt1 spk2utt2 feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats 0.mat ark:- ark:- | transform-feats ark:1.trans ark:- ark:- |" - ... + ... \endverbatim In general, the transforms a and b that are the inputs to compose-transforms may be either speaker-independent transforms or speaker- or utterance-specific @@ -208,11 +208,11 @@ spk2 spk2utt1 spk2utt2 represent either tables or normal files (i.e. either {r,w}specifiers or {r,w}xfilenames), subject to consistency requirements. - If a is an affine transform, in order to perform the composition correctly, compose-transforms + If a is an affine transform, in order to perform the composition correctly, compose-transforms needs to know whether b is affine or linear (it does not know this because it does not have access to the dimension of the features that are transformed by b). This is controlled by the option --b-is-affine (bool, default false). - If b is affine but you forget to set this option and a is affine, compose-transforms + If b is affine but you forget to set this option and a is affine, compose-transforms will treat b as a linear transform from dimension (the real input feature dimension) plus one, and will output a transform whose input dimension is (the real input feature dimension) plus two. There is no way for "transform-feats" to interpret this when it is to be applied to features, @@ -225,7 +225,7 @@ Eliminating silence frames can be helpful when estimating speaker adaptive transforms such as CMLLR. This even appears to be true when using a multi-class approach with a regression tree (for which, see \ref transform_regtree). The way we implement this is by weighting down the posteriors associated with -silence phones. This takes place as a modification to the \ref hmm_post +silence phones. This takes place as a modification to the \ref hmm_post "state-level posteriors". An extract of a bash shell script that does this is below (this script is discussed in more detail in \ref transform_cmllr_global): \verbatim @@ -249,7 +249,7 @@ class LdaEstimate { void Accumulate(const VectorBase<BaseFloat> &data, int32 class_id, BaseFloat weight=1.0); }; -\endverbatim +\endverbatim The program acc-lda accumulates LDA statistics using the acoustic states (i.e. pdf-ids) as the classes. It requires the transition model in order to map the alignments (expressed in terms of transition-ids) to pdf-ids. However, it is not limited to a particular type of acoustic model. @@ -262,16 +262,16 @@ when using LDA as an initialization for HLDA. \section transform_splice Frame splicing -Frame splicing (e.g. splicing nine consecutive frames together) is typically done +Frame splicing (e.g. splicing nine consecutive frames together) is typically done to the raw MFCC features prior to LDA. The program splice-feats does this.
A typical line from a script that uses this is the following: \verbatim feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $dir/0.mat ark:- ark:-|" \endverbatim -and the "feats" variable would later be used as an rspecifier (c.f. \ref io_sec_specifiers) +and the "feats" variable would later be used as an rspecifier (cf. \ref io_sec_specifiers) by some program that needs to read features. In this example we don't specify the number of frames to splice -together because we are using the defaults (--left-context=4, --right-context=4, or +together because we are using the defaults (--left-context=4, --right-context=4, or 9 frames in total). \section transform_delta Delta feature computation Computation of delta features is done by the program add-deltas, which uses the function ComputeDeltas. The delta feature computation has the same default setup as HTK's, i.e. to compute the first delta feature we multiply the features -by a sliding window of values [ -2, 1, 0, 1, 2 ], and then normalize by +by a sliding window of values [ -2, -1, 0, 1, 2 ], and then normalize by dividing by (2^2 + 1^2 + 0^2 + 1^2 + 2^2 = 10). The second delta feature is computed by applying the same approach to the first delta feature. The number of frames of context on each side is controlled by --delta-window (default: 2) @@ -311,9 +311,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" case they need to be defined slightly differently for the accepted and rejected dimensions. Suppose the original feature dimension is D and the - reduced feature dimension is K. + reduced feature dimension is K. Let us forget the iteration superscript r, and use subscript j for state and - m for Gaussian mixture. + m for Gaussian mixture. For accepted dimensions (\f$0 \leq i < K\f$), the statistics are: \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } (\mu_{jm} - \mathbf{x}(t)) (\mu_{jm} - \mathbf{x}(t))^T @@ -333,13 +333,13 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" same, so in the code we only store statistics for K+1 rather than D dimensions. Also, it is convenient for the program that accumulates the statistics to only have - access to the K-dimensional model, so during HLDA accumulation we accumulate + access to the K-dimensional model, so during HLDA accumulation we accumulate statistics sufficient to estimate the K-dimensional means \f$\mu_{jm}\f$, and instead of - G we accumulate the following statistics: for accepted dimensions (\f$0 \leq i < K\f$), + G we accumulate the following statistics: for accepted dimensions (\f$0 \leq i < K\f$), \f[ \mathbf{S}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } \mathbf{x}(t) \mathbf{x}(t)^T \f] - and for rejected dimensions \f$K \leq i < D\f$ + and for rejected dimensions \f$K \leq i < D\f$ \f[ \mathbf{S}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \mathbf{x}(t) \mathbf{x}(t)^T , \f] @@ -350,13 +350,13 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" \f] and for \f$K \leq i < D\f$, \f[ - \mathbf{G}^{(i)} = \mathbf{S}^{(i)} - \beta \mu \mu^T, + \mathbf{G}^{(i)} = \mathbf{S}^{(i)} - \beta \mu \mu^T, \f] where \f$ \beta = \sum_{j,m} \gamma_{jm} \f$ is the total count and \f$\mu = \frac{1}{\beta} \sum_{j,m} \mu_{j,m}\f$ is the global feature mean.
After computing the transform from the G statistics using the same computation as MLLT, we output the transform, and we also use the first K rows of the transform to project the means into dimension K and write out the transformed model. - + The computation described here is fairly slow; it is \f$ O(K^3) \f$ on each frame, and K is fairly large (e.g. 117). This is the price we pay for compact statistics; if we stored full mean and variance statistics, the per-frame computation would be \f$O(K^2)\f$. @@ -366,14 +366,14 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" the frames. If this option is activated, we need to store two separate versions of the sufficient statistics for the means. One version of the mean statistics, accumulated on the subset, is only used in the HLDA computation, and - corresponds to the quantities \f$\gamma_{jm}\f$ and \f$\mu_{jm}\f$ in the formulas above. + corresponds to the quantities \f$\gamma_{jm}\f$ and \f$\mu_{jm}\f$ in the formulas above. The other version of the mean statistics is accumulated on all the training data - and is used to write out the transformed model. - + and is used to write out the transformed model. + The overall HLDA estimation process is as follows (see rm_recipe_2/scripts/train_tri2j.sh): - First initialize it with LDA (we store both the reduced dimension matrix and the full matrix). - - Start model-building and training process. On certain (non-consecutive) + - Start model-building and training process. On certain (non-consecutive) iterations where we have decided to do the HLDA update, do the following: - Accumulate HLDA statistics (S, plus statistics for the full-dimensional means). The program that accumulates these (gmm-acc-hlda) needs the model, the un-transformed features, @@ -384,14 +384,14 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" transformation matrix which it needs to start the optimization and to correctly report auxiliary function changes. It outputs the new transform (both full and reduced dimension), and the model with newly estimated and transformed means. - + \section transform_mllt Global Semi-tied Covariance (STC) / Maximum Likelihood Linear Transform (MLLT) estimation Global STC/MLLT is a square feature-transformation matrix. For more details, - see "Semi-tied Covariance Matrices for Hidden Markov Models", by Mark Gales, + see "Semi-tied Covariance Matrices for Hidden Markov Models", by Mark Gales, IEEE Transactions on Speech and Audio Processing, vol. 7, 1999, pages 272-281. Viewing it as a feature-space transform, the objective function is the average - per-frame log-likelihood of the transformed features given the model, plus the + per-frame log-likelihood of the transformed features given the model, plus the log determinant of the transform. The means of the model are also rotated by transform in the update phase. The sufficient statistics are the following, for \f$ 0 \leq i < D \f$ where D is the feature dimension: @@ -399,9 +399,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } (\mu_{jm} - \mathbf{x}(t)) (\mu_{jm} - \mathbf{x}(t))^T \f] See the reference, Equations (22) and (23) for the update equations. These are - basically a simplified form of the diagonal row-by-row Constrained MLLR/fMLLR update + basically a simplified form of the diagonal row-by-row Constrained MLLR/fMLLR update equations, where the first-order term of the quadratic equation disappears. 
Note that - our implementation differs from that reference by using a column of the inverse of the matrix + our implementation differs from that reference by using a column of the inverse of the matrix rather than the cofactor, since multiplying by the determinant does not make a difference to the result and could potentially cause problems with floating-point underflow or overflow. @@ -411,9 +411,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" - Estimate the LDA transformation matrix (we only need the first rows of this, not the full matrix). Call this matrix \f$\mathbf{M}\f$. - - Start a normal model building process, always using features transformed with \f$\mathbf{M}\f$. + - Start a normal model building process, always using features transformed with \f$\mathbf{M}\f$. At certain selected iterations (where we will update the MLLT matrix), we do the following: - - Accumulate MLLT statistics in the current fully-transformed space + - Accumulate MLLT statistics in the current fully-transformed space (i.e., on top of features transformed with \f$\mathbf{M}\f$). For efficiency we do this using a subset of the training data. - Do the MLLT update; let this produce a square matrix \f$\mathbf{T}\f$. @@ -423,34 +423,34 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" The programs involved in MLLT estimation are gmm-acc-mllt and est-mllt. We also need the programs gmm-transform-means (to transform the Gaussian means using \f$\mathbf{T}\f$), and compose-transforms (to do the multiplication \f$\mathbf{M} \leftarrow \mathbf{T} \mathbf{M} \f$). - + \section transform_cmllr_global Global CMLLR/fMLLR transforms Constrained Maximum Likelihood Linear Regression (CMLLR), also known as feature-space MLLR (fMLLR), is an affine feature transform of the form \f$ \mathbf{x} \rightarrow \mathbf{A} \mathbf{x} + \mathbf{b} \f$, - which we write in the form \f$ \mathbf{x} \rightarrow \mathbf{W} \mathbf{x}^+ \f$, where + which we write in the form \f$ \mathbf{x} \rightarrow \mathbf{W} \mathbf{x}^+ \f$, where \f$\mathbf{x}^+ = \left[\begin{array}{c} \mathbf{x} \\ 1 \end{array} \right]\f$ is the feature with - a 1 appended. Note that this differs from some of the literature where the 1 comes first. + a 1 appended. Note that this differs from some of the literature where the 1 comes first. For a review paper that explains CMLLR and the estimation techniques we use, see "Maximum likelihood linear transformations for HMM-based speech recognition" by Mark Gales, - Computer Speech and Language Vol. 12, pages 75-98. + Computer Speech and Language Vol. 12, pages 75-98. The sufficient statistics we store are: \f[ \mathbf{K} = \sum_{t,j,m} \gamma_{j,m}(t) \Sigma_{jm}^{-1} \mu_{jm} \mathbf{x}(t)^+ \f] where \f$\Sigma_{jm}^{-1}\f$ is the inverse covariance matrix, and for \f$0 \leq i < D \f$ where D is the feature dimension, - \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{j,m}(t) \frac{1}{\sigma^2_{j,m}(i)} \mathbf{x}(t)^+ \left.\mathbf{x}(t)^+\right.^T \f] + \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{j,m}(t) \frac{1}{\sigma^2_{j,m}(i)} \mathbf{x}(t)^+ \left.\mathbf{x}(t)^+\right.^T \f] Our estimation scheme is the standard one, see Appendix B of the reference (in particular section B.1, "Direct method over rows"). We differ by using a column of the inverse in place of the cofactor row, i.e. ignoring the factor of the determinant, as it does not affect the result and causes danger of numerical underflow or overflow. 
- Estimation of global Constrained MLLR (CMLLR) transforms is done by the + Estimation of global Constrained MLLR (CMLLR) transforms is done by the class FmllrDiagGmmAccs, - and by the program gmm-est-fmllr (also see gmm-est-fmllr-gpost). The syntax + and by the program gmm-est-fmllr (also see gmm-est-fmllr-gpost). The syntax of gmm-est-fmllr is: \verbatim gmm-est-fmllr [options] \ @@ -486,27 +486,27 @@ feats="ark:add-deltas --print-args=false scp:data/test.scp ark:- | gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \ "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali 2>$dir/decode.log -\endverbatim +\endverbatim \section transform_lvtln Linear VTLN (LVTLN) In recent years, there have been a number of papers that describe implementations of Vocal Tract Length Normalization (VTLN) that - work out a linear feature transform corresponding to each VTLN + work out a linear feature transform corresponding to each VTLN warp factor. See, for example, ``Using VTLN for broadcast news transcription'', by D. Y. Kim, S. Umesh, M. J. F. Gales, T. Hain and P. C. Woodland, ICSLP 2004. - + We implement a method in this general category using the class LinearVtln, and programs such as gmm-init-lvtln, gmm-train-lvtln-special, and gmm-est-lvtln-trans. The LinearVtln object essentially stores a set of linear feature transforms, one for each warp factor. Let these linear feature transform matrices be \f[\mathbf{A}^{(i)}, 0\leq i < N, \f] - where for instance we might have \f$N\f$=31, corresponding to 31 different warp - factors. We will describe below how we obtain these matrices below. + where for instance we might have \f$N\f$=31, corresponding to 31 different warp + factors. We describe below how we obtain these matrices. The way the speaker-specific transform is estimated is as follows. First, we require some kind of model and a corresponding alignment. In the - example scripts we do this either with a small monophone model, or with + example scripts we do this either with a small monophone model, or with a full triphone model. From this model and alignment, and using the original, unwarped features, we compute the conventional statistics for estimating CMLLR. When computing the LVTLN transform, what we do is take each matrix @@ -514,33 +514,33 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ maximizes the CMLLR auxiliary function for the transform \f$\mathbf{W} = \left[ \mathbf{A}^{(i)} \, ; \, \mathbf{b} \right]\f$. The value of \f$\mathbf{W}\f$ that gave the best auxiliary function value - (i.e. maximizing over i) becomes the transform for that speaker. Since we + (i.e. maximizing over i) becomes the transform for that speaker. Since we are estimating a mean offset here, we are essentially combining a kind of model-based cepstral mean normalization (or alternatively an offset-only form of CMLLR) with VTLN warping implemented - as a linear transform. This avoids us having to implement mean normalization + as a linear transform. This avoids us having to implement mean normalization as a separate step. We next describe how we estimate the matrices \f$\mathbf{A}^{(i)}\f$. We don't do this in the same way as described in the referenced paper; our method is simpler (and easier to justify). Here we describe our computation for a particular warp factor; in the current scripts we have 31 distinct warp - factors ranging from 0.85, 0.86, ..., 1.15. + factors ranging from 0.85, 0.86, ..., 1.15.
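Schematically, the per-speaker maximization over \f$i\f$ described earlier amounts to the following loop (illustrative only: 'Matrix', 'PickBestWarp' and the evaluation callback are hypothetical stand-ins, not Kaldi classes):

\verbatim
#include <functional>
#include <limits>
#include <vector>

typedef std::vector<std::vector<double> > Matrix;  // one warp matrix A^(i)

// auxf_with_best_offset(A, &b) is assumed to fill 'b' with the offset that
// maximizes the CMLLR auxiliary function for W = [ A ; b ] given the
// speaker's statistics, and to return that auxiliary-function value.
int PickBestWarp(
    const std::vector<Matrix> &warp_matrices,  // A^(0)..A^(N-1), e.g. N = 31
    const std::function<double(const Matrix &, std::vector<double> *)>
        &auxf_with_best_offset,
    std::vector<double> *best_offset) {
  int best_i = -1;
  double best_auxf = -std::numeric_limits<double>::infinity();
  for (size_t i = 0; i < warp_matrices.size(); i++) {
    std::vector<double> b;
    double auxf = auxf_with_best_offset(warp_matrices[i], &b);
    if (auxf > best_auxf) {  // keep the warp factor with the best objective
      best_auxf = auxf;
      best_i = static_cast<int>(i);
      *best_offset = b;
    }
  }
  return best_i;  // index of the winning warp factor
}
\endverbatim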
We take a subset of feature data (e.g. several tens of utterances), and for this subset we compute both the original and transformed features, where the transformed features are computed using a conventional VTLN computation - (see \ref feat_vtln). - Call the original and transformed features \f$\mathbf{x}(t)\f$ and \f$\mathbf{y}(t)\f$ respectively, + (see \ref feat_vtln). + Call the original and transformed features \f$\mathbf{x}(t)\f$ and \f$\mathbf{y}(t)\f$ respectively, where \f$t\f$ will range over the frames of the selected utterances. We compute the affine transform that maps \f$\mathbf{x}\f$ to \f$\mathbf{y}\f$ in a least-squares - sense, i.e. if \f$\mathbf{y}' = \mathbf{A} \mathbf{x} + \mathbf{b}\f$, + sense, i.e. if \f$\mathbf{y}' = \mathbf{A} \mathbf{x} + \mathbf{b}\f$, we compute \f$\mathbf{A}\f$ and \f$\mathbf{b}\f$ that minimize the sum-of-squares difference \f$\sum_t (\mathbf{y}'(t) - \mathbf{y}(t) )^T (\mathbf{y}'(t) - \mathbf{y}(t) )\f$. Then we normalize the diagonal variance as follows: we compute the variance of the original features as \f$\mathbf{\Sigma}^{(x)}\f$ and of the linearly transformed features as \f$\mathbf{\Sigma}^{(y')}\f$, and for each dimension index d we multiply the - d'th row of \f$\mathbf{A}\f$ by - \f$\sqrt{ \frac{\mathbf{\Sigma}^{(x)}_{d,d}}{\mathbf{\Sigma}^{(y')}_{d,d}}}\f$. + d'th row of \f$\mathbf{A}\f$ by + \f$\sqrt{ \frac{\mathbf{\Sigma}^{(x)}_{d,d}}{\mathbf{\Sigma}^{(y')}_{d,d}}}\f$. The resulting matrix will become \f$\mathbf{A}^{(i)}\f$ for some value of i. The command-line tools support the option to ignore the log determinant term @@ -579,8 +579,8 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ are speaker-specific; other quantities (i.e. \f$\mathbf{A}\f$ and \f$\mathbf{B}\f$) are global and shared across all speakers. - The most important factor in this equation is the middle one, - with the exponential function in it. + The most important factor in this equation is the middle one, + with the exponential function in it. The factor \f$\mathbf{D}_s\f$ gives us the ability to combine model-based mean and optionally variance normalization (i.e. offset-only or diagonal-only CMLLR) @@ -596,7 +596,7 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ there would be no point to this technique as the other quantities in the equation would add no degrees of freedom. The tools support three kinds of constraints on \f$\mathbf{D}_s\f$: it may be of the form - \f$[ {\mathbf I} \, \;\, {\mathbf 0} ]\f$ (no adaptation), or + \f$[ {\mathbf I} \, \;\, {\mathbf 0} ]\f$ (no adaptation), or \f$[ {\mathbf I} \, \;\, {\mathbf m} ]\f$ (offset only), or \f$[ {\mathrm{diag}}( {\mathbf d} ) \, \;\, {\mathbf m} ]\f$ (diagonal CMLLR); this is controlled by the --normalize-type option to the command-line tools. @@ -613,9 +613,9 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ if we were to warp by a factor f and then a factor g, this should be the same as warping by the combined factor fg. Let l = log(f) and m = log(g). Then we achieve this - property via the identity + property via the identity \f[ \exp( l \mathbf{A} ) \exp( m \mathbf{A}) = \exp( (l+m) \mathbf{A} ) . \f] - + The ET computation for a particular speaker is as follows; this assumes we are given \f$\mathbf{A}\f$ and \f$\mathbf{B}\f$. We accumulate conventional CMLLR sufficient statistics for the speaker. In the update phase we iteratively optimize @@ -636,9 +636,9 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ \f$\mathbf{B}\f$, or the model.
- If updating \f$\mathbf{A}\f$, we do this given fixed values of \f$t_s\f$ and \f$\mathbf{D}_s\f$. The update is not guaranteed to - converge, but converges rapidly in practice; it's based on a + converge, but converges rapidly in practice; it's based on a quadratic "weak-sense auxiliary function" - where the quadratic term is obtained using a first-order truncation + where the quadratic term is obtained using a first-order truncation of the Taylor series expansion of the matrix exponential function. After updating \f$\mathbf{A}\f$, we modify \f$\mathbf{B}\f$ in order to renormalize the \f$t_s\f$ to zero; this involves premultiplying @@ -646,11 +646,11 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ value of \f$t_s\f$. - If updating \f$\mathbf{B}\f$, this is also done using fixed values of - \f$t_s\f$ and \f$\mathbf{D}_s\f$, and the update is similar to MLLT + \f$t_s\f$ and \f$\mathbf{D}_s\f$, and the update is similar to MLLT (a.k.a. global STC). For purposes of the accumulation and update, we imagine we are estimating an MLLT matrix just to the left of \f$\mathbf{A}\f$, i.e. some matrix - \f$\mathbf{C} \in \Re^{D\times D}\f$; let us define + \f$\mathbf{C} \in \Re^{D\times D}\f$; let us define \f$\mathbf{C}^+ = \left[ \begin{array}{cc} \mathbf{C} & 0 \\ 0 & 1 \end{array} \right]\f$. The transform will be \f$\mathbf{W}_s = \mathbf{D}_s \mathbf{C}^+ \exp ( t_s \mathbf{A} ) \mathbf{B}\f$. @@ -660,24 +660,24 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ \f$\exp ( t_s \mathbf{A} ) \mathbf{B}\f$ as a feature-space transform (i.e. as part of the features). After estimating \f$\mathbf{C}\f$, we will use the identity \f[ - \mathbf{C}^+ \exp ( t_s \mathbf{A} ) = \exp ( t_s \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} ) \mathbf{C}^+ + \mathbf{C}^+ \exp ( t_s \mathbf{A} ) = \exp ( t_s \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} ) \mathbf{C}^+ \f] so the update becomes: \f[ \mathbf{A} \leftarrow \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} , \ \ \mathbf{B} \leftarrow \mathbf{C}^+ \mathbf{B} . \f] At this point we need to transform the model means with the matrix - \f$\mathbf{C}\f$. The reader might question how this interacts with the + \f$\mathbf{C}\f$. The reader might question how this interacts with the fact that for estimating \f$\mathbf{C}\f$, we viewed the quantity \f$\mathbf{D}_s\f$ as a model-space transform. If \f$\mathbf{D}_s\f$ only - contains a mean offset, we can still prove that the auxiliary function + contains a mean offset, we can still prove that the auxiliary function would increase, except we would have to change the offsets appropriately (this is not necessary to do explicitly, as we will re-estimate them on - the next iteration anyway). However, if \f$\mathbf{D}_s\f$ has non-unit - diagonal (i.e. is diagonal not offset CMLLR), this re-estimation process - is not guaranteed to improve the likelihood; the tools will print a warning + the next iteration anyway). However, if \f$\mathbf{D}_s\f$ has non-unit + diagonal (i.e. is diagonal not offset CMLLR), this re-estimation process + is not guaranteed to improve the likelihood; the tools will print a warning in this case. In order to avoid encountering this case, our scripts - train in a mode where \f$\mathbf{D}_s\f$ is an offset-only transform; but + train in a mode where \f$\mathbf{D}_s\f$ is an offset-only transform; but in test time we allow \f$\mathbf{D}_s\f$ to be a diagonal CMLLR transform, which seems to give slightly better results than the offset-only case. 
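Since the matrix exponential appears throughout these updates, the following self-contained sketch shows the function being computed, via a plainly truncated Taylor series (purely illustrative: this is not Kaldi's implementation, and as noted above the update of \f$\mathbf{A}\f$ itself uses only a first-order truncation inside the weak-sense auxiliary function):

\verbatim
#include <vector>

typedef std::vector<std::vector<double> > Matrix;  // small dense square matrix

static Matrix MatMul(const Matrix &a, const Matrix &b) {
  size_t n = a.size();
  Matrix c(n, std::vector<double>(n, 0.0));
  for (size_t i = 0; i < n; i++)
    for (size_t k = 0; k < n; k++)
      for (size_t j = 0; j < n; j++)
        c[i][j] += a[i][k] * b[k][j];
  return c;
}

// Evaluates exp(t A) = I + tA + (tA)^2/2! + ... up to 'num_terms' terms.
Matrix MatExp(const Matrix &A, double t, int num_terms = 12) {
  size_t n = A.size();
  Matrix result(n, std::vector<double>(n, 0.0));
  Matrix term(n, std::vector<double>(n, 0.0));
  for (size_t i = 0; i < n; i++) result[i][i] = term[i][i] = 1.0;  // both = I
  for (int k = 1; k < num_terms; k++) {
    term = MatMul(term, A);               // term <- term * A ...
    for (size_t i = 0; i < n; i++)
      for (size_t j = 0; j < n; j++)
        term[i][j] *= t / k;              // ... * t / k, so term = (tA)^k / k!
    for (size_t i = 0; i < n; i++)
      for (size_t j = 0; j < n; j++)
        result[i][j] += term[i][j];
  }
  return result;
}
\endverbatim

Note that the identity \f$\exp(l\mathbf{A})\exp(m\mathbf{A}) = \exp((l+m)\mathbf{A})\f$ used earlier holds exactly for the true exponential, but only approximately under truncation.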
@@ -704,7 +704,7 @@ expanded features). For very fast operation, it is possible to apply these approaches using a very tiny model with a phone-based language model, and some of our example scripts demonstrate this. There is also the capability in the feature extraction code to subtract the mean on a per-utterance basis (the ---subtract-mean option to compute-mfcc-feats and compute-plp-feats). +--subtract-mean option to compute-mfcc-feats and compute-plp-feats). In order to support per-utterance and per-speaker mean and variance normalization we provide the programs compute-cmvn-stats and apply-cmvn. The program diff --git a/src/doc/versions.dox b/src/doc/versions.dox new file mode 100644 index 00000000000..0a16c5f1d3a --- /dev/null +++ b/src/doc/versions.dox @@ -0,0 +1,96 @@ +// doc/versions.dox + +// Copyright 2017 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +// note: you have to run the file get_version_info.sh in order +// to generate the HTML files that we include via \htmlinclude. +// Any time you add a new version you need to edit get_version_info.sh + + +/** + + \page versions Versions of Kaldi + + \section versions_scheme Versioning scheme + + During its lifetime, Kaldi has had three different versioning methods. + Originally Kaldi was a subversion (svn)-based project, and was hosted + on Sourceforge. Then Kaldi was moved to github, and for some time the + only version number available was the git hash of the commit. + + In January 2017 we introduced a version number scheme. The first version + of Kaldi was 5.0.0, in recognition of the fact that the project had + already existed for quite a long time. The basic scheme is major/minor/patch, + but the "patch" version number may also encompass features (usually + back-compatible ones). The "patch number" automatically increases whenever + a commit to Kaldi is merged on github. + + We only intend to change the major or minor + version number when making relatively large changes, or non-back-compatible + changes. Version 5.1 of Kaldi is currently being prepared. When that is + finished (probably in early February 2017), the latest version of 5.0.x will + be backed up to a branch named '5.0', and 'master' will point to version 5.1.0. + We may continue to update the 5.0 branch with fixes and the like, depending on + demand. + + We always plan to recommend that Kaldi users check out the latest version of + 'master', since actively supporting multiple versions would increase our workload. + + \section versions_versions Versions (and changes) + + This section lists the version numbers of Kaldi with the commit messages + for each patch commit (by "patch commit" we mean a commit that does not + increase the major or minor version number). + Each time we add a new major/minor version number we will include a longer + section explaining the changes involved.
+ + \subsection versions_versions_50 Version 5.0 + + This is the first major/minor version number after introducing the versioning scheme. + The latest revision of version 5.0 is saved as branch "5.0" on github. + + Specific patches: + + \htmlinclude 5.0.html + + + \subsection versions_versions_51 Version 5.1 + + Version 5.1 is the current master branch of Kaldi. + Some of the major changes introduced in version 5.1 are: + - Kaldi now requires C++11 to compile, and we support only the latest + version of OpenFst (1.6.0). (This simplifies Kaldi's code, and will later + enable the threading code to be + rewritten + to use C++11's better and more portable mechanisms). + - The way chunk size and feature context is handled in nnet3 is changed + to allow variable chunk size and shorter context at utterance boundaries. + See \ref dnn3_scripts_context for more information. + - A new decoding mechanism, \ref dnn3_scripts_context_looped, is introduced + in nnet3; this allows faster and more-easily-online decoding for + recurrent setups (but only unidirectionally-recurrent ones, like LSTMs + but not BLSTMs). + - \ref online_decoding_nnet3 is now rewritten; it's faster and it supports + models like LSTMs. + - The sequence-training scripts in nnet3 are refactored and are now simpler + and use less disk space. + + \htmlinclude 5.1.html + + +*/ diff --git a/src/feat/Makefile b/src/feat/Makefile index 71a34192347..e987de55b38 100644 --- a/src/feat/Makefile +++ b/src/feat/Makefile @@ -6,13 +6,12 @@ include ../kaldi.mk TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \ feature-functions-test pitch-functions-test feature-sdc-test \ - resample-test online-feature-test sinusoid-detection-test \ - signal-test + resample-test online-feature-test signal-test OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \ feature-spectrogram.o mel-computations.o wave-reader.o \ - pitch-functions.o resample.o online-feature.o sinusoid-detection.o \ - signal.o feature-window.o + pitch-functions.o resample.o online-feature.o signal.o \ + feature-window.o LIBNAME = kaldi-feat diff --git a/src/feat/feature-fbank.cc b/src/feat/feature-fbank.cc index c54069696b5..3c53ef1ec08 100644 --- a/src/feat/feature-fbank.cc +++ b/src/feat/feature-fbank.cc @@ -28,9 +28,9 @@ FbankComputer::FbankComputer(const FbankOptions &opts): if (opts.energy_floor > 0.0) log_energy_floor_ = Log(opts.energy_floor); - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... - srfft_ = new SplitRadixRealFft(padded_window_size); + int32 num_fft_bins = opts.frame_opts.NumFftBins(); + if ((num_fft_bins & (num_fft_bins-1)) == 0) // Is a power of two... + srfft_ = new SplitRadixRealFft(num_fft_bins); // We'll definitely need the filterbanks info for VTLN warping factor 1.0. // [note: this call caches it.] 
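The test ((num_fft_bins & (num_fft_bins-1)) == 0) in the constructor above is the standard bit trick for detecting a power of two, which is what the split-radix FFT requires; a minimal standalone illustration (the example numbers below are an assumption, corresponding to a 25 ms window at 16 kHz, i.e. a 400-sample window padded to 512):

\verbatim
#include <cassert>
#include <cstdint>

// A positive integer is a power of two iff exactly one bit is set, in which
// case n & (n - 1) clears that bit and yields zero.
static bool IsPowerOfTwo(int32_t n) {
  return n > 0 && (n & (n - 1)) == 0;
}

int main() {
  assert(IsPowerOfTwo(512));   // e.g. padded FFT size: split-radix FFT usable
  assert(!IsPowerOfTwo(400));  // e.g. unpadded window: needs a generic FFT
  return 0;
}
\endverbatim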
@@ -76,7 +76,7 @@ void FbankComputer::Compute(BaseFloat signal_log_energy, const MelBanks &mel_banks = *(GetMelBanks(vtln_warp)); - KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && + KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() && feature->Dim() == this->Dim()); diff --git a/src/feat/feature-mfcc.cc b/src/feat/feature-mfcc.cc index c1962a5c1d1..47912cc8693 100644 --- a/src/feat/feature-mfcc.cc +++ b/src/feat/feature-mfcc.cc @@ -29,7 +29,7 @@ void MfccComputer::Compute(BaseFloat signal_log_energy, BaseFloat vtln_warp, VectorBase<BaseFloat> *signal_frame, VectorBase<BaseFloat> *feature) { - KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && + KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() && feature->Dim() == this->Dim()); const MelBanks &mel_banks = *(GetMelBanks(vtln_warp)); @@ -98,9 +98,9 @@ MfccComputer::MfccComputer(const MfccOptions &opts): if (opts.energy_floor > 0.0) log_energy_floor_ = Log(opts.energy_floor); - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... - srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size); + int32 num_fft_bins = opts.frame_opts.NumFftBins(); + if ((num_fft_bins & (num_fft_bins-1)) == 0) // Is a power of two... + srfft_ = new SplitRadixRealFft<BaseFloat>(num_fft_bins); // We'll definitely need the filterbanks info for VTLN warping factor 1.0. // [note: this call caches it.] diff --git a/src/feat/feature-spectrogram.cc b/src/feat/feature-spectrogram.cc index 953f38fc54f..f5f1c420462 100644 --- a/src/feat/feature-spectrogram.cc +++ b/src/feat/feature-spectrogram.cc @@ -48,7 +48,7 @@ void SpectrogramComputer::Compute(BaseFloat signal_log_energy, BaseFloat vtln_warp, VectorBase<BaseFloat> *signal_frame, VectorBase<BaseFloat> *feature) { - KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && + KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() && feature->Dim() == this->Dim()); diff --git a/src/feat/feature-spectrogram.h b/src/feat/feature-spectrogram.h index ec318556f24..6ca0697ef78 100644 --- a/src/feat/feature-spectrogram.h +++ b/src/feat/feature-spectrogram.h @@ -39,10 +39,13 @@ struct SpectrogramOptions { FrameExtractionOptions frame_opts; BaseFloat energy_floor; bool raw_energy; // If true, compute energy before preemphasis and windowing + bool use_energy; // append an extra dimension with energy to the spectrogram output + BaseFloat low_freq; // e.g. 20; lower frequency cutoff + BaseFloat high_freq; // an upper frequency cutoff; 0 -> no cutoff, negative -> offset from Nyquist SpectrogramOptions() : energy_floor(0.0), // not in log scale: a small value e.g.
1.0e-10 - raw_energy(true) {} + raw_energy(true), use_energy(true), low_freq(0), high_freq(0) {} void Register(OptionsItf *opts) { frame_opts.Register(opts); @@ -50,6 +53,12 @@ struct SpectrogramOptions { "Floor on energy (absolute, not relative) in Spectrogram computation"); opts->Register("raw-energy", &raw_energy, "If true, compute energy before preemphasis and windowing"); + opts->Register("use-energy", &use_energy, + "Add an extra dimension with energy to the spectrogram output."); + opts->Register("low-freq", &low_freq, + "Low cutoff frequency for mel bins"); + opts->Register("high-freq", &high_freq, + "High cutoff frequency for mel bins (if < 0, offset from Nyquist)"); } }; diff --git a/src/feat/feature-window.cc b/src/feat/feature-window.cc index 65c0a2a29c3..7b86e71dbb7 100644 --- a/src/feat/feature-window.cc +++ b/src/feat/feature-window.cc @@ -163,7 +163,7 @@ void ExtractWindow(int64 sample_offset, BaseFloat *log_energy_pre_window) { KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0); int32 frame_length = opts.WindowSize(), - frame_length_padded = opts.PaddedWindowSize(); + num_fft_bins = opts.NumFftBins(); int64 num_samples = sample_offset + wave.Dim(), start_sample = FirstSampleOfFrame(f, opts), end_sample = start_sample + frame_length; @@ -175,8 +175,8 @@ void ExtractWindow(int64 sample_offset, KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset); } - if (window->Dim() != frame_length_padded) - window->Resize(frame_length_padded, kUndefined); + if (window->Dim() != num_fft_bins) + window->Resize(num_fft_bins, kUndefined); // wave_start and wave_end are start and end indexes into 'wave', for the // piece of wave that we're trying to extract. @@ -206,8 +206,8 @@ void ExtractWindow(int64 sample_offset, } } - if (frame_length_padded > frame_length) - window->Range(frame_length, frame_length_padded - frame_length).SetZero(); + if (num_fft_bins > frame_length) + window->Range(frame_length, num_fft_bins - frame_length).SetZero(); SubVector frame(*window, 0, frame_length); diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h index 287f1bf01f6..d6acf7e2bed 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -42,6 +42,7 @@ struct FrameExtractionOptions { std::string window_type; // e.g. Hamming window bool round_to_power_of_two; BaseFloat blackman_coeff; + int32 num_fft_bins; bool snip_edges; // May be "hamming", "rectangular", "povey", "hanning", "blackman" // "povey" is a window I made to be similar to Hamming but to go to zero at the @@ -57,6 +58,7 @@ struct FrameExtractionOptions { window_type("povey"), round_to_power_of_two(true), blackman_coeff(0.42), + num_fft_bins(128), snip_edges(true){ } void Register(OptionsItf *opts) { @@ -76,7 +78,10 @@ struct FrameExtractionOptions { opts->Register("blackman-coeff", &blackman_coeff, "Constant coefficient for generalized Blackman window."); opts->Register("round-to-power-of-two", &round_to_power_of_two, - "If true, round window size to power of two."); + "If true, round window size to power of two by zero-padding " + "input to FFT."); + opts->Register("num-fft-bins", &num_fft_bins, + "Number of FFT bins to compute spectrogram"); opts->Register("snip-edges", &snip_edges, "If true, end effects will be handled by outputting only frames that " "completely fit in the file, and the number of frames depends on the " @@ -93,6 +98,13 @@ struct FrameExtractionOptions { return (round_to_power_of_two ? 
RoundUpToNearestPowerOfTwo(WindowSize()) : WindowSize()); } + int32 NumFftBins() const { + int32 padded_window_size = PaddedWindowSize(); + if (num_fft_bins > padded_window_size) + return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(num_fft_bins) : + num_fft_bins); + return padded_window_size; + } }; diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc index 714d963f01b..db3f3334ca2 100644 --- a/src/feat/mel-computations.cc +++ b/src/feat/mel-computations.cc @@ -37,13 +37,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts, int32 num_bins = opts.num_bins; if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins"; BaseFloat sample_freq = frame_opts.samp_freq; - int32 window_length = static_cast<int32>(frame_opts.samp_freq*0.001*frame_opts.frame_length_ms); - int32 window_length_padded = - (frame_opts.round_to_power_of_two ? - RoundUpToNearestPowerOfTwo(window_length) : - window_length); - KALDI_ASSERT(window_length_padded % 2 == 0); - int32 num_fft_bins = window_length_padded/2; + int32 num_fft_bins = frame_opts.NumFftBins(); BaseFloat nyquist = 0.5 * sample_freq; BaseFloat low_freq = opts.low_freq, high_freq; @@ -59,8 +53,8 @@ MelBanks::MelBanks(const MelBanksOptions &opts, << " and high-freq " << high_freq << " vs. nyquist " << nyquist; - BaseFloat fft_bin_width = sample_freq / window_length_padded; - // fft-bin width [think of it as Nyquist-freq / half-window-length] + BaseFloat fft_bin_width = sample_freq / num_fft_bins; + // fft-bin width [think of it as Nyquist-freq / (num-fft-bins / 2)] BaseFloat mel_low_freq = MelScale(low_freq); BaseFloat mel_high_freq = MelScale(high_freq); @@ -104,9 +98,9 @@ MelBanks::MelBanks(const MelBanksOptions &opts, center_freqs_(bin) = InverseMelScale(center_mel); // this_bin will be a vector of coefficients that is only // nonzero where this mel bin is active. - Vector<BaseFloat> this_bin(num_fft_bins); + Vector<BaseFloat> this_bin(num_fft_bins / 2); int32 first_index = -1, last_index = -1; - for (int32 i = 0; i < num_fft_bins; i++) { + for (int32 i = 0; i < num_fft_bins / 2; i++) { BaseFloat freq = (fft_bin_width * i); // Center frequency of this fft // bin. BaseFloat mel = MelScale(freq); diff --git a/src/feat/pitch-functions-test.cc b/src/feat/pitch-functions-test.cc index 616dbc68d54..098e590a8e9 100644 --- a/src/feat/pitch-functions-test.cc +++ b/src/feat/pitch-functions-test.cc @@ -84,10 +84,10 @@ static void UnitTestSnipEdges() { KALDI_ASSERT(wave.Data().NumRows() == 1); SubVector<BaseFloat> waveform(wave.Data(), 0); - // Process files with snip edge enabled or disabled, on various + // Process files with snip edges enabled or disabled, on various // frame shifts and frame lengths - for (int fs = 1; fs <= 10; fs++) { - for (int wl = 20; wl <= 100; wl += 10) { + for (int fs = 4; fs <= 10; fs += 2) { + for (int wl = 20; wl <= 100; wl += 20) { // Rather dirty way to round, but works fine int32 ms_fs = (int32)(wave.SampFreq() * 0.001 * fs + 0.5); int32 ms_wl = (int32)(wave.SampFreq() * 0.001 * wl + 0.5); @@ -99,11 +99,11 @@ static void UnitTestSnipEdges() { op_NoSnipEdges.frame_length_ms = wl; ComputeAndProcessKaldiPitch(op_SnipEdges, opp, waveform, &m1); ComputeAndProcessKaldiPitch(op_NoSnipEdges, opp, waveform, &m2); - + // Check that the outputs differ in a predictable manner: // 1.
The length of the output should only depend on the window size & window shift KALDI_LOG << "Output: " << m1.NumRows() << " ; " << m2.NumRows(); - // - with snip edges disabled, depends on file size and frame shift only */ + // - with snip edges disabled, depends on file size and frame shift only AssertEqual(m2.NumRows(), ((int)(wave.Data().NumCols() + ms_fs / 2)) / ms_fs); // - with snip edges enabled, depends on file size, frame shift and frame length AssertEqual(m1.NumRows(), ((int)(wave.Data().NumCols() - ms_wl + ms_fs)) / ms_fs); @@ -117,7 +117,7 @@ static void UnitTestSnipEdges() { int32 blag = -1; int32 max_lag = wl / fs * 2; int num_frames_f0 = m1.NumRows() - max_lag; - + /* Look for the best correlation between the output signals, identify the lag, and compare it with the theoretical value */ SubVector<BaseFloat> sub_vec1(f0_1, 0, num_frames_f0); @@ -129,9 +129,9 @@ static void UnitTestSnipEdges() { blag = lag; } } - KALDI_LOG << "Best lag: " << blag * fs << "ms with value: " << bcorr << + KALDI_LOG << "Best lag: " << blag * fs << "ms with value: " << bcorr << "; expected lag: " << wl / 2 + 10 - fs / 2 << " ± " << fs; - // BP: the lag should in theory be equal to wl / 2 - fs / 2, but it seems + // BP: the lag should in theory be equal to wl / 2 - fs / 2, but it seems // to be: wl / 2 + 10 - fs / 2! It appears the 10 ms comes from the nccf_lag which // is 82 samples with the default settings => nccf_lag / resample_freq / 2 => 10.25ms // We should really be using the full_frame_length of the algorithm for accurate results, @@ -230,7 +230,7 @@ static void UnitTestDelay() { ext_opt.nccf_ballast_online = true; // this is necessary for the computation // to be identical regardless how many pieces we break the signal into. - int32 size = 10000 + rand() % 50000; + int32 size = 1000 + rand() % 5000; Vector<BaseFloat> v(size); // init with noise plus a sine-wave whose frequency is changing randomly. @@ -294,7 +294,7 @@ static void UnitTestSearch() { op.nccf_ballast_online = true; // this is necessary for the computation // to be identical regardless how many pieces we break the signal into. - int32 size = 10000 + rand() % 10000; + int32 size = 1000 + rand() % 1000; Vector<BaseFloat> v(size); // init with noise plus a sine-wave whose frequency is changing randomly. diff --git a/src/feat/pitch-functions.cc b/src/feat/pitch-functions.cc index 430e9bdb53a..07e1d181243 100644 --- a/src/feat/pitch-functions.cc +++ b/src/feat/pitch-functions.cc @@ -1402,7 +1402,8 @@ OnlineProcessPitch::OnlineProcessPitch( dim_ ((opts.add_pov_feature ? 1 : 0) + (opts.add_normalized_log_pitch ? 1 : 0) + (opts.add_delta_pitch ? 1 : 0) - + (opts.add_raw_log_pitch ? 1 : 0)) { + + (opts.add_raw_log_pitch ? 1 : 0) + + (opts.add_raw_pov ? 1 : 0)) { KALDI_ASSERT(dim_ > 0 && " At least one of the pitch features should be chosen.
" "Check your post-process-pitch options."); @@ -1425,6 +1426,8 @@ void OnlineProcessPitch::GetFrame(int32 frame, (*feat)(index++) = GetDeltaPitchFeature(frame_delayed); if (opts_.add_raw_log_pitch) (*feat)(index++) = GetRawLogPitchFeature(frame_delayed); + if (opts_.add_raw_pov) + (*feat)(index++) = GetRawPov(frame_delayed); KALDI_ASSERT(index == dim_); } @@ -1482,6 +1485,13 @@ BaseFloat OnlineProcessPitch::GetNormalizedLogPitchFeature(int32 frame) { return normalized_log_pitch * opts_.pitch_scale; } +BaseFloat OnlineProcessPitch::GetRawPov(int32 frame) const { + Vector tmp(kRawFeatureDim); + src_->GetFrame(frame, &tmp); // (NCCF, pitch) from pitch extractor + BaseFloat nccf = tmp(0); + return NccfToPov(nccf); +} + // inline void OnlineProcessPitch::GetNormalizationWindow(int32 t, diff --git a/src/feat/pitch-functions.h b/src/feat/pitch-functions.h index 70e85380be6..b94ac661c10 100644 --- a/src/feat/pitch-functions.h +++ b/src/feat/pitch-functions.h @@ -231,6 +231,7 @@ struct ProcessPitchOptions { bool add_normalized_log_pitch; bool add_delta_pitch; bool add_raw_log_pitch; + bool add_raw_pov; ProcessPitchOptions() : pitch_scale(2.0), @@ -245,7 +246,7 @@ struct ProcessPitchOptions { add_pov_feature(true), add_normalized_log_pitch(true), add_delta_pitch(true), - add_raw_log_pitch(false) { } + add_raw_log_pitch(false), add_raw_pov(false) { } void Register(ParseOptions *opts) { @@ -286,6 +287,8 @@ struct ProcessPitchOptions { "features"); opts->Register("add-raw-log-pitch", &add_raw_log_pitch, "If true, log(pitch) is added to output features"); + opts->Register("add-raw-pov", &add_raw_pov, + "If true, add NCCF converted to POV"); } }; @@ -396,6 +399,10 @@ class OnlineProcessPitch: public OnlineFeatureInterface { /// Called from GetFrame(). inline BaseFloat GetNormalizedLogPitchFeature(int32 frame); + /// Computes and retures the raw POV for this frames. + /// Called from GetFrames(). + inline BaseFloat GetRawPov(int32 frame) const; + /// Computes the normalization window sizes. 
inline void GetNormalizationWindow(int32 frame, int32 src_frames_ready, diff --git a/src/feat/signal-test.cc b/src/feat/signal-test.cc index 39a379040b0..d69cf8e5798 100644 --- a/src/feat/signal-test.cc +++ b/src/feat/signal-test.cc @@ -23,10 +23,10 @@ namespace kaldi { -void UnitTestBlockConvolution() { +void UnitTestFFTbasedBlockConvolution() { for (int32 i = 0; i < 5; i++) { - int32 signal_length = 4000000 + Rand() % 400000; - int32 filter_length = 10000 + Rand() % 1000; + int32 signal_length = 400000 + Rand() % 40000; + int32 filter_length = 1000 + Rand() % 100; Vector signal(signal_length); Vector filter(filter_length); signal.SetRandn(); @@ -38,17 +38,17 @@ void UnitTestBlockConvolution() { } } -void UnitTestConvolution() { +void UnitTestFFTbasedConvolution() { for (int32 i = 0; i < 5; i++) { - int32 signal_length = 40000 + Rand() % 4000; - int32 filter_length = 100 + Rand() % 100; + int32 signal_length = 4000 + Rand() % 400; + int32 filter_length = 10 + Rand() % 10; Vector signal(signal_length); Vector filter(filter_length); signal.SetRandn(); filter.SetRandn(); Vector signal_test(signal); ConvolveSignals(filter, &signal_test); - FFTbasedBlockConvolveSignals(filter, &signal); + FFTbasedConvolveSignals(filter, &signal); AssertEqual(signal, signal_test, 0.0001 * signal.Dim()); } } @@ -56,8 +56,8 @@ void UnitTestConvolution() { int main() { using namespace kaldi; - UnitTestBlockConvolution(); - UnitTestConvolution(); + UnitTestFFTbasedConvolution(); + UnitTestFFTbasedBlockConvolution(); KALDI_LOG << "Tests succeeded."; } diff --git a/src/feat/sinusoid-detection-test.cc b/src/feat/sinusoid-detection-test.cc deleted file mode 100644 index 68148b44ccf..00000000000 --- a/src/feat/sinusoid-detection-test.cc +++ /dev/null @@ -1,452 +0,0 @@ -// feat/sinusoid-detection-test.cc - -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include - -#include "base/kaldi-math.h" -#include "feat/sinusoid-detection.h" - - -namespace kaldi { - -// this function is used for testing AddSinusoid. -void AddSinusoidSimple(BaseFloat samp_freq, - const Sinusoid &sinusoid, - VectorBase *signal) { - for (int32 i = 0; i < signal->Dim(); i++) - (*signal)(i) += sinusoid.amplitude * - cos(M_2PI * sinusoid.freq / samp_freq * i + sinusoid.phase); -} - -void UnitTestAddSinusoid() { - BaseFloat samp_freq = 560.1; - int32 length = 511; - Vector orig(length); - orig.SetRandn(); - Vector orig2(orig); - Sinusoid sinusoid(49.20, 2.111, 1.5); - - AddSinusoid(samp_freq, sinusoid, &orig); - AddSinusoidSimple(samp_freq, sinusoid, &orig2); - AssertEqual(orig, orig2); -} - - - -void UnitTestQuadraticMaximizeEqualSpaced() { - for (int32 n = 0; n < 50; n++) { - - // Let the cubic function be y = a x^2 + b x + c, and let - // y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we - // want it evaluated at arbitrary x. 
- - BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform(); - BaseFloat y[3]; - for (int32 i = 0; i < 3; i++) { - BaseFloat x = i; - y[i] = a * x * x + b * x + c; - } - BaseFloat x_max, y_max; - SinusoidDetector::QuadraticMaximizeEqualSpaced(y[0], y[1], y[2], &x_max, &y_max); - - for (int32 m = 0; m <= 10; m++) { - BaseFloat x_test = 0.1 * m; - BaseFloat y_test = a * x_test * x_test + b * x_test + c; - KALDI_ASSERT(y_test <= y_max + 1.0e-05); - } - } -} - -void UnitTestQuadraticMaximize() { - for (int32 n = 0; n < 50; n++) { - - // Let the cubic function be y = a x^2 + b x + c, and let - // y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we - // want it evaluated at arbitrary x. - - BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform(), - x = 0.1 + RandUniform() * 0.98; - BaseFloat y[3]; - for (int32 i = 0; i < 3; i++) { - BaseFloat this_x; - if (i == 0) { this_x = 0.0; } - else if (i == 1) { this_x = x; } - else { this_x = 1.0; } - y[i] = a * this_x * this_x + b * this_x + c; - } - BaseFloat x_max, y_max; - SinusoidDetector::QuadraticMaximize(x, y[0], y[1], y[2], &x_max, &y_max); - - for (int32 m = 0; m <= 10; m++) { - BaseFloat x_test = 0.1 * m; - BaseFloat y_test = a * x_test * x_test + b * x_test + c; - if (n < 100 && m == 5) { - KALDI_VLOG(2) << "Checking y_test <= y_max: " - << y_test << " <= " << y_max << " [x_max = " - << x_max << "]"; - KALDI_ASSERT(y_test <= y_max + 1.0e-05); - } - } - } -} - - -void UnitTestSinusoidDetector() { - BaseFloat samp_freq = 4000 + (rand() % 2000); - int32 num_samp = 128 + rand() % 400; - SinusoidDetector detector(samp_freq, num_samp); - - for (int32 i = 0; i < 40; i++) { - - Vector signal(num_samp); - - // Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0); - // Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5); - - BaseFloat nyquist = samp_freq * 0.5; - BaseFloat freq = nyquist * RandUniform(); - BaseFloat amplitude = RandUniform(); - BaseFloat phase = M_2PI * RandUniform(); - - Sinusoid ref_sinusoid(amplitude, freq, phase); - - AddSinusoid(samp_freq, ref_sinusoid, &signal); - - - BaseFloat orig_energy = VecVec(signal, signal); - KALDI_LOG << "Real frequency is " << freq << ", amplitude " - << amplitude << ", phase " << phase << ", samp-freq " - << samp_freq; - KALDI_LOG << "Total energy of signal (with sinusoid) is " << orig_energy; - - Sinusoid sinusoid; - BaseFloat min_energy = 0.0; - BaseFloat energy = detector.DetectSinusoid(min_energy, - signal, &sinusoid); - - Vector new_signal(signal); - sinusoid.phase += M_PI; // Reverse the phase. - AddSinusoid(samp_freq, sinusoid, &new_signal); - BaseFloat delta_energy = VecVec(signal, signal) - - VecVec(new_signal, new_signal); - KALDI_LOG << "Projected delta energy = " << energy - << " and observed was " << delta_energy; - - BaseFloat remaining_energy = VecVec(new_signal, new_signal); - if (remaining_energy > 0.01 * orig_energy) { - KALDI_WARN << "Energy remaining is " << remaining_energy - << " vs. original " << orig_energy; - BaseFloat relative_freq = freq / nyquist; - BaseFloat inv_num_samp = 1.0 / num_samp; - // We only tolerate this kind of error for very ridiculous frequency, - // close to zero or the Nyquist. - KALDI_ASSERT(relative_freq < inv_num_samp || - relative_freq > 1.0 - inv_num_samp); - } - } -} - -// as UnitTestSinusoidDetector(), but doing it in noisy signals. 
-void UnitTestSinusoidDetectorNoisy() { - BaseFloat samp_freq = 4000 + (rand() % 2000); - int32 num_samp = 128 + rand() % 400; - SinusoidDetector detector(samp_freq, num_samp); - - for (int32 i = 0; i < 40; i++) { - - Vector signal(num_samp); - - signal.SetRandn(); - - BaseFloat rand_energy = VecVec(signal, signal); - - // Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0); - // Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5); - - BaseFloat nyquist = samp_freq * 0.5; - BaseFloat freq = nyquist * RandUniform(); - BaseFloat amplitude = 10.0 * RandUniform(); - BaseFloat phase = M_2PI * RandUniform(); - - Sinusoid ref_sinusoid(amplitude, freq, phase); - - AddSinusoid(samp_freq, ref_sinusoid, &signal); - - BaseFloat tot_energy = VecVec(signal, signal); - - KALDI_LOG << "Real frequency is " << freq << ", amplitude " - << amplitude << ", phase " << phase << ", samp-freq " - << samp_freq; - KALDI_LOG << "Total energy of signal (with noise + sinusoid) is " << tot_energy; - - Sinusoid sinusoid; - BaseFloat min_energy = 0.0; - BaseFloat energy = detector.DetectSinusoid(min_energy, - signal, &sinusoid); - - Vector new_signal(signal); - sinusoid.phase += M_PI; // reverse the phase. - AddSinusoid(samp_freq, sinusoid, &new_signal); - BaseFloat delta_energy = VecVec(signal, signal) - - VecVec(new_signal, new_signal); - KALDI_LOG << "Projected delta energy = " << energy - << " and observed was " << delta_energy; - - BaseFloat min_energy_diff = 0.99 * (tot_energy - rand_energy); - - if (delta_energy < min_energy_diff) { - KALDI_WARN << "Energy reduction is " << delta_energy - << " vs. expected " << (tot_energy - rand_energy); - BaseFloat relative_freq = freq / nyquist; - BaseFloat inv_num_samp = 1.0 / num_samp; - // We only tolerate this kind of error for very ridiculous frequency, - // close to zero or the Nyquist. - KALDI_ASSERT(relative_freq < inv_num_samp || - relative_freq > 1.0 - inv_num_samp); - } - } -} - - -void AddFreqToSignal(BaseFloat base_freq, - BaseFloat samp_freq, - BaseFloat tolerance, - BaseFloat gain, - VectorBase *signal) { - BaseFloat error_scale = (2 * RandUniform() - 1) * tolerance; - BaseFloat freq = base_freq * (1.0 + error_scale); - KALDI_VLOG(3) << "base-freq = " << base_freq << ", factor = " << error_scale; - for (int32 i = 0; i < signal->Dim(); i++) - (*signal)(i) += gain * sin(i * 2.0 * 3.14159 * freq / samp_freq); -} - - -void GenerateDtmfTestCase( - BaseFloat sampling_rate, - Vector *signal, - std::vector *ref_output) { - // the "ref_output" should correlate with the first of each run of frames with the same label. - - BaseFloat min_duration_secs = 0.04; // min duration of dtmf or non-tone segments. - BaseFloat min_dialtone_duration_secs = 0.1; - BaseFloat frequency_tolerance = 0.035; - BaseFloat dialtone_frequency_tolerance = 0.4 * (440.0 - 425.0) / 440.0; - - int32 num_events = 2 * (5 + rand() % 5) + 1; // odd number. - int32 tot_signal_dim = 0; - - ref_output->resize(num_events); - std::vector > all_signals(num_events); - for (int32 i = 0; i < num_events; i++) { - MultiSinusoidDetectorOutput &this_output = (*ref_output)[i]; - Vector &this_signal = all_signals[i]; - BaseFloat duration_secs = min_duration_secs * (1 + rand() % 3); - int32 num_samp = sampling_rate * duration_secs; - tot_signal_dim += num_samp; - - this_signal.Resize(num_samp); - this_signal.SetRandn(); - - if (i % 2 == 0); // do nothing; - else if (rand() % 2 == 0 && duration_secs >= min_dialtone_duration_secs) { - // dialtone. 
- BaseFloat freq; - if (rand() % 3 == 0) { freq = 350; } - else if (rand() % 2 == 0) { freq = 440; } - else { freq = 425; } - BaseFloat gain = 10.0 * (1.0 + rand() % 2); - AddFreqToSignal(freq, sampling_rate, dialtone_frequency_tolerance, - gain, &(this_signal)); - this_output.freq1 = freq; - } else { - // dtmf. use a subset of tones as examples. - BaseFloat freq1, freq2; - char c; - if (rand() % 4 == 0) { - c = '8'; freq1 = 852; freq2 = 1336; - } else if (rand() % 3 == 0) { - c = '0'; freq1 = 941; freq2 = 1336; - } else if (rand() % 2 == 0) { - c = '#'; freq1 = 941; freq2 = 1477; - } else { - c = '1'; freq1 = 697; freq2 = 1209; - } - BaseFloat base_gain = 10.0 * (1.0 + (rand() % 3)), - gain_factor = 1.0 + 0.1 * (-2 + rand() % 5), - gain1 = base_gain, gain2 = gain_factor * base_gain; - AddFreqToSignal(freq1, sampling_rate, frequency_tolerance, gain1, - &(this_signal)); - AddFreqToSignal(freq2, sampling_rate, frequency_tolerance, gain2, - &(this_signal)); - this_output.freq1 = freq1; - this_output.freq2 = freq2; - } - } - signal->Resize(tot_signal_dim); - int32 signal_offset = 0; - for (int32 i = 0; i < num_events; i++) { - int32 this_dim = all_signals[i].Dim(); - signal->Range(signal_offset, this_dim).CopyFromVec(all_signals[i]); - signal_offset += this_dim; - } -} - - -/* - -// Just a basic test to check that it produces output. - -void UnitTestToneDetection() { - BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000; - ToneDetectionConfig config; - - int32 num_frames = 100 + (rand() % 100); - int32 frame_length = static_cast(samp_freq * config.frame_length_secs); - - int32 num_samples = frame_length * num_frames + rand() % frame_length; - Vector signal(num_samples); - signal.SetRandn(); - - ToneDetector tone_detector(config, samp_freq); - - int32 signal_offset = 0; - - std::vector tone_detector_output; - - while (signal_offset < num_samples) { - int32 signal_remaining = num_samples - signal_offset, - chunk_size = std::min((rand() % 200) + 100, - signal_remaining); - SubVector signal_part(signal, signal_offset, chunk_size); - tone_detector.AcceptWaveform(signal_part); - signal_offset += chunk_size; - - if (signal_offset == num_samples) - tone_detector.WaveformFinished(); - while (!tone_detector.Done() && - (rand() % 2 == 0 || signal_offset == num_samples)) { - ToneDetectorOutput *output = new ToneDetectorOutput(); - tone_detector.GetNextFrame(output); - tone_detector_output.push_back(output); - } - } - KALDI_ASSERT(signal_offset == num_samples); - - Vector signal2(signal.Dim()); - signal_offset = 0; - for (int32 i = 0; i < tone_detector_output.size(); i++) { - ToneDetectorOutput *output = tone_detector_output[i]; - signal2.Range(signal_offset, - output->signal.Dim()).CopyFromVec(output->signal); - signal_offset += output->signal.Dim(); - if (output->frame_type != 'n') { - KALDI_ERR << "Frame " << i << " badly classified, should be 'n', is: " - << output->frame_type; - } - delete output; - } - KALDI_ASSERT(signal_offset == num_samples && - signal.ApproxEqual(signal2, 1.0e-10)); - -} - -std::ostringstream & operator << (std::ostringstream &ostr, - const ToneDetectorOutput &output) { - ostr << output.frame_type; - if (output.frame_type == 'd') - ostr << output.dialtone_freq; - ostr << ' '; - return ostr; -} - -*/ - - -// This version of the unit-test generates a signal that has tones in it, and -// runs the detection on that signal. -void UnitTestToneDetection2() { - BaseFloat samp_freq = (rand() % 2) == 0 ? 
8000 : 16000; - Vector signal; - std::vector ref_output; - GenerateDtmfTestCase(samp_freq, &signal, &ref_output); - - MultiSinusoidDetectorConfig config; - - int32 num_samples = signal.Dim(); - KALDI_ASSERT(num_samples > 0); - - MultiSinusoidDetector multi_sinusoid_detector(config, samp_freq); - - int32 signal_offset = 0; - - std::vector multi_sinusoid_detector_output; - - while (signal_offset < num_samples) { - int32 signal_remaining = num_samples - signal_offset, - chunk_size = std::min((rand() % 200) + 100, - signal_remaining); - SubVector signal_part(signal, signal_offset, chunk_size); - multi_sinusoid_detector.AcceptWaveform(signal_part); - signal_offset += chunk_size; - - if (signal_offset == num_samples) - multi_sinusoid_detector.WaveformFinished(); - while (!multi_sinusoid_detector.Done() && - (rand() % 2 == 0 || signal_offset == num_samples)) { - MultiSinusoidDetectorOutput *output = new MultiSinusoidDetectorOutput(); - multi_sinusoid_detector.GetNextFrame(output); - multi_sinusoid_detector_output.push_back(output); - } - } - KALDI_ASSERT(signal_offset == num_samples); - - // std::ostringstream str_ref, str_hyp; - //for (size_t i = 0; i < ref_output.size(); i++) - // str_ref << ref_output[i]; - - - for (size_t i = 0; i < multi_sinusoid_detector_output.size(); i++) { - MultiSinusoidDetectorOutput *output = multi_sinusoid_detector_output[i]; - KALDI_LOG << "tot-energy = " << output->tot_energy - << ", freq1 " << output->freq1 << ", energy1 " << output->energy1 - << ", freq2 " << output->freq2 << ", energy2 " << output->energy2; - delete output; - } -} - - - -} // namespace kaldi - -int main() { - using namespace kaldi; - - SetVerboseLevel(4); - - UnitTestToneDetection2(); - UnitTestAddSinusoid(); - UnitTestQuadraticMaximizeEqualSpaced(); - UnitTestQuadraticMaximize(); - for (int32 i = 0; i < 10; i++) { - UnitTestSinusoidDetector(); - UnitTestSinusoidDetectorNoisy(); - } - -} diff --git a/src/feat/sinusoid-detection.cc b/src/feat/sinusoid-detection.cc deleted file mode 100644 index bf6b0b9e4fe..00000000000 --- a/src/feat/sinusoid-detection.cc +++ /dev/null @@ -1,945 +0,0 @@ -// feat/sinusoid-detection.cc - -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "feat/sinusoid-detection.h" -#include "matrix/matrix-functions.h" -#include "feat/resample.h" - -namespace kaldi { - - - -// This function adds the given sinusoid to the signal, as: -// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase). -void AddSinusoid(BaseFloat samp_freq, - const Sinusoid &sinusoid, - VectorBase *signal) { - // treat "factor" as a complex variable equal to exp(i * 2 pi freq / samp_freq); it's - // the factor by which we multiply on each frame. 
- BaseFloat factor_real = cos(M_2PI * sinusoid.freq / samp_freq), - factor_im = sin(M_2PI * sinusoid.freq / samp_freq); - BaseFloat *signal_data = signal->Data(); - int32 dim = signal->Dim(), batch_size = 100; - // process frames in batches of size "batch_size", after which we recompute - // the starting point to prevent loss of accuracy due to drift. - for (int32 b = 0; b * batch_size < dim; b++) { - int32 t_offset = b * batch_size, - t_end = std::min(dim, t_offset + batch_size); - double phase = sinusoid.phase + M_2PI * t_offset * sinusoid.freq / samp_freq; - // treat x as a complex variable which initially is equal to amplitude * exp(i * phase), - // but which gets multiplied by "factor" on each frame. - BaseFloat x_real = sinusoid.amplitude * cos(phase), - x_im = sinusoid.amplitude * sin(phase); - for (int32 t = t_offset; t < t_end; t++) { - signal_data[t] += x_real; - ComplexMul(factor_real, factor_im, &x_real, &x_im); // x *= factor. - } - } -} - - -// static -void SinusoidDetector::QuadraticMaximizeEqualSpaced( - BaseFloat y0, BaseFloat y1, BaseFloat y2, - BaseFloat *x_max, BaseFloat *y_max) { - // Let the function be y = a x^2 + b x + c, and - // suppose we have the values of y(0), y(1) and y(2). - // We have y0 = c, y1 = a + b + c, and y2 = 4a + 2b + c, - // so c = y0. - // Also, y2 - 2 y1 = 2a - c, so - // a = (y2 - 2 y1 + c) / 2, and - // b = y1 - a - c. - BaseFloat c = y0, a = y2 - 2 * y1 + c, b = y1 - a - c; - if (a >= 0) { - // The maximum of the function will occur at one of the end points. - if (y0 > y2) { - *x_max = 0; - *y_max = y0; - } else { - *x_max = 2; - *y_max = y2; - } - } else { - // derivative y' = 2a x + b. y' = 0 at x = -b / 2 a. - BaseFloat x = -b / (2.0 * a); - if (x <= 0.0) { - *x_max = 0; - *y_max = y0; - } else if (x >= 2.0) { - *x_max = 0; - *y_max = y2; - } else { - *x_max = x; - *y_max = a * x * x + b * x + c; - } - } -} - -// static -void SinusoidDetector::QuadraticMaximize( - BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2, - BaseFloat *x_max, BaseFloat *y_max) { - // Let the function be y = a x^2 + b x + c, and - // suppose we have the values of y(0), y(x1) and y(1), - // where 0 < x1 < 1. - // We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c, - // so c = y0. - // Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so - // a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and - // b = y2 - a - c. - BaseFloat c = y0, - a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1), - b = y2 - a - c; - - // TODO: remove these lines. - AssertEqual(y1, a * x1 * x1 + b * x1 + c); - AssertEqual(y2, a + b + c); - - if (a >= 0) { - // The maximum of the function will occur at one of the end points. - if (y0 > y2) { - *x_max = 0; - *y_max = y0; - } else { - *x_max = 1.0; - *y_max = y2; - } - } else { - // derivative y' = 2a x + b. y' = 0 at x = -b / 2 a. - BaseFloat x = -b / (2.0 * a); - if (x <= 0.0) { - *x_max = 0.0; - *y_max = y0; - } else if (x >= 1.0) { - *x_max = 1.0; - *y_max = y2; - } else { - *x_max = x; - *y_max = a * x * x + b * x + c; - } - } -} - -//static -BaseFloat SinusoidDetector::QuadraticInterpolate( - BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2, - BaseFloat x) { - // Let the function be y = a x^2 + b x + c, and - // suppose we have the values of y(0), y(x1) and y(1), - // where 0 < x1 < 1. - // We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c, - // so c = y0. - // Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so - // a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and - // b = y2 - a - c. 
- KALDI_ASSERT(x1 >= 0.0 && x1 <= 1.0); - if (x1 == 0.0) return y0; - else if (x1 == 1.0) return y2; - - BaseFloat c = y0, - a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1), - b = y2 - a - c; - return a * x * x + b * x + c; -} - -// This function does -// (*cos)(t) = cos(2 pi t freq / samp_freq) -// (*sin)(t) = sin(2 pi t freq / samp_freq) -//static -void SinusoidDetector::CreateCosAndSin(BaseFloat samp_freq, - BaseFloat freq, - VectorBase *cos_vec, - VectorBase *sin_vec) { - int32 dim = cos_vec->Dim(), batch_size = 100; - KALDI_ASSERT(dim == sin_vec->Dim()); - BaseFloat *cos_data = cos_vec->Data(), *sin_data = sin_vec->Data(); - BaseFloat factor_real = cos(M_2PI * freq / samp_freq), - factor_im = sin(M_2PI * freq / samp_freq); - - // process frames in batches of size "batch_size", after which we recompute - // the starting point to prevent loss of accuracy due to drift. - for (int32 b = 0; b * batch_size < dim; b++) { - int32 t_offset = b * batch_size, - t_end = std::min(dim, t_offset + batch_size); - double phase = M_2PI * t_offset * freq / samp_freq; - // treat x as a complex variable which initially is equal to amplitude * exp(i * phase), - // but which gets multiplied by "factor" on each frame. - BaseFloat x_real = cos(phase), x_im = sin(phase); - for (int32 t = t_offset; t < t_end; t++) { - cos_data[t] = x_real; - sin_data[t] = x_im; - ComplexMul(factor_real, factor_im, &x_real, &x_im); // x *= factor. - } - } -} - -SinusoidDetector::SinusoidDetector(BaseFloat samp_freq, - int32 num_samp): - samp_freq_(samp_freq), - num_samples_(num_samp), - num_samples_padded_(RoundUpToNearestPowerOfTwo(num_samp)), - fft_(num_samples_padded_), - factor1_(3.1), - factor2_(1.42) { - ComputeCoefficients(); -} - -void SinusoidDetector::SelfTest( - const VectorBase &signal, - const std::vector &info, - BaseFloat final_freq, - BaseFloat final_energy) { - int32 num_bins = num_samples_padded_ * 2 + 1; - - - { - BaseFloat cutoff = 0.0; - for (int32 k = 0; k <= num_bins; k += 4) - cutoff = std::max(cutoff, info[k].energy); - BaseFloat energy_upper_bound = factor1_ * cutoff; - if (final_energy > energy_upper_bound) { - KALDI_WARN << "Self-testing failed [factor1]: " - << final_energy << " > " << energy_upper_bound - << ", num-samples is " << num_samples_ - << ", freq/nyquist = " - << (final_freq / (samp_freq_ * 0.5)) - << "- would require factor1 >= " - << (final_energy / cutoff); - } - } - { - BaseFloat cutoff = 0.0; - for (int32 k = 0; k <= num_bins; k += 2) - if (info[k].valid) - cutoff = std::max(cutoff, info[k].energy); - BaseFloat energy_upper_bound = factor2_ * cutoff; - if (final_energy > energy_upper_bound) { - KALDI_WARN << "Self-testing failed [factor2]: " - << final_energy << " > " << energy_upper_bound - << ", num-samples is " << num_samples_ - << ", freq/nyquist = " - << (final_freq / (samp_freq_ * 0.5)) - << "- would require factor2 >= " - << (final_energy / cutoff); - - } - } - -} - - -BaseFloat SinusoidDetector::OptimizeFrequency( - const std::vector &info, - int32 *bin_out, - BaseFloat *offset_out) const { - - BaseFloat max_energy = 0.0; - *bin_out = -1; - int32 max_freq = num_samples_padded_ * 2; - - // For each bin, we consider the frequency range [bin, bin+1, bin+2], - // and if we have info for all those bins, do a quadratic interpolation to - // find the maximum within the range. - for (int32 bin = 0; bin + 2 <= max_freq; bin++) { - if (info[bin].valid && info[bin+1].valid && info[bin+2].valid) { - // First handle the left side of the bin. 
- BaseFloat best_x, best_y; - QuadraticMaximizeEqualSpaced(info[bin].energy, info[bin+1].energy, - info[bin+2].energy, &best_x, &best_y); - if (best_y > max_energy) { - max_energy = best_y; - if (best_x <= 1.0) { - *bin_out = bin; - *offset_out = best_x; - } else { - *bin_out = bin + 1; - *offset_out = best_x - 1; - } - } - } - } - return max_energy; -} - - -BaseFloat SinusoidDetector::DetectSinusoid( - BaseFloat min_energy, - const VectorBase &signal, - Sinusoid *sinusoid) { - if (signal(0) == 0.0 && signal.Norm(2.0) == 0.0) - return 0.0; - KALDI_ASSERT(signal.Dim() == num_samples_); - Vector fft(num_samples_padded_); - fft.Range(0, num_samples_).CopyFromVec(signal); - bool forward = true; - fft_.Compute(fft.Data(), forward); - - std::vector info; - ComputeCoarseInfo(fft, &info); - // we now have info for the "coarse" bins. - - // each element b of "bins" will be a multiple of 4: it's possible - // that the best frequency is in the range [b, b+4] - std::vector bins; - FindCandidateBins(min_energy, info, &bins); - - if (bins.empty()) - return 0.0; // not enough energy in signal. - - for (size_t i = 0; i < bins.size(); i++) { - int32 bin = bins[i]; - ComputeBinInfo(signal, bin, &(info[bin])); - } - - std::vector bins2; - FindCandidateBins2(min_energy, info, &bins2); - - for (size_t i = 0; i < bins2.size(); i++) { - int32 bin = bins2[i]; - ComputeBinInfo(signal, bin, &(info[bin])); - } - - // compute energy for the predicted-optimum point, which will usually be - // between bins, with an offset. - int32 bin; - BaseFloat offset; - - BaseFloat opt_energy = OptimizeFrequency(info, &bin, &offset); - - if (opt_energy == 0.0) - return 0.0; - - BaseFloat max_freq = (bin + offset) * samp_freq_ / (num_samples_padded_ * 4); - - KALDI_VLOG(4) << "Best frequency based on interpolation is " - << max_freq << ", best energy is " - << opt_energy << ", bin is " << bin; - - OptimizedInfo final_info; - - FineOptimizeFrequency(signal, bin, offset, &info, &final_info); - - // the following while loop will rarely be accessed. - while (final_info.offset == 0.0 && bin > 0) { - bin--; - FineOptimizeFrequency(signal, bin, 1.0, &info, &final_info); - } - - // the following while loop will rarely be accessed. - while (final_info.offset == 1.0 && bin < num_samples_padded_ * 2) { - bin++; - FineOptimizeFrequency(signal, bin, 0.0, &info, &final_info); - } - - if (bin <= 1 || bin >= num_samples_padded_ * 2 - 2) { - // If we're in the lowest or next-to-lowest bin, or the highest or - // next-to-highest allowed bin (note, "bin" here is a range, and it can - // never have the value num_samples_padded_ * 2), we tend to get more - // estimation error than usual, so do another round of optimization. 
-    FineOptimizeFrequency(signal, bin, final_info.offset, &info, &final_info);
-  }
-
-  BaseFloat final_freq = (final_info.bin + final_info.offset) * samp_freq_ / (num_samples_padded_ * 4);
-  KALDI_VLOG(4) << "Final optimized info is: freq " << final_freq
-                << ", cos coeff " << final_info.cos_coeff << ", sin coeff "
-                << final_info.sin_coeff << ", energy " << final_info.energy;
-
-  if (GetVerboseLevel() > 1)
-    SelfTest(signal, info, final_freq, final_info.energy);
-
-  if (final_info.energy >= min_energy) {
-    sinusoid->amplitude = std::sqrt(final_info.cos_coeff * final_info.cos_coeff
-                                    + final_info.sin_coeff * final_info.sin_coeff);
-    sinusoid->freq = final_freq;
-    sinusoid->phase = -std::atan2(final_info.sin_coeff, final_info.cos_coeff);
-    KALDI_VLOG(4) << "Phase is " << sinusoid->phase << ", amplitude is "
-                  << sinusoid->amplitude << ", freq is " << sinusoid->freq;
-    return final_info.energy;
-  } else {
-    return 0.0;
-  }
-}
-
-
-/*
-  This function computes, for the original FFT bins, the amount of energy in
-  the signal that can be explained by a sinusoid at the corresponding frequency.
-
-  Let f be the continuous-valued frequency.
-
-  Define the vector C_f as
-    C_f = [ c_0, c_1 ... c_n ] where c_k = cos(2 pi k f / samp_freq)
-  [obviously this notation depends on f], and S_f the same thing with sin in
-  place of cos.
-
-  Let the signal, as a vector, be V.
-  We want to maximize the (positive) energy-difference:
-    ||V||^2 - || V - c C_f - s S_f ||^2
-  where c and s are the coefficients of C_f and S_f.
-  This quantity can be expanded as follows, where . means dot product:
-    \delta E = -c^2 C_f.C_f - s^2 S_f.S_f - 2 c s C_f.S_f + 2 c V.C_f + 2 s V.S_f
-  which can be written as follows, where ' means transpose:
-    \delta E = 2 [c s] v - [c s] M [c s]'
-  where M = [ C_f.C_f, C_f.S_f; C_f.S_f, S_f.S_f ],
-  and v = [V.C_f, V.S_f]'.
-  If M is invertible (i.e. for nonzero frequencies), this is maximized by
-    [c s]' = M^{-1} v
-  giving us the value
-    \delta E = v' M^{-1} v.
-  We'll compute the inverse of M in advance, inside ComputeCoefficients(), using
-  the formula [a b; c d]^-1 = 1/(ad - bc) [d -b; -c a].  For zero frequency and
-  at the Nyquist, M has the value [ a 0; 0 0 ], and we have the same type of
-  expression limited to the first dim of v, i.e. Minv = [ a^{-1} 0; 0 0 ], a
-  kind of pseudo-inverse.
- */
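For reference, the derivation in the comment above in display form (the same quantities restated; the last identity is the 2x2 inverse the code applies with c = b):

```latex
\Delta E = \|V\|^2 - \|V - c\,C_f - s\,S_f\|^2 = 2\,u^{\top}v - u^{\top}M u,
\qquad
u = \begin{bmatrix} c \\ s \end{bmatrix},\quad
M = \begin{bmatrix} C_f\cdot C_f & C_f\cdot S_f \\
                    C_f\cdot S_f & S_f\cdot S_f \end{bmatrix},\quad
v = \begin{bmatrix} V\cdot C_f \\ V\cdot S_f \end{bmatrix};
```
```latex
\hat{u} = M^{-1}v,\qquad
\Delta E_{\max} = v^{\top}M^{-1}v,\qquad
\begin{bmatrix} a & b \\ b & d \end{bmatrix}^{-1}
  = \frac{1}{ad-b^{2}}\begin{bmatrix} d & -b \\ -b & a \end{bmatrix}.
```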
-
-void SinusoidDetector::ComputeCoarseInfo(
-    const Vector<BaseFloat> &fft,
-    std::vector<InfoForBin> *info) const {
-  info->resize(num_samples_padded_ * 2 + 1);  // 4 times resolution of FFT itself.
-
-  const BaseFloat *fft_data = fft.Data();
-
-  int32 num_bins = num_samples_padded_ / 2 + 1;
-  for (int32 k = 0; k < num_bins; k++) {
-    BaseFloat real, im;
-    if (k == 0) {
-      real = fft_data[0];
-      im = 0.0;
-    } else if (k == num_samples_padded_ / 2) {
-      real = fft_data[1];
-      im = 0.0;
-    } else {
-      real = fft_data[k * 2];
-      im = fft_data[k * 2 + 1];
-    }
-    // v1 and v2 are the two components of the vector v in the math above.
-    BaseFloat v1 = real, v2 = -im;
-    // Minv_'s row indexes correspond to frequencies with 4 times more
-    // resolution than the FFT bins.
-    const BaseFloat *Minv_data = Minv_.RowData(k * 4);
-    // The matrix M^{-1} is of the form [a b; b d].
-    BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2];
-    // compute \delta E = v' M^{-1} v.
-    BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
-    InfoForBin &this_info = (*info)[k * 4];
-    this_info.valid = true;
-    this_info.cos_dot = real;
-    this_info.sin_dot = -im;
-    this_info.energy = delta_e;
-  }
-}
-
-
-void SinusoidDetector::ComputeCoefficients() {
-  int32 num_samp = num_samples_;
-  int32 num_freq = num_samples_padded_ * 2 + 1;
-  cos_.Resize(num_freq, num_samp);
-  sin_.Resize(num_freq, num_samp);
-
-  Vector<BaseFloat> cc(num_freq), cs(num_freq);
-  for (int32 k = 0; k < num_freq; k++) {
-    BaseFloat freq = k * samp_freq_ / (num_samples_padded_ * 4);
-    SubVector<BaseFloat> c(cos_, k), s(sin_, k);
-    CreateCosAndSin(samp_freq_, freq, &c, &s);
-    cc(k) = VecVec(c, c);
-    cs(k) = VecVec(c, s);
-  }
-
-  M_.Resize(num_freq, 3, kUndefined);
-  Minv_.Resize(num_freq, 3, kUndefined);
-
-  for (int32 k = 0; k < num_freq; k++) {
-    // Let the matrix M be [ a b; b d ].  [we don't write c because c == b].
-    // We want to compute Minv_.
-    BaseFloat a = cc(k), b = cs(k), d = num_samples_ - a;
-    M_(k, 0) = a;
-    M_(k, 1) = b;
-    M_(k, 2) = d;
-    if (k == 0 || k == num_freq - 1) {
-      // this is a special case; it's not really the inverse of M but it will
-      // give us the expression we want; it's like an inverse in just one
-      // dimension.
-      Minv_(k, 0) = 1.0 / a;
-      Minv_(k, 1) = 0.0;
-      Minv_(k, 2) = 0.0;
-    } else {
-      BaseFloat inv_det = 1.0 / (a * d - b * b);
-      // check for NaN and inf.
-      KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
-      // use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
-      BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det;
-      Minv_(k, 0) = inv_a;
-      Minv_(k, 1) = inv_b;
-      Minv_(k, 2) = inv_d;
-    }
-  }
-}
-
-
-// Does fine optimization of the frequency within this bin; returns the
-// final energy, the optimized frequency, and the cos and sin coefficients.
-void SinusoidDetector::FineOptimizeFrequency(
-    const VectorBase<BaseFloat> &signal,
-    int32 bin,
-    BaseFloat bin_offset,
-    std::vector<InfoForBin> *info_in,
-    OptimizedInfo *opt_info) const {
-  std::vector<InfoForBin> &info = *info_in;
-  if (!info[bin].valid) ComputeBinInfo(signal, bin, &(info[bin]));
-  if (!info[bin+1].valid) ComputeBinInfo(signal, bin+1, &(info[bin+1]));
-
-  const BaseFloat epsilon = 0.02, delta = 0.001;
-
-  // If the offset is very close to the edges of the bin, move it
-  // closer to the center.  Otherwise we may have problems with the
-  // steps below.  The initial offset is only used as a starting point
-  // anyway, so this won't affect the final value much.
-  if (bin_offset < epsilon)
-    bin_offset = epsilon;
-  if (bin_offset > 1.0 - epsilon)
-    bin_offset = 1.0 - epsilon;
-  KALDI_VLOG(4) << "Initial bin offset = " << bin_offset << ", bin = " << bin;
-
-  // create cos and sin waves of the specified frequency.
-  BaseFloat freq = (bin + bin_offset) * samp_freq_ / (num_samples_padded_ * 4);
-  Vector<BaseFloat> c(num_samples_, kUndefined), s(num_samples_, kUndefined);
-  CreateCosAndSin(samp_freq_, freq, &c, &s);
-
-  // these a, b and d values are the elements of the M matrix at this frequency
-  // "freq", i.e. the matrix M_f [ a b; b d ].  This will be invertible because
-  // we have ensured that the frequency is not too close to zero or the Nyquist.
-  BaseFloat a = VecVec(c, c), b = VecVec(c, s), d = num_samples_ - a;
-  BaseFloat inv_det = 1.0 / (a * d - b * b);
-  BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det;
-
-
-  BaseFloat v1 = VecVec(c, signal), v2 = VecVec(s, signal);
-
-  BaseFloat delta_e = v1 * v1 * inv_a + v2 * v2 * inv_d + 2 * v1 * v2 * inv_b;
-
-  KALDI_VLOG(4) << "Actual energy-change at frequency " << freq << " is "
-                << delta_e;
-  // "freq" is a frequency somewhere in the middle of the bin.
-
-  BaseFloat final_offset, final_energy;
-  QuadraticMaximize(bin_offset, info[bin].energy, delta_e, info[bin+1].energy,
-                    &final_offset, &final_energy);
-
-  KALDI_VLOG(4) << "After further optimizing, offset was " << final_offset
-                << " giving freq "
-                << ((bin+final_offset) * samp_freq_ / (num_samples_padded_*4))
-                << ", with energy " << final_energy;
-
-  // Use interpolation (using a quadratic function) to get the entries of the M
-  // matrix at the final, tuned frequency.  Interpolation on M is better than on
-  // M^{-1}, as its elements are much better behaved as the frequency varies.
-  const BaseFloat *M_left_data = M_.RowData(bin),
-      *M_right_data = M_.RowData(bin + 1);
-
-  BaseFloat a_interp = QuadraticInterpolate(bin_offset, M_left_data[0], a,
-                                            M_right_data[0], final_offset);
-  BaseFloat b_interp = QuadraticInterpolate(bin_offset, M_left_data[1], b,
-                                            M_right_data[1], final_offset);
-  BaseFloat d_interp = QuadraticInterpolate(bin_offset, M_left_data[2], d,
-                                            M_right_data[2], final_offset);
-
-  // Now get the inverse of the M matrix at the final point.
-  BaseFloat a_inv_interp, b_inv_interp, d_inv_interp;
-
-  if ((bin == 0 && final_offset < delta) ||
-      (bin == num_samples_padded_ * 2 && final_offset > 1.0 - delta)) {
-    // If we're extremely close to zero or the Nyquist, we'll have trouble
-    // inverting M; just invert in the 1st dimension (only have a cos
-    // component).
-    a_inv_interp = 1.0 / a_interp;
-    b_inv_interp = 0.0;
-    d_inv_interp = 0.0;
-  } else {
-    BaseFloat inv_det = 1.0 / (a_interp * d_interp - b_interp * b_interp);
-    // check for NaN and inf.
-    KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
-    // use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
-    a_inv_interp = d_interp * inv_det;
-    b_inv_interp = -b_interp * inv_det;
-    d_inv_interp = a_interp * inv_det;
-  }
-
-  BaseFloat v1_interp = QuadraticInterpolate(bin_offset, info[bin].cos_dot, v1,
-                                             info[bin+1].cos_dot, final_offset);
-  BaseFloat v2_interp = QuadraticInterpolate(bin_offset, info[bin].sin_dot, v2,
-                                             info[bin+1].sin_dot, final_offset);
-
-  opt_info->bin = bin;
-  opt_info->offset = final_offset;
-  // Recompute the energy-reduction using the more accurate interpolated values
-  // of v1 and v2 (the dot-products of the cos and sin with the signal), and
-  // of M.
-  opt_info->energy = v1_interp * v1_interp * a_inv_interp +
-      v2_interp * v2_interp * d_inv_interp +
-      2 * v1_interp * v2_interp * b_inv_interp;
-  // Compute the coefficients of the cos and sin in the optimal sinusoid, as
-  // M^{-1} v.
-  opt_info->cos_coeff = a_inv_interp * v1_interp + b_inv_interp * v2_interp;
-  opt_info->sin_coeff = b_inv_interp * v1_interp + d_inv_interp * v2_interp;
-}
-
-void SinusoidDetector::FindCandidateBins(
-    BaseFloat min_energy,
-    const std::vector<InfoForBin> &info,
-    std::vector<int32> *bins) const {
-
-  int32 max_bin = num_samples_padded_ * 2;
-
-  BaseFloat cutoff = min_energy;
-  for (int32 k = 0; k <= max_bin; k += 4) {
-    KALDI_ASSERT(info[k].valid);
-    cutoff = std::max(cutoff, info[k].energy);
-  }
-
-  for (int32 k = 0; k < max_bin; k += 4) {
-    BaseFloat energy_upper_bound =
-        factor1_ * std::max(info[k].energy,
-                            info[k+4].energy);
-    if (energy_upper_bound >= cutoff)
-      bins->push_back(k + 2);
-  }
-}
-
-
-void SinusoidDetector::FindCandidateBins2(
-    BaseFloat min_energy,
-    const std::vector<InfoForBin> &info,
-    std::vector<int32> *bins2) const {
-
-  int32 max_bin = num_samples_padded_ * 2;
-
-  BaseFloat cutoff = min_energy;
-  for (int32 k = 0; k <= max_bin; k += 2) {
-    if (info[k].valid)
-      cutoff = std::max(cutoff, info[k].energy);
-  }
-
-  for (int32 k = 0; k < max_bin; k += 2) {
-    if (info[k].valid && info[k+2].valid) {
-      BaseFloat energy_upper_bound =
-          factor2_ * std::max(info[k].energy,
-                              info[k+2].energy);
-      if (energy_upper_bound >= cutoff)
-        bins2->push_back(k + 1);
-    }
-  }
-}
-
-
-void SinusoidDetector::ComputeBinInfo(
-    const VectorBase<BaseFloat> &signal,
-    int32 bin,
-    InfoForBin *info) const {
-  KALDI_ASSERT(!info->valid);  // or wasted time.
-  info->valid = true;
-  BaseFloat v1 = info->cos_dot = VecVec(cos_.Row(bin), signal);
-  BaseFloat v2 = info->sin_dot = VecVec(sin_.Row(bin), signal);
-  const BaseFloat *Minv_data = Minv_.RowData(bin);
-  BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2];
-  // compute \delta E = v' M^{-1} v.
-  BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
-  info->energy = delta_e;
-}
-
-
-MultiSinusoidDetector::MultiSinusoidDetector(
-    const MultiSinusoidDetectorConfig &config,
-    int32 sampling_freq):
-    config_(config),
-    sample_freq_(sampling_freq),
-    samples_per_frame_subsampled_(0.001 * config.frame_length_ms *
-                                  static_cast<BaseFloat>(config.subsample_freq)),
-    waveform_finished_(false),
-    samples_consumed_(0),
-    resampler_(sampling_freq, config.subsample_freq,
-               config.subsample_filter_cutoff, config.subsample_filter_zeros),
-    detector_(config.subsample_freq, samples_per_frame_subsampled_) {
-  config.Check();
-}
-
-
-void MultiSinusoidDetector::Reset() {
-  waveform_finished_ = false;
-  samples_consumed_ = 0;
-  while (!subsampled_signal_.empty()) {
-    delete subsampled_signal_.front();
-    subsampled_signal_.pop_front();
-  }
-  resampler_.Reset();
-}
-
-void MultiSinusoidDetector::WaveformFinished() {
-  KALDI_ASSERT(!waveform_finished_ &&
-               "WaveformFinished() called twice.");
-
-  Vector<BaseFloat> empty_waveform;
-  subsampled_signal_.push_back(new Vector<BaseFloat>());
-  bool flush = true;
-  resampler_.Resample(empty_waveform, flush,
-                      subsampled_signal_.back());
-  waveform_finished_ = true;
-  if (subsampled_signal_.back()->Dim() == 0) {
-    delete subsampled_signal_.back();
-    subsampled_signal_.pop_back();
-  }
-}
-
-void MultiSinusoidDetector::AcceptWaveform(
-    const VectorBase<BaseFloat> &waveform) {
-
-
-  subsampled_signal_.push_back(new Vector<BaseFloat>());
-  bool flush = false;
-  resampler_.Resample(waveform, flush,
-                      subsampled_signal_.back());
-  if (subsampled_signal_.back()->Dim() == 0) {
-    delete subsampled_signal_.back();
-    subsampled_signal_.pop_back();
-  }
-}
-
-int32 MultiSinusoidDetector::NumSubsampledSamplesReady(int32 max_samp) const {
-  KALDI_ASSERT(samples_consumed_ >= 0 &&
-               ((subsampled_signal_.empty() &&
-                 samples_consumed_ == 0) ||
-                (!subsampled_signal_.empty() && samples_consumed_ <
-                 subsampled_signal_[0]->Dim())));
-
-  int32 ans = -samples_consumed_;
-  for (size_t i = 0; i < subsampled_signal_.size(); i++) {
-    ans += subsampled_signal_[i]->Dim();
-    if (ans > max_samp) break;
-  }
-  KALDI_ASSERT(ans >= 0);
-  return std::min(ans, max_samp);
-}
-
-bool MultiSinusoidDetector::Done() const {
-  int32 samp_ready = NumSubsampledSamplesReady(samples_per_frame_subsampled_);
-  if ((samp_ready >= samples_per_frame_subsampled_ && !waveform_finished_) ||
-      (samp_ready > 0 && waveform_finished_))
-    return false;
-  else
-    return true;
-}
-
-void MultiSinusoidDetector::GetNextFrameOfSignal(Vector<BaseFloat> *frame) {
-  frame->Resize(samples_per_frame_subsampled_, kUndefined);
-
-  int32 sample_offset = 0,
-      samples_needed = samples_per_frame_subsampled_;
-  while (samples_needed > 0 &&
-         !subsampled_signal_.empty()) {
-    Vector<BaseFloat> *src = subsampled_signal_.front();
-    int32 num_samples_avail = src->Dim() - samples_consumed_;
-    KALDI_ASSERT(num_samples_avail > 0);
-    int32 chunk_size = std::min(num_samples_avail,
-                                samples_needed);
-    frame->Range(sample_offset, chunk_size).CopyFromVec(
-        src->Range(samples_consumed_, chunk_size));
-    sample_offset += chunk_size;
-    samples_needed -= chunk_size;
-    samples_consumed_ += chunk_size;
-    if (samples_consumed_ == src->Dim()) {
-      samples_consumed_ = 0;
-      delete src;
-      subsampled_signal_.pop_front();
-    }
-  }
-  if (samples_needed > 0) {
-    KALDI_ASSERT(waveform_finished_ && sample_offset > 0);  // or code error.
-    frame->Range(sample_offset, samples_needed).SetZero();
-  }
-}
-
-
-void MultiSinusoidDetector::GetNextFrame(MultiSinusoidDetectorOutput *output) {
-  Vector<BaseFloat> frame;
-  GetNextFrameOfSignal(&frame);
-  // Mean subtraction
-  frame.Add(-1.0 * frame.Sum() / frame.Dim());
-  *output = MultiSinusoidDetectorOutput();  // reset to default.
-
-  BaseFloat signal_energy = VecVec(frame, frame);
-  output->tot_energy = signal_energy / frame.Dim();
-  if (signal_energy == 0.0) return;
-
-  // min_energy1 is the lowest energy we might care about.
-  BaseFloat min_energy1 = signal_energy *
-      std::min(config_.two_freq_min_total_energy * 0.5,
-               config_.one_freq_min_energy);
-
-  Sinusoid sinusoid1;
-  BaseFloat energy1 = detector_.DetectSinusoid(min_energy1,
-                                               frame,
-                                               &sinusoid1);
-
-  if (energy1 == 0.0) return;  // Nothing detected.
-
-  // we only care about the 2nd sinusoid if
-  // energy1 + energy2 >= signal_energy * two_freq_min_total_energy,
-  // and energy2 >= signal_energy * config.two_freq_min_energy.
-
-  BaseFloat min_energy2 =
-      std::max(signal_energy * config_.two_freq_min_energy,
-               signal_energy * config_.two_freq_min_total_energy -
-               energy1);
-
-  BaseFloat energy2;
-  Sinusoid sinusoid2;
-
-  // If there is enough energy left in the signal that we could
-  // possibly detect a sinusoid of energy at least min_energy2...
-  if (min_energy2 <= signal_energy - energy1) {
-    sinusoid1.phase += M_PI;  // reverse the phase.
-    AddSinusoid(config_.subsample_freq, sinusoid1, &frame);
-
-
-    energy2 = detector_.DetectSinusoid(min_energy2,
-                                       frame,
-                                       &sinusoid2);
-
-    if (energy2 > energy1) {
-      // The following is just for our information, so we are aware
-      // when the sinusoid detection gives us the non-optimal sinusoid
-      // first.
-      BaseFloat factor = energy2 / energy1;
-      KALDI_VLOG(2) << "Second sinusoid greater than first by a factor of "
-                    << factor << ".  (This means sinusoid detection is not "
-                    << "working ideally).";
-    }
-
-    if (DetectedTwoFrequency(signal_energy,
-                             sinusoid1, energy1,
-                             sinusoid2, energy2,
-                             output))
-      return;
-  } else {
-    energy2 = 0.0;
-  }
-  // We don't need the return status of the following; we just return anyway.
-  DetectedOneFrequency(signal_energy,
-                       sinusoid1, energy1,
-                       sinusoid2, energy2,
-                       output);
-}
-
-// Returns true if we detected an acceptable two-frequency tone.
-bool MultiSinusoidDetector::DetectedTwoFrequency(
-    BaseFloat signal_energy,
-    const Sinusoid &sinusoid1,
-    BaseFloat energy1,
-    const Sinusoid &sinusoid2,
-    BaseFloat energy2,
-    MultiSinusoidDetectorOutput *output) {
-
-  if (energy1 + energy2 >= signal_energy *
-      config_.two_freq_min_total_energy &&
-      std::min(energy1, energy2) >= signal_energy *
-      config_.two_freq_min_energy &&
-      std::min(sinusoid1.freq, sinusoid2.freq) >= config_.min_freq &&
-      std::max(sinusoid1.freq, sinusoid2.freq) <= config_.max_freq) {
-    output->freq1 = sinusoid1.freq;
-    output->energy1 = energy1 / signal_energy;
-    output->freq2 = sinusoid2.freq;
-    output->energy2 = energy2 / signal_energy;
-    if (output->freq1 > output->freq2) {
-      std::swap(output->freq1, output->freq2);
-      std::swap(output->energy1, output->energy2);
-    }
-    return true;
-  } else {
-    return false;
-  }
-}
-
-
-// Returns true if we detected an acceptable one-frequency tone.
-bool MultiSinusoidDetector::DetectedOneFrequency(
-    BaseFloat signal_energy,
-    const Sinusoid &sinusoid1,
-    BaseFloat energy1,
-    const Sinusoid &sinusoid2,
-    BaseFloat energy2,
-    MultiSinusoidDetectorOutput *output) {
-  // If sinusoid detection were performing exactly to spec, we could assume
-  // energy1 >= energy2, but we don't assume this as it's not guaranteed.
-  if (energy1 > energy2 && energy1 > signal_energy *
-      config_.one_freq_min_energy &&
-      sinusoid1.freq >= config_.min_freq &&
-      sinusoid1.freq <= config_.max_freq) {
-    output->freq1 = sinusoid1.freq;
-    output->energy1 = energy1 / signal_energy;
-    output->freq2 = 0.0;
-    output->energy2 = 0.0;
-    return true;
-  } else if (energy2 > energy1 && energy2 > signal_energy *
-             config_.one_freq_min_energy &&
-             sinusoid2.freq >= config_.min_freq &&
-             sinusoid2.freq <= config_.max_freq) {
-    output->freq1 = sinusoid2.freq;
-    output->energy1 = energy2 / signal_energy;
-    output->freq2 = 0.0;
-    output->energy2 = 0.0;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-
-void DetectSinusoids(const VectorBase<BaseFloat> &signal,
-                     MultiSinusoidDetector *detector,
-                     Matrix<BaseFloat> *output) {
-  std::vector<MultiSinusoidDetectorOutput> output_vec;
-  detector->AcceptWaveform(signal);
-  detector->WaveformFinished();
-
-  int32 safety_margin = 10, approx_num_frames = safety_margin +
-      (signal.Dim() / (detector->SamplingFrequency() *
-                       detector->FrameShiftSecs()));
-  output_vec.reserve(approx_num_frames);
-  while (!detector->Done()) {
-    output_vec.resize(output_vec.size() + 1);
-    detector->GetNextFrame(&(output_vec.back()));
-  }
-  detector->Reset();
-  if (output_vec.empty()) {
-    output->Resize(0, 0);
-  } else {
-    output->Resize(output_vec.size(), 5, kUndefined);
-    for (int32 i = 0; i < output->NumRows(); i++) {
-      BaseFloat *row_data = output->RowData(i);
-      MultiSinusoidDetectorOutput &this_output = output_vec[i];
-      row_data[0] = this_output.tot_energy;
-      row_data[1] = this_output.freq1;
-      row_data[2] = this_output.energy1;
-      row_data[3] = this_output.freq2;
-      row_data[4] = this_output.energy2;
-    }
-  }
-}
-
-
-}  // namespace kaldi
-
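Before the header deletion that follows, it is worth recording how this API fit together. A hypothetical driver (a sketch built only from the signatures in the deleted files, not a tool that existed): detect the dominant sinusoid, cancel it by adding a phase-reversed copy, then search the residual — the same detect/subtract/detect pattern GetNextFrame() used internally:

```c++
#include "base/kaldi-common.h"
#include "feat/sinusoid-detection.h"  // the header deleted below

// Sketch: find up to two dominant sinusoids in one frame.  "frame" is
// modified in place (the first detected sinusoid is subtracted from it).
void DetectUpToTwo(kaldi::BaseFloat samp_freq, kaldi::BaseFloat min_energy,
                   kaldi::SinusoidDetector *detector,
                   kaldi::Vector<kaldi::BaseFloat> *frame) {
  using namespace kaldi;
  Sinusoid s1;
  BaseFloat energy1 = detector->DetectSinusoid(min_energy, *frame, &s1);
  if (energy1 == 0.0) return;         // nothing sufficiently dominant.
  s1.phase += M_PI;                   // phase-reverse, so adding subtracts.
  AddSinusoid(samp_freq, s1, frame);  // cancel the first sinusoid.
  Sinusoid s2;
  BaseFloat energy2 = detector->DetectSinusoid(min_energy, *frame, &s2);
  KALDI_VLOG(2) << "Energies: " << energy1 << ", " << energy2;
}
```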
diff --git a/src/feat/sinusoid-detection.h b/src/feat/sinusoid-detection.h
deleted file mode 100644
index f6addc0b530..00000000000
--- a/src/feat/sinusoid-detection.h
+++ /dev/null
@@ -1,436 +0,0 @@
-// feat/sinusoid-detection.h
-
-// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_SINUSOID_DETECTION_H_
-#define KALDI_FEAT_SINUSOID_DETECTION_H_
-
-
-#include "base/kaldi-error.h"
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "feat/resample.h"
-#include <deque>
-
-namespace kaldi {
-/// @addtogroup feat FeatureExtraction
-/// @{
-
-
-struct Sinusoid {
-  // this structure is used to represent a sinusoid of the form
-  // amplitude * cos(2 pi freq t + phase), in the SinusoidDetector code.
-  BaseFloat amplitude;
-  BaseFloat freq;
-  BaseFloat phase;
-  Sinusoid(BaseFloat a, BaseFloat f, BaseFloat p):
-      amplitude(a), freq(f), phase(p) { }
-  Sinusoid() {}
-};
-
-
-// This function adds the given sinusoid to the signal, as:
-// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase).
-void AddSinusoid(BaseFloat samp_freq,
-                 const Sinusoid &sinusoid,
-                 VectorBase<BaseFloat> *signal);
-
-
-class SinusoidDetector {
- public:
-  SinusoidDetector(BaseFloat samp_freq,
-                   int32 num_samp);
-
-
-  // Detect the dominant sinusoid component in the signal, as long as the
-  // energy-reduction of the signal from subtracting that sinusoid would be >=
-  // "min_energy_change", and return that energy reduction; or zero if no
-  // candidate was found.
-  // non-const because the FFT class has a temporary buffer.
-  BaseFloat DetectSinusoid(BaseFloat min_energy_change,
-                           const VectorBase<BaseFloat> &signal,
-                           Sinusoid *sinusoid);
-
-  // This function does quadratic interpolation for a function that is known at
-  // three equally spaced points [x0 x1 x2] = [0 1 2], and we want the x-value
-  // and corresponding y-value at the maximum of the function within the range
-  // 0 <= x <= 2.  It's public for testing reasons.
-  static void QuadraticMaximizeEqualSpaced(
-      BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat *x, BaseFloat *y);
-
-
-  // This function does quadratic interpolation for a function that is known at
-  // three points x0, x1 and x2 with x0 = 0, 0 < x1 < 1 and x2 = 1, where we
-  // want the x-value and corresponding y-value at the maximum of the function
-  // within the range 0 <= x <= 1.  It's public for testing reasons.
-  static void QuadraticMaximize(
-      BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat *x, BaseFloat *y);
-
-  // This function does quadratic interpolation for a function that is known at
-  // three points x0, x1 and x2 with x0 = 0, 0 <= x1 <= 1 and x2 = 1, where
-  // we want the value at a specific value x.  The corresponding y-value is
-  // returned.
-  static BaseFloat QuadraticInterpolate(
-      BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat x);
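The following standalone snippet illustrates the three-point fit these helpers perform (same parameterization, x0 = 0, 0 < x1 < 1, x2 = 1, and the same coefficient formulas as the .cc file; `QuadInterp` is an illustrative stand-in, not the library function):

```c++
#include <cassert>
#include <cstdio>

// Fit y = a x^2 + b x + c through (0, y0), (x1, y1), (1, y2); evaluate at x.
double QuadInterp(double x1, double y0, double y1, double y2, double x) {
  assert(x1 > 0.0 && x1 < 1.0);
  double c = y0,
         a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1 * x1),
         b = y2 - a - c;
  return a * x * x + b * x + c;
}

int main() {
  // Sampling a true parabola f(x) = -2x^2 + 3x + 1 recovers it exactly:
  // f(0) = 1, f(0.4) = 1.88, f(1) = 2, and f(0.75) = 2.125.
  double y = QuadInterp(0.4, 1.0, 1.88, 2.0, 0.75);
  std::printf("interpolated %.6f, true %.6f\n", y, 2.125);
  return 0;
}
```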
-
- private:
-  BaseFloat samp_freq_;
-  int32 num_samples_;
-  int32 num_samples_padded_;  // Number of samples, after zero-padding to a
-                              // power of 2.
-  SplitRadixRealFft<BaseFloat> fft_;  // Object used to compute FFT of padded_signal_.
-
-  BaseFloat factor1_;  // When we search the range between two FFT bins, we
-                       // assume that the maximum energy-reduction within the
-                       // range may be greater than the maximum of the
-                       // energy-reductions at either side, by at most
-                       // "factor1", with factor1 > 1.0.  The analysis is quite
-                       // hard so we determine this factor empirically.  Making
-                       // this as small as possible helps us avoid searching too
-                       // many bins.
-
-  BaseFloat factor2_;  // As factor1, but for searches within a half-fft-bin
-                       // range.  Again determined empirically.  After that we
-                       // use quadratic interpolation to find the maximum energy.
-
-  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
-  // num_samples_, has in each row, a different frequency of cosine wave.
-  Matrix<BaseFloat> cos_;
-  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
-  // num_samples_, has in each row, a different frequency of sine wave.
-  Matrix<BaseFloat> sin_;
-
-  // M_ is a precomputed matrix of dimension (num_samples_padded_ * 2 + 1) by 3,
-  // containing the values a, b and d of the symmetric matrix [ a b; b d ].
-  // There is one of these matrices for each frequency, sampled at one quarter
-  // the spacing of the FFT bins.  There is a long comment next to the
-  // definition of ComputeCoefficients that describes this.
-  Matrix<BaseFloat> M_;
-
-  // Minv_ is the coefficients in the same format as M_, but containing the
-  // corresponding coefficients of the inverse matrix.  There is a long comment
-  // next to the definition of ComputeCoefficients that describes this.
-  Matrix<BaseFloat> Minv_;
-
-
-  struct InfoForBin {
-    bool valid;
-    BaseFloat cos_dot;  // dot product of signal with cosine on left frequency
-    BaseFloat sin_dot;  // dot product of signal with sine on left frequency
-    BaseFloat energy;   // energy.
-    InfoForBin(): valid(false) { }
-  };
-
-  // Info after fine optimization within a bin.
-  struct OptimizedInfo {
-    int32 bin;
-    BaseFloat offset;
-    BaseFloat energy;
-    BaseFloat cos_coeff;
-    BaseFloat sin_coeff;
-  };
-
-  // Compute the coefficients and energies at the original FFT bins (every
-  // fourth entry in "info").
-  void ComputeCoarseInfo(const Vector<BaseFloat> &fft,
-                         std::vector<InfoForBin> *info) const;
-
-
-  // After the coarse-level info is computed using ComputeCoarseInfo, finds a
-  // set of intermediate bin indexes to compute, that are the midpoints of
-  // coarse-level bins.
-  void FindCandidateBins(BaseFloat min_energy,
-                         const std::vector<InfoForBin> &info,
-                         std::vector<int32> *bins) const;
-
-  void FindCandidateBins2(BaseFloat min_energy,
-                          const std::vector<InfoForBin> &info,
-                          std::vector<int32> *bins) const;
-
-
-  void ComputeBinInfo(const VectorBase<BaseFloat> &signal,
-                      int32 bin, InfoForBin *info) const;
-
-
-  // For each bin b such that we have valid "info" data for bins b, b+1 and b+2,
-  // does quadratic interpolation to find the maximum predicted energy in the
-  // range [b, b+2].  The location of the maximum predicted energy is output to
-  // "bin_out" and "offset_out", and the corresponding predicted energy is
-  // returned.
-  //
-  // Note: if there are two different frequencies with similar maximum energies
-  // (e.g. within a factor of probably around 1.2 or so), the fact that
-  // OptimizeFrequency only returns one maximum may potentially lead to the
-  // smaller maximum being output.  We could have modified this to output
-  // multiple different maxima, which could have been more accurate in terms of
-  // being guaranteed to output the best maximum, but this probably wouldn't
-  // have a measurable impact on our application so we haven't bothered.
-  BaseFloat OptimizeFrequency(
-      const std::vector<InfoForBin> &info,
-      int32 *bin_out,
-      BaseFloat *offset_out) const;
-
-
-  // This function does
-  // (*cos)(t) = cos(2 pi t freq / samp_freq)
-  // (*sin)(t) = sin(2 pi t freq / samp_freq)
-  static void CreateCosAndSin(BaseFloat samp_freq,
-                              BaseFloat freq,
-                              VectorBase<BaseFloat> *cos,
-                              VectorBase<BaseFloat> *sin);
-
-  // Do fine optimization of the frequency within a bin, given a reasonable
-  // approximate position within it based on interpolation (that should be close
-  // to the optimum).
-  void FineOptimizeFrequency(
-      const VectorBase<BaseFloat> &signal,
-      int32 bin,
-      BaseFloat offset,
-      std::vector<InfoForBin> *info,
-      OptimizedInfo *opt_info) const;
-
-  // Computes the coefficients cos_, sin_, and Minv_.
-  void ComputeCoefficients();
-
-  // Calls some self-testing code that prints warnings if
-  // some of our assumptions were wrong.
-  void SelfTest(const VectorBase<BaseFloat> &signal,
-                const std::vector<InfoForBin> &info,
-                BaseFloat final_freq,
-                BaseFloat final_energy);
-
-};
-
-
-
-/**
-   This configuration class is for the frame-by-frame detection of
-   cases where there are one or two sinusoids that can explain
-   a lot of the energy in the signal.
-*/
-struct MultiSinusoidDetectorConfig {
-
-  // frame length in milliseconds
-  BaseFloat frame_length_ms;
-  // frame shift in milliseconds
-  BaseFloat frame_shift_ms;
-
-  // Proportion of the total energy of the signal that the quieter of
-  // the two sinusoids must comprise, in order to be counted, if two
-  // sinusoids are detected.
-  BaseFloat two_freq_min_energy;
-
-  // Proportion of the total energy of the signal that both sinusoids (if
-  // two are detected) must comprise, in order to be output.
-  BaseFloat two_freq_min_total_energy;
-
-  // Proportion of the total energy of the signal that a single sinusoid
-  // must comprise, in order to be output, if we are considering
-  // reporting a single sinusoid.  Note: detection of two sinusoids
-  // will take precedence over detection of a single sinusoid.
-  BaseFloat one_freq_min_energy;
-
-  // Lower end of frequency range that we consider; frequencies outside
-  // this range are not candidates to appear in the detected output.
-  BaseFloat min_freq;
-  // Upper end of frequency range that we consider, see min_freq.
-  BaseFloat max_freq;
-
-  // Frequency to which we subsample the signal before processing it.
-  // Must be integer because of how LinearResample code works.
-  int32 subsample_freq;
-
-  // Filter cut-off frequency used in sub-sampling.
-  BaseFloat subsample_filter_cutoff;
-
-  // the following is not critical and is not exported to the
-  // command line.
-  int32 subsample_filter_zeros;
-
-  MultiSinusoidDetectorConfig():
-      frame_length_ms(20), frame_shift_ms(10),
-      two_freq_min_energy(0.2), two_freq_min_total_energy(0.6),
-      one_freq_min_energy(0.75), min_freq(300.0),
-      max_freq(1800.0), subsample_freq(4000),
-      subsample_filter_cutoff(1900.0), subsample_filter_zeros(5) {}
-
-  void Register(OptionsItf *opts) {
-    opts->Register("frame-length", &frame_length_ms,
-                   "Frame length in milliseconds");
-    opts->Register("frame-shift", &frame_shift_ms,
-                   "Frame shift in milliseconds");
-    opts->Register("two-freq-min-energy", &two_freq_min_energy,
-                   "For detecting two-frequency tones, minimum energy that "
-                   "the quieter frequency must have (relative to total "
-                   "energy of frame)");
-    opts->Register("two-freq-min-total-energy", &two_freq_min_total_energy,
-                   "For detecting two-frequency tones, minimum energy that "
-                   "the two frequencies together must have (relative to total "
-                   "energy of frame)");
-    opts->Register("one-freq-min-energy", &one_freq_min_energy, "For detecting "
-                   "single-frequency tones, minimum energy that the frequency "
-                   "must have relative to total energy of frame");
-    opts->Register("min-freq", &min_freq, "Minimum frequency of sinusoid that "
-                   "will be detected");
-    opts->Register("max-freq", &max_freq, "Maximum frequency of sinusoid that "
-                   "will be detected");
-    opts->Register("subsample-freq", &subsample_freq, "Frequency at which "
-                   "we subsample the signal");
-    opts->Register("subsample-filter-cutoff", &subsample_filter_cutoff, "Filter "
-                   "cut-off frequency used in subsampling");
-  }
-  void Check() const {
-    KALDI_ASSERT(frame_length_ms > 0 && frame_length_ms >= frame_shift_ms &&
-                 min_freq > 0 && max_freq > min_freq &&
-                 subsample_filter_cutoff > max_freq &&
-                 subsample_freq / 2 > subsample_filter_cutoff &&
-                 subsample_filter_zeros > 2 &&
-                 subsample_filter_cutoff > 0.25 * subsample_freq &&
-                 two_freq_min_total_energy > two_freq_min_energy &&
-                 two_freq_min_energy <= 0.5 * two_freq_min_total_energy);
-    BaseFloat samples_per_frame_shift =
-        frame_shift_ms * 0.001 * subsample_freq;
-    // The following assert ensures that the frame-shift is an exact
-    // number of samples, so that the locations of the frames
-    // don't gradually drift out of sync.
-    KALDI_ASSERT(fabs(samples_per_frame_shift -
-                      static_cast<int32>(samples_per_frame_shift)) <
-                 0.001);
-
-  }
-};
-
-struct MultiSinusoidDetectorOutput {
-  BaseFloat tot_energy;  // Total energy per sample of this frame (sum-square of
-                         // signal divided by number of samples); this is after
-                         // downsampling and mean subtraction.
-  BaseFloat freq1;    // Lower frequency detected, or 0 if none detected.
-  BaseFloat energy1;  // Energy of lower frequency divided by total energy, or 0
-                      // if none detected.
-  BaseFloat freq2;    // Higher frequency detected, or 0 if zero or one
-                      // frequencies detected.
-  BaseFloat energy2;  // Energy of higher frequency divided by total energy, or 0
-                      // if zero or one frequencies detected.
-  MultiSinusoidDetectorOutput(): tot_energy(0.0), freq1(0.0),
-                                 energy1(0.0), freq2(0.0), energy2(0.0) { }
-};
-
-
-class MultiSinusoidDetector {
- public:
-
-  // Initialize sinusoid detector.  Sampling frequency must be integer.
-  MultiSinusoidDetector(const MultiSinusoidDetectorConfig &config,
-                        int32 sampling_freq);
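A hedged sketch of the intended calling sequence for this class (the method names are the ones declared just below; the chunked loop structure itself is illustrative, not code that existed in the library):

```c++
#include "base/kaldi-common.h"
#include "feat/sinusoid-detection.h"  // this (deleted) header

// Sketch of online use: push audio chunk by chunk, drain finished frames.
void ProcessChunk(kaldi::MultiSinusoidDetector *detector,
                  const kaldi::Vector<kaldi::BaseFloat> &chunk,
                  bool last_chunk) {
  using namespace kaldi;
  detector->AcceptWaveform(chunk);
  if (last_chunk) detector->WaveformFinished();  // flush pending samples.
  MultiSinusoidDetectorOutput out;
  while (!detector->Done()) {
    detector->GetNextFrame(&out);
    if (out.freq1 != 0.0)
      KALDI_VLOG(2) << "freq1 " << out.freq1 << " freq2 " << out.freq2;
  }
  if (last_chunk) detector->Reset();  // ready for the next waveform.
}
```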
-
-  /// This is how the class accepts its input.  You can put the waveform in
-  /// piece by piece, if it's an online application.
-  void AcceptWaveform(const VectorBase<BaseFloat> &waveform);
-
-  /// The user calls this to announce to the class that the waveform has ended;
-  /// this forces any pending data to be flushed.
-  void WaveformFinished();
-
-  /// Resets the state of the class so you can start processing another waveform.
-  void Reset();
-
-  /// This returns true if the class currently has no more data ready to output.
-  bool Done() const;
-
-  /// Outputs the next frame of output to "output", which must be non-NULL.
-  /// It is an error to call this if Done() has returned true, or has not been
-  /// checked.
-  void GetNextFrame(MultiSinusoidDetectorOutput *output);
-
-  BaseFloat FrameShiftSecs() const { return 0.001 * config_.frame_shift_ms; }
-
-  BaseFloat SamplingFrequency() const { return sample_freq_; }
-
- private:
-  // Gets the next frame of subsampled signal, and consumes the appropriate
-  // amount of stored data.  It is an error to call this if Done() returned
-  // true.
-  void GetNextFrameOfSignal(Vector<BaseFloat> *frame);
-
-  // returns true and sets freq1, freq2, energy1 and energy2 in "output" if we
-  // successfully detected an acceptable two-frequency tone.
-  bool DetectedTwoFrequency(BaseFloat signal_energy,
-                            const Sinusoid &sinusoid1,
-                            BaseFloat energy1,
-                            const Sinusoid &sinusoid2,
-                            BaseFloat energy2,
-                            MultiSinusoidDetectorOutput *output);
-
-  // returns true and sets freq1, freq2, energy1 and energy2 in "output" if we
-  // successfully detected an acceptable one-frequency tone.
-  bool DetectedOneFrequency(BaseFloat signal_energy,
-                            const Sinusoid &sinusoid1,
-                            BaseFloat energy1,
-                            const Sinusoid &sinusoid2,
-                            BaseFloat energy2,
-                            MultiSinusoidDetectorOutput *output);
-
-
-  // Returns std::min(max_samp, sum-of-samples-in-subsampled_signal_).
-  // (the std::min is for efficiency so we don't have to visit the
-  // whole list).
-  int32 NumSubsampledSamplesReady(int32 max_samp) const;
-
-  MultiSinusoidDetectorConfig config_;
-  int32 sample_freq_;
-  int32 samples_per_frame_subsampled_;  // (samples per frame at subsampled
-                                        // rate).
-
-  // True if the user has called WaveformFinished().
-  bool waveform_finished_;
-
-  // Pieces of the subsampled signal that are awaiting processing.
-  // Normally there will be just one element here, but if someone calls
-  // AcceptWaveform multiple times before getting output, there could
-  // be more elements.  All of these pieces are nonempty.
-  std::deque<Vector<BaseFloat>* > subsampled_signal_;
-
-  // stores the number of samples consumed from the first member of
-  // subsampled_signal_.  We will always have samples_consumed_ >= 0 and either
-  // (subsampled_signal_.empty() && samples_consumed_ == 0) or
-  // samples_consumed_ < subsampled_signal_[0]->Dim().
-  int32 samples_consumed_;
-
-
-  // This object is used to subsample the signal.
-  LinearResample resampler_;
-
-  // This object is used to detect sinusoids in the subsampled
-  // frames.
-  SinusoidDetector detector_;
-};
-
-// Detect sinusoids.  Signal should be sampled at detector->SamplingFrequency().
-void DetectSinusoids(const VectorBase<BaseFloat> &signal,
-                     MultiSinusoidDetector *detector,
-                     Matrix<BaseFloat> *output);
-
-
-
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-#endif  // KALDI_FEAT_SINUSOID_DETECTION_H_
diff --git a/src/featbin/Makefile b/src/featbin/Makefile
index dc2bea215d8..d6d85893289 100644
--- a/src/featbin/Makefile
+++ b/src/featbin/Makefile
@@ -14,8 +14,9 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
            apply-cmvn-sliding compute-cmvn-stats-two-channel compute-kaldi-pitch-feats \
            process-kaldi-pitch-feats compare-feats wav-to-duration add-deltas-sdc \
            compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \
-           wav-reverberate append-vector-to-feats detect-sinusoids shift-feats \
-           concat-feats append-post-to-feats post-to-feats
+           wav-reverberate append-vector-to-feats shift-feats concat-feats \
+           append-post-to-feats post-to-feats vector-to-feat \
+           extract-column compute-snr-targets
 
 OBJFILES =
diff --git a/src/featbin/apply-cmvn-sliding.cc b/src/featbin/apply-cmvn-sliding.cc
index 4a6d02d16cd..105319761b5 100644
--- a/src/featbin/apply-cmvn-sliding.cc
+++ b/src/featbin/apply-cmvn-sliding.cc
@@ -35,10 +35,13 @@ int main(int argc, char *argv[]) {
         "Useful for speaker-id; see also apply-cmvn-online\n"
         "\n"
         "Usage: apply-cmvn-sliding [options] <feats-rspecifier> <feats-wspecifier>\n";
-
+
+    std::string skip_dims_str;
     ParseOptions po(usage);
     SlidingWindowCmnOptions opts;
     opts.Register(&po);
+    po.Register("skip-dims", &skip_dims_str, "Dimensions for which to skip "
+                "normalization: colon-separated list of integers (e.g. 13:14:15)");
 
     po.Read(argc, argv);
 
@@ -47,15 +50,24 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
+    std::vector<int32> skip_dims;  // optionally use "fake"
+                                   // (zero-mean/unit-variance) stats for some
+                                   // dims to disable normalization.
+    if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
+      KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
+                << "integers)";
+    }
+
+
     int32 num_done = 0, num_err = 0;
-
+
     std::string feat_rspecifier = po.GetArg(1);
     std::string feat_wspecifier = po.GetArg(2);
 
     SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
     BaseFloatMatrixWriter feat_writer(feat_wspecifier);
-
-    for (;!feat_reader.Done(); feat_reader.Next()) {
+
+    for (; !feat_reader.Done(); feat_reader.Next()) {
       std::string utt = feat_reader.Key();
       Matrix<BaseFloat> feat(feat_reader.Value());
       if (feat.NumRows() == 0) {
@@ -67,7 +79,7 @@ int main(int argc, char *argv[]) {
                                  feat.NumCols(), kUndefined);
 
       SlidingWindowCmn(opts, feat, &cmvn_feat);
-
+
       feat_writer.Write(utt, cmvn_feat);
       num_done++;
     }
diff --git a/src/featbin/compute-snr-targets.cc b/src/featbin/compute-snr-targets.cc
new file mode 100644
index 00000000000..cdb7ef66c2a
--- /dev/null
+++ b/src/featbin/compute-snr-targets.cc
@@ -0,0 +1,273 @@
+// featbin/compute-snr-targets.cc
+
+// Copyright 2015-2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Compute SNR targets using clean and noisy speech features.\n"
+        "The targets can be of 3 types --\n"
+        "Irm (Ideal Ratio Mask) = Clean fbank / (Clean fbank + Noise fbank)\n"
+        "FbankMask = Clean fbank / Noisy fbank\n"
+        "Snr (Signal To Noise Ratio) = Clean fbank / Noise fbank\n"
+        "Both input and output features are assumed to be in log domain.\n"
+        "ali-rspecifier and silence-phones are used to identify whether "
+        "a particular frame is \"clean\" or not.  Silence frames in the "
+        "\"clean\" fbank are treated as \"noise\" and hence the SNR for those "
+        "frames is -inf in log scale.\n"
+        "Usage: compute-snr-targets [options] <clean-feats-rspecifier> "
+        "<noisy-feats-rspecifier> <targets-wspecifier>\n"
+        " or:   compute-snr-targets [options] --binary-targets "
+        "<feats-rspecifier> <targets-wspecifier>\n"
+        "e.g.: compute-snr-targets scp:clean.scp scp:noisy.scp ark:targets.ark\n";
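Writing l for log filterbank energies, the three target types in the usage message above reduce to log-domain subtractions, which is exactly the arithmetic the code below performs (LogAddExpMat for the logadd, AddMat(-1.0, ...) for the subtraction):

```latex
T_{\mathrm{Irm}} = \ell_{\mathrm{clean}}
    - \operatorname{logadd}(\ell_{\mathrm{clean}}, \ell_{\mathrm{noise}}),
\qquad
T_{\mathrm{FbankMask}} = \ell_{\mathrm{clean}} - \ell_{\mathrm{noisy}},
\qquad
T_{\mathrm{Snr}} = \ell_{\mathrm{clean}} - \ell_{\mathrm{noise}}.
```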
" + "Applicable only with --binary-targets is specified"); + + po.Read(argc, argv); + + if (po.NumArgs() != 3 && po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::vector silence_phones; + if (!silence_phones_str.empty()) { + if (!SplitStringToIntegers(silence_phones_str, ":", false, &silence_phones)) { + KALDI_ERR << "Invalid silence-phones string " << silence_phones_str; + } + std::sort(silence_phones.begin(), silence_phones.end()); + } + + double floor = kLogZeroDouble, ceiling = -kLogZeroDouble; + + if (floor_str != "-inf") + if (!ConvertStringToReal(floor_str, &floor)) { + KALDI_ERR << "Invalid --floor value " << floor_str; + } + + if (ceiling_str != "inf") + if (!ConvertStringToReal(ceiling_str, &ceiling)) { + KALDI_ERR << "Invalid --ceiling value " << ceiling_str; + } + + int32 num_done = 0, num_err = 0, num_success = 0; + int64 num_sil_frames = 0; + int64 num_speech_frames = 0; + + if (!binary_targets) { + // This is the 'normal' case, where we have both clean and + // noise/corrupted input features. + // The word 'noisy' in the variable names is used to mean 'corrupted'. + std::string clean_rspecifier = po.GetArg(1), + noisy_rspecifier = po.GetArg(2), + targets_wspecifier = po.GetArg(3); + + SequentialBaseFloatMatrixReader noisy_reader(noisy_rspecifier); + RandomAccessBaseFloatMatrixReader clean_reader(clean_rspecifier); + BaseFloatMatrixWriter kaldi_writer(targets_wspecifier); + + RandomAccessInt32VectorReader alignment_reader(ali_rspecifier); + + for (; !noisy_reader.Done(); noisy_reader.Next(), num_done++) { + const std::string &key = noisy_reader.Key(); + Matrix total_energy(noisy_reader.Value()); + // Although this is called 'energy', it is actually log filterbank + // features of noise or corrupted files + // Actually noise feats in the case of Irm and Snr + + // TODO: Support multiple corrupted version for a particular clean file + std::string uniq_key = key; + if (!clean_reader.HasKey(uniq_key)) { + KALDI_WARN << "Could not find uniq key " << uniq_key << " " + << "in clean feats " << clean_rspecifier; + num_err++; + continue; + } + + Matrix clean_energy(clean_reader.Value(uniq_key)); + + if (target_type == "Irm") { + total_energy.LogAddExpMat(1.0, clean_energy, kNoTrans); + } + + if (!ali_rspecifier.empty()) { + if (!alignment_reader.HasKey(uniq_key)) { + KALDI_WARN << "Could not find uniq key " << uniq_key + << "in alignment " << ali_rspecifier; + num_err++; + continue; + } + const std::vector &ali = alignment_reader.Value(key); + + if (std::abs(static_cast (ali.size()) - clean_energy.NumRows()) > length_tolerance) { + KALDI_WARN << "Mismatch in number of frames in alignment " + << "and feats; " << static_cast(ali.size()) + << " vs " << clean_energy.NumRows(); + num_err++; + continue; + } + + int32 length = std::min(static_cast(ali.size()), clean_energy.NumRows()); + if (ali.size() < length) + // TODO: Support this case + KALDI_ERR << "This code currently does not support the case " + << "where alignment smaller than features because " + << "it is not expected to happen"; + + KALDI_ASSERT(clean_energy.NumRows() == length); + KALDI_ASSERT(total_energy.NumRows() == length); + + if (clean_energy.NumRows() < length) clean_energy.Resize(length, clean_energy.NumCols(), kCopyData); + if (total_energy.NumRows() < length) total_energy.Resize(length, total_energy.NumCols(), kCopyData); + + for (int32 i = 0; i < clean_energy.NumRows(); i++) { + if (std::binary_search(silence_phones.begin(), silence_phones.end(), ali[i])) { + clean_energy.Row(i).Set(kLogZeroDouble); + 
+              num_sil_frames++;
+            } else {
+              num_speech_frames++;
+            }
+          }
+        }
+
+        clean_energy.AddMat(-1.0, total_energy);
+        if (ceiling_str != "inf") {
+          clean_energy.ApplyCeiling(ceiling);
+        }
+
+        if (floor_str != "-inf") {
+          clean_energy.ApplyFloor(floor);
+        }
+
+        kaldi_writer.Write(key, Matrix<BaseFloat>(clean_energy));
+        num_success++;
+      }
+    } else {
+      // Copying tables of features.
+      std::string feats_rspecifier = po.GetArg(1),
+          targets_wspecifier = po.GetArg(2);
+
+      SequentialBaseFloatMatrixReader feats_reader(feats_rspecifier);
+      BaseFloatMatrixWriter kaldi_writer(targets_wspecifier);
+
+      RandomAccessInt32VectorReader alignment_reader(ali_rspecifier);
+
+      for (; !feats_reader.Done(); feats_reader.Next(), num_done++) {
+        const std::string &key = feats_reader.Key();
+        const Matrix<BaseFloat> &feats = feats_reader.Value();
+
+        Matrix<BaseFloat> targets;
+
+        if (target_dim < 0)
+          targets.Resize(feats.NumRows(), feats.NumCols());
+        else
+          targets.Resize(feats.NumRows(), target_dim);
+
+        if (target_type == "Snr")
+          targets.Set(-kLogZeroDouble);
+
+        if (!ali_rspecifier.empty()) {
+          if (!alignment_reader.HasKey(key)) {
+            KALDI_WARN << "Could not find key " << key
+                       << " in alignment " << ali_rspecifier;
+            num_err++;
+            continue;
+          }
+
+          const std::vector<int32> &ali = alignment_reader.Value(key);
+
+          if (std::abs(static_cast<int32>(ali.size()) - feats.NumRows()) >
+              length_tolerance) {
+            KALDI_WARN << "Mismatch in number of frames in alignment "
+                       << "and feats; " << static_cast<int32>(ali.size())
+                       << " vs " << feats.NumRows();
+            num_err++;
+            continue;
+          }
+
+          int32 length = std::min(static_cast<int32>(ali.size()),
+                                  feats.NumRows());
+          KALDI_ASSERT(static_cast<int32>(ali.size()) >= length);
+
+          for (int32 i = 0; i < feats.NumRows(); i++) {
+            if (std::binary_search(silence_phones.begin(),
+                                   silence_phones.end(), ali[i])) {
+              targets.Row(i).Set(kLogZeroDouble);
+              num_sil_frames++;
+            } else {
+              num_speech_frames++;
+            }
+          }
+        }
+
+        if (ceiling_str != "inf") {
+          targets.ApplyCeiling(ceiling);
+        }
+
+        if (floor_str != "-inf") {
+          targets.ApplyFloor(floor);
+        }
+
+        kaldi_writer.Write(key, targets);
+        num_success++;
+      }
+    }
+
+    KALDI_LOG << "Computed SNR targets for " << num_success
+              << " out of " << num_done << " utterances; failed for "
+              << num_err;
+    KALDI_LOG << "Got [ " << num_speech_frames << ", " << num_sil_frames
+              << " ] frames of speech and silence respectively";
+    return (num_success > 0 ?
0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/featbin/copy-feats.cc b/src/featbin/copy-feats.cc
index 0fbcca6399a..f1f58653f2f 100644
--- a/src/featbin/copy-feats.cc
+++ b/src/featbin/copy-feats.cc
@@ -102,19 +102,31 @@ int main(int argc, char *argv[]) {
       CompressedMatrixWriter kaldi_writer(wspecifier);
       if (htk_in) {
         SequentialTableReader<HtkMatrixHolder> htk_reader(rspecifier);
-        for (; !htk_reader.Done(); htk_reader.Next(), num_done++)
+        for (; !htk_reader.Done(); htk_reader.Next(), num_done++) {
           kaldi_writer.Write(htk_reader.Key(),
                              CompressedMatrix(htk_reader.Value().first));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(htk_reader.Key(),
+                                    htk_reader.Value().first.NumRows());
+        }
       } else if (sphinx_in) {
         SequentialTableReader<SphinxMatrixHolder<> > sphinx_reader(rspecifier);
-        for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++)
+        for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++) {
           kaldi_writer.Write(sphinx_reader.Key(),
                              CompressedMatrix(sphinx_reader.Value()));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(sphinx_reader.Key(),
+                                    sphinx_reader.Value().NumRows());
+        }
       } else {
         SequentialBaseFloatMatrixReader kaldi_reader(rspecifier);
-        for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++)
+        for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++) {
           kaldi_writer.Write(kaldi_reader.Key(),
                              CompressedMatrix(kaldi_reader.Value()));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(kaldi_reader.Key(),
+                                    kaldi_reader.Value().NumRows());
+        }
       }
     }
     KALDI_LOG << "Copied " << num_done << " feature matrices.";
diff --git a/src/featbin/detect-sinusoids.cc b/src/featbin/detect-sinusoids.cc
deleted file mode 100644
index 6c104d5ab5f..00000000000
--- a/src/featbin/detect-sinusoids.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// featbin/detect-sinusoids.cc
-
-// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-#include "feat/sinusoid-detection.h"
-#include "feat/wave-reader.h"
-
-
-int main(int argc, char *argv[]) {
-  try {
-    using namespace kaldi;
-    const char *usage =
-        "Detect sinusoids (one or two at a time) in waveform input and output\n"
-        "frame-by-frame information on their frequencies and energies.  Useful\n"
-        "as part of DTMF and dialtone detection.  Output is an archive of\n"
-        "matrices; for each file, there is a row per frame, containing\n"
-        "<tot-energy> <freq1> <energy1> <freq2> <energy2>,\n"
-        "where the frequencies and energies may be zero if no sufficiently\n"
-        "dominant sinusoid(s) was/were detected.  If two frequencies were\n"
-        "detected, frequency1 < frequency2.  See options for more detail on\n"
-        "configuration options.\n"
-        "\n"
-        "Usage: detect-sinusoids [options] <wav-rspecifier> <matrix-wspecifier>\n"
-        "e.g.: detect-sinusoids scp:wav.scp ark,t:sinusoids.ark\n";
-
-    ParseOptions po(usage);
-    MultiSinusoidDetectorConfig config;
-
-    config.Register(&po);
-
-    po.Read(argc, argv);
-
-    if (po.NumArgs() != 2) {
-      po.PrintUsage();
-      exit(1);
-    }
-
-    std::string wav_rspecifier = po.GetArg(1),
-        matrix_wspecifier = po.GetArg(2);
-
-    int32 num_done = 0, num_err = 0;
-
-    SequentialTableReader<WaveHolder> wav_reader(wav_rspecifier);
-    BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
-
-    MultiSinusoidDetector *detector = NULL;
-
-    for (; !wav_reader.Done(); wav_reader.Next()) {
-      const WaveData &wav_data = wav_reader.Value();
-      const Matrix<BaseFloat> &data = wav_data.Data();
-      BaseFloat samp_freq = wav_data.SampFreq();
-      int32 num_channels = data.NumRows();
-      if (num_channels != 1) {
-        KALDI_WARN << "detect-sinusoids requires data with one "
-                   << "channel.  Recording " << wav_reader.Key() << " has "
-                   << num_channels << ".  First select one channel of your "
-                   << "data (e.g. using sox)";
-        num_err++;
-        continue;
-      }
-      if (samp_freq < config.subsample_freq) {
-        KALDI_WARN << "Sampling frequency of data " << wav_reader.Key()
-                   << " is too low " << samp_freq << " < "
-                   << config.subsample_freq << ".  Reduce --subsample-freq "
-                   << "if you want to run on this data.";
-        num_err++;
-        continue;
-      }
-
-      if (detector == NULL ||
-          samp_freq != detector->SamplingFrequency()) {
-        delete detector;
-        detector = new MultiSinusoidDetector(config, samp_freq);
-      }
-
-      Matrix<BaseFloat> output;
-      DetectSinusoids(data.Row(0), detector, &output);
-
-      if (output.NumRows() == 0) {
-        KALDI_WARN << "No output for " << wav_reader.Key();
-        num_err++;
-      } else {
-        matrix_writer.Write(wav_reader.Key(), output);
-        num_done++;
-      }
-    }
-    delete detector;
-    KALDI_LOG << "Detected sinusoids in " << num_done << " wave files, "
-              << num_err << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-  } catch(const std::exception &e) {
-    std::cerr << e.what();
-    return -1;
-  }
-}
-
diff --git a/src/featbin/extract-column.cc b/src/featbin/extract-column.cc
new file mode 100644
index 00000000000..7fa6644af03
--- /dev/null
+++ b/src/featbin/extract-column.cc
@@ -0,0 +1,84 @@
+// featbin/extract-column.cc
+
+// Copyright 2015 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace std;
+
+    const char *usage =
+        "Extract a column out of a matrix.\n"
+        "This is most useful to extract log-energies\n"
+        "from feature files.\n"
+        "\n"
+        "Usage: extract-column [options] --column-index=<index> "
+        "<feats-rspecifier> <vector-wspecifier>\n"
+        " e.g.: extract-column ark:feats-in.ark ark:energies.ark\n"
+        "See also: select-feats, subset-feats, subsample-feats, extract-rows\n";
+
+    ParseOptions po(usage);
+
+    int32 column_index = 0;
+
+    po.Register("column-index", &column_index,
+                "Index of column to extract");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    string feat_rspecifier = po.GetArg(1);
+    string vector_wspecifier = po.GetArg(2);
+
+    SequentialBaseFloatMatrixReader reader(feat_rspecifier);
+    BaseFloatVectorWriter writer(vector_wspecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    for (; !reader.Done(); reader.Next(), num_done++) {
+      const Matrix<BaseFloat> &feats(reader.Value());
+      Vector<BaseFloat> col(feats.NumRows());
+      if (column_index >= feats.NumCols()) {
+        KALDI_ERR << "Column index " << column_index << " is "
+                  << "not less than number of columns " << feats.NumCols();
+      }
+      col.CopyColFromMat(feats, column_index);
+      writer.Write(reader.Key(), col);
+    }
+
+    KALDI_LOG << "Processed " << num_done << " matrices successfully; "
+              << "errors on " << num_err;
+
+    return (num_done > 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/featbin/extract-rows.cc b/src/featbin/extract-rows.cc
index e14f9cc0e82..e4e2a927e6b 100644
--- a/src/featbin/extract-rows.cc
+++ b/src/featbin/extract-rows.cc
@@ -149,7 +149,7 @@ int main(int argc, char *argv[]) {
     KALDI_LOG << "Processed " << num_done << " segments successfully; "
               << "errors on " << num_err;
 
-    return (num_done > 0);
+    return (num_done > 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
diff --git a/src/featbin/shift-feats.cc b/src/featbin/shift-feats.cc
index 7b970e92248..5d392c9d15a 100644
--- a/src/featbin/shift-feats.cc
+++ b/src/featbin/shift-feats.cc
@@ -22,20 +22,41 @@
 #include "util/common-utils.h"
 #include "matrix/kaldi-matrix.h"
 
+namespace kaldi {
+void ShiftFeatureMatrix(const Matrix<BaseFloat> &src, int32 shift,
+                        Matrix<BaseFloat> *rearranged) {
+  for (int32 r = 0; r < src.NumRows(); r++) {
+    int32 src_r = r - shift;
+    if (src_r < 0) src_r = 0;
+    if (src_r >= src.NumRows()) src_r = src.NumRows() - 1;
+    rearranged->Row(r).CopyFromVec(src.Row(src_r));
+  }
+}
+}
 
 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
 
     const char *usage =
-        "Copy features and possibly shift them in time while maintaining the length, e.g.\n"
-        "shift-feats --shift=1 will shift all frames to the\n"
-        "right by one (the first frame would be duplicated).\n"
-        "See also: copy-feats, copy-matrix\n";
+        "Copy features, and possibly shift them while maintaining the "
+        "num-frames.\n"
+        "Usage: shift-feats [options] <feature-rspecifier> "
+        "<feature-wspecifier>\n"
+        "or:   shift-feats [options] <feat-rxfilename> <feat-wxfilename>\n"
+        "e.g.: shift-feats --shift=-1 foo.scp bar.ark\n"
+        "or:   shift-feats --shift=1 foo.mat bar.mat\n"
+        "See also: copy-feats, copy-matrix, select-feats, extract-rows,\n"
+        "subset-feats, subsample-feats, splice-feats, paste-feats, "
+        "concat-feats\n";
 
     ParseOptions po(usage);
+    bool binary = true;
     int32 shift = 0;
-    po.Register("shift", &shift, "Number of frames by which to shift the features.");
+    po.Register("shift", &shift, "Number of frames by which to shift the "
+                "features.");
+    po.Register("binary", &binary, "Binary-mode output (not relevant if "
+                "writing to archive)");
 
     po.Read(argc, argv);
 
@@ -46,32 +67,40 @@ int main(int argc, char *argv[]) {
 
     int32 num_done = 0, num_err = 0;
 
-    SequentialBaseFloatMatrixReader feat_reader(po.GetArg(1));
-    BaseFloatMatrixWriter feat_writer(po.GetArg(2));
-
-
-    for (; !feat_reader.Done(); feat_reader.Next()) {
diff --git a/src/featbin/vector-to-feat.cc b/src/featbin/vector-to-feat.cc
new file mode 100644
index 00000000000..1fe521db864
--- /dev/null
+++ b/src/featbin/vector-to-feat.cc
@@ -0,0 +1,100 @@
+// featbin/vector-to-feat.cc
+
+// Copyright 2015  Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Convert a vector into a single feature so that it can be appended \n"
+        "to other feature matrices\n"
+        "Usage: vector-to-feats <vector-rspecifier> <feats-wspecifier>\n"
+        "or: vector-to-feats <vector-rxfilename> <feats-wxfilename>\n"
+        "e.g.: vector-to-feats scp:weights.scp ark:weight_feats.ark\n"
+        " or: vector-to-feats weight_vec feat_mat\n"
+        "See also: copy-feats, copy-matrix, paste-feats, \n"
+        "subsample-feats, splice-feats\n";
+
+    ParseOptions po(usage);
+    bool compress = false, binary = true;
+
+    po.Register("binary", &binary, "Binary-mode output (not relevant if writing "
+                "to archive)");
+    po.Register("compress", &compress, "If true, write output in compressed form "
+                "(only currently supported for wxfilename, i.e. archive/script, "
+                "output)");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    int32 num_done = 0;
+
+    if (ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier) {
+      std::string vector_rspecifier = po.GetArg(1);
+      std::string feature_wspecifier = po.GetArg(2);
+
+      SequentialBaseFloatVectorReader vector_reader(vector_rspecifier);
+      BaseFloatMatrixWriter feat_writer(feature_wspecifier);
+      CompressedMatrixWriter compressed_feat_writer(feature_wspecifier);
+
+      for (; !vector_reader.Done(); vector_reader.Next(), ++num_done) {
+        const Vector<BaseFloat> &vec = vector_reader.Value();
+        Matrix<BaseFloat> feat(vec.Dim(), 1);
+        feat.CopyColFromVec(vec, 0);
+
+        if (!compress)
+          feat_writer.Write(vector_reader.Key(), feat);
+        else
+          compressed_feat_writer.Write(vector_reader.Key(),
+                                       CompressedMatrix(feat));
+      }
+      KALDI_LOG << "Converted " << num_done << " vectors into features";
+      return (num_done != 0 ? 0 : 1);
+    }
+
+    KALDI_ASSERT(!compress && "Compression not yet supported for single files");
+
+    std::string vector_rxfilename = po.GetArg(1),
+        feature_wxfilename = po.GetArg(2);
+
+    Vector<BaseFloat> vec;
+    ReadKaldiObject(vector_rxfilename, &vec);
+
+    Matrix<BaseFloat> feat(vec.Dim(), 1);
+    feat.CopyColFromVec(vec, 0);
+
+    WriteKaldiObject(feat, feature_wxfilename, binary);
+
+    KALDI_LOG << "Converted vector " << PrintableRxfilename(vector_rxfilename)
+              << " to " << PrintableWxfilename(feature_wxfilename);
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
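A typical use of vector-to-feats is to turn a per-frame weight vector into a one-column feature matrix and glue it onto existing features, e.g. with paste-feats (a sketch; the archive names are hypothetical):

    # convert weights to a one-column feature stream and append it
    vector-to-feats scp:weights.scp ark:- | \
      paste-feats ark:feats.ark ark:- ark:feats_plus_weights.ark
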
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index a9e6d3509c1..3b92f6e0b3e 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -156,6 +156,8 @@ int main(int argc, char *argv[]) {
     bool normalize_output = true;
     BaseFloat volume = 0;
     BaseFloat duration = 0;
+    std::string reverb_wxfilename;
+    std::string additive_noise_wxfilename;
 
     po.Register("multi-channel-output", &multi_channel_output,
                 "Specifies if the output should be multi-channel or not");
@@ -212,6 +214,14 @@ int main(int argc, char *argv[]) {
                 "after reverberating and possibly adding noise. "
                 "If you set this option to a nonzero value, it will be as "
                 "if you had also specified --normalize-output=false.");
+    po.Register("reverb-out-wxfilename", &reverb_wxfilename,
+                "Output the reverberated wave file, i.e. before adding the "
+                "additive noise. "
+                "Useful for computing SNR features or for debugging");
+    po.Register("additive-noise-out-wxfilename",
+                &additive_noise_wxfilename,
+                "Output the additive noise file used to corrupt the input wave. "
+ "Useful for computing SNR features or for debugging"); po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -314,10 +324,23 @@ int main(int argc, char *argv[]) { int32 num_samp_output = (duration > 0 ? samp_freq_input * duration : (shift_output ? num_samp_input : num_samp_input + num_samp_rir - 1)); + Matrix out_matrix(num_output_channels, num_samp_output); + Matrix out_reverb_matrix; + if (!reverb_wxfilename.empty()) + out_reverb_matrix.Resize(num_output_channels, num_samp_output); + + Matrix out_noise_matrix; + if (!additive_noise_wxfilename.empty()) + out_noise_matrix.Resize(num_output_channels, num_samp_output); + for (int32 output_channel = 0; output_channel < num_output_channels; output_channel++) { Vector input(num_samp_input); + + Vector out_reverb(0); + Vector out_noise(0); + input.CopyRowFromMat(input_matrix, input_channel); float power_before_reverb = VecVec(input, input) / input.Dim(); @@ -337,6 +360,16 @@ int main(int argc, char *argv[]) { } } + if (!reverb_wxfilename.empty()) { + out_reverb.Resize(input.Dim()); + out_reverb.CopyFromVec(input); + } + + if (!additive_noise_wxfilename.empty()) { + out_noise.Resize(input.Dim()); + out_noise.SetZero(); + } + if (additive_signal_matrices.size() > 0) { Vector noise(0); int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel); @@ -345,33 +378,86 @@ int main(int argc, char *argv[]) { for (int32 i = 0; i < additive_signal_matrices.size(); i++) { noise.Resize(additive_signal_matrices[i].NumCols()); noise.CopyRowFromMat(additive_signal_matrices[i], this_noise_channel); - AddNoise(&noise, snr_vector[i], start_time_vector[i], - samp_freq_input, early_energy, &input); + + if (!additive_noise_wxfilename.empty()) { + AddNoise(&noise, snr_vector[i], start_time_vector[i], + samp_freq_input, early_energy, &out_noise); + } else { + AddNoise(&noise, snr_vector[i], start_time_vector[i], + samp_freq_input, early_energy, &input); + } + } + + if (!additive_noise_wxfilename.empty()) { + input.AddVec(1.0, out_noise); } } float power_after_reverb = VecVec(input, input) / input.Dim(); - if (volume > 0) + if (volume > 0) { input.Scale(volume); - else if (normalize_output) + out_reverb.Scale(volume); + out_noise.Scale(volume); + } else if (normalize_output) { input.Scale(sqrt(power_before_reverb / power_after_reverb)); + out_reverb.Scale(sqrt(power_before_reverb / power_after_reverb)); + out_noise.Scale(sqrt(power_before_reverb / power_after_reverb)); + } if (num_samp_output <= num_samp_input) { // trim the signal from the start out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel); + + if (!reverb_wxfilename.empty()) { + out_reverb_matrix.CopyRowFromVec(out_reverb.Range(shift_index, num_samp_output), output_channel); + } + + if (!additive_noise_wxfilename.empty()) { + out_noise_matrix.CopyRowFromVec(out_noise.Range(shift_index, num_samp_output), output_channel); + } } else { - // repeat the signal to fill up the duration - Vector extended_input(num_samp_output); - extended_input.SetZero(); - AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input); - out_matrix.CopyRowFromVec(extended_input, output_channel); + { + // repeat the signal to fill up the duration + Vector extended_input(num_samp_output); + extended_input.SetZero(); + AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input); + out_matrix.CopyRowFromVec(extended_input, output_channel); + } + if (!reverb_wxfilename.empty()) { + // repeat the signal to fill up the duration + Vector 
diff --git a/src/fgmmbin/fgmm-global-init-from-accs.cc b/src/fgmmbin/fgmm-global-init-from-accs.cc
index 23dc6be75cf..70b43e05d11 100644
--- a/src/fgmmbin/fgmm-global-init-from-accs.cc
+++ b/src/fgmmbin/fgmm-global-init-from-accs.cc
@@ -1,8 +1,8 @@
 // fgmmbin/fgmm-global-init-from-accs.cc
 
-// Copyright   2015       David Snyder
-//             2015       Johns Hopkins University (Author: Daniel Povey)
-//             2015       Johns Hopkins University (Author: Daniel Garcia-Romero)
+// Copyright   2015-2017  David Snyder
+//             2015       Johns Hopkins University (Author: Daniel Povey)
+//             2015       Johns Hopkins University (Author: Daniel Garcia-Romero)
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
     }
 
     int32 num_gauss = gmm_accs.NumGauss(), dim = gmm_accs.Dim(),
-        tot_floored = 0, gauss_floored = 0;
+        tot_floored = 0, gauss_floored = 0, tot_low_occ = 0;
 
     FullGmm fgmm(num_components, dim);
 
@@ -69,23 +69,30 @@ int main(int argc, char *argv[]) {
     Matrix<BaseFloat> means(num_gauss, dim);
     std::vector<SpMatrix<BaseFloat> > invcovars;
 
-    BaseFloat occ_sum = gmm_accs.occupancy().Sum();
     for (int32 i = 0; i < num_components; i++) {
-      BaseFloat occ = gmm_accs.occupancy()(i),
-          prob;
-      if (occ_sum > 0.0)
-        prob = occ / occ_sum;
-      else
-        prob = 1.0 / num_gauss;
-      weights(i) = prob;
-
-      Vector<BaseFloat> mean(gmm_accs.mean_accumulator().Row(i));
-      mean.Scale(1.0 / occ);
+      BaseFloat occ = gmm_accs.occupancy()(i);
+      weights(i) = occ;
+      Vector<BaseFloat> mean(dim, kSetZero);
+      SpMatrix<BaseFloat> covar(dim, kSetZero);
+
+      // If the occupancy for a Gaussian is very low, set it to a small value.
+      if (occ < 1e-10) {
+        weights(i) = 1e-10;
+        mean.SetRandn();
+        Vector<BaseFloat> diag(mean.Dim());
+        diag.Set(1.0);
+        covar.AddDiagVec(1.0, diag);
+        tot_low_occ++;
+      // This is the typical case.
+      } else {
+        mean.CopyRowFromMat(gmm_accs.mean_accumulator(), i);
+        mean.Scale(1.0 / occ);
+        covar.CopyFromSp(gmm_accs.covariance_accumulator()[i]);
+        covar.Scale(1.0 / occ);
+        covar.AddVec2(-1.0, mean);  // subtract squared means.
+      }
       means.CopyRowFromVec(mean, i);
-      SpMatrix<BaseFloat> covar(gmm_accs.covariance_accumulator()[i]);
-      covar.Scale(1.0 / occ);
-      covar.AddVec2(-1.0, means.Row(i));  // subtract squared means.
 
       // Floor variance Eigenvalues.
      BaseFloat floor = std::max(
          static_cast<BaseFloat>(gmm_opts.variance_floor),
@@ -98,14 +105,21 @@ int main(int argc, char *argv[]) {
       covar.InvertDouble();
       invcovars.push_back(covar);
     }
+    weights.Scale(1.0 / weights.Sum());
 
     fgmm.SetWeights(weights);
     fgmm.SetInvCovarsAndMeans(invcovars, means);
     int32 num_bad = fgmm.ComputeGconsts();
     KALDI_LOG << "FullGmm has " << num_bad << " bad GConsts";
+
     if (tot_floored > 0) {
       KALDI_WARN << tot_floored << " variances floored in " << gauss_floored
                  << " Gaussians.";
     }
+    if (tot_low_occ > 0) {
+      KALDI_WARN << tot_low_occ << " out of " << num_gauss
+                 << " Gaussians had very low occupancy.";
+    }
+
     WriteKaldiObject(fgmm, model_out_filename, binary_write);
 
     KALDI_LOG << "Written model to " << model_out_filename;
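The new low-occupancy branch means initialization no longer divides by a near-zero count: such components get weight 1e-10, a random mean and a unit covariance, and the added warning reports how many were handled that way. A sketch of the intended call, assuming the <stats-in> <num-comp> <model-out> argument order suggested by the variable names in this file:

    fgmm-global-init-from-accs --binary=true final.acc 2048 0.mdl
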
(Authors: Jan "Yenda" Trmal) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + + const char *usage = + "Reads kaldi archive of FSTs; for each element, performs the project\n" + "operation either on input (default) or on the output (if the option\n" + "--project-output is true).\n" + "\n" + "Usage: fsts-project [options] \n" + " e.g.: fsts-project ark:train.fsts ark,t:train.fsts\n" + "\n" + "see also: fstproject (from the OpenFst toolkit)\n"; + + ParseOptions po(usage); + + bool project_output = false; + + po.Register("project-output", &project_output, + "If true, project output vs input"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string fsts_rspecifier = po.GetArg(1), + fsts_wspecifier = po.GetArg(2); + + + SequentialTableReader fst_reader(fsts_rspecifier); + TableWriter fst_writer(fsts_wspecifier); + + int32 n_done = 0; + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string key = fst_reader.Key(); + VectorFst fst(fst_reader.Value()); + + Project(&fst, project_output ? PROJECT_OUTPUT : PROJECT_INPUT); + + fst_writer.Write(key, fst); + n_done++; + } + + KALDI_LOG << "Projected " << n_done << " FSTs"; + return (n_done != 0 ? 
diff --git a/src/fstbin/fsts-to-transcripts.cc b/src/fstbin/fsts-to-transcripts.cc
index 7c301e10390..ae74d5de6e9 100644
--- a/src/fstbin/fsts-to-transcripts.cc
+++ b/src/fstbin/fsts-to-transcripts.cc
@@ -1,6 +1,7 @@
 // fstbin/fsts-to-transcripts.cc
 
-// Copyright 2012-2013  Johns Hopkins University (Authors: Guoguo Chen, Daniel Povey)
+// Copyright 2012-2013  Johns Hopkins University (Authors: Guoguo Chen,
+//                                                         Daniel Povey)
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -32,12 +33,15 @@ int main(int argc, char *argv[]) {
     typedef kaldi::uint64 uint64;
 
     const char *usage =
-        "Reads a table of FSTs; for each element, finds the best path and prints out the\n"
-        "output-symbol sequence (if --output-side=true), or input-symbol sequence "
-        "otherwise.\n"
+        "Reads a table of FSTs; for each element, finds the best path and \n"
+        "prints out the output-symbol sequence (if --output-side=true), or \n"
+        "input-symbol sequence otherwise.\n"
         "\n"
-        "Usage: fsts-to-transcripts [options] <fsts-rspecifier> <transcriptions-wspecifier>\n"
-        " e.g.: fsts-to-transcripts ark:train.fsts ark,t:train.text\n";
+        "Usage:\n"
+        " fsts-to-transcripts [options] <fsts-rspecifier> "
+        "<transcriptions-wspecifier>\n"
+        "e.g.:\n"
+        " fsts-to-transcripts ark:train.fsts ark,t:train.text\n";
 
     ParseOptions po(usage);
 
@@ -48,13 +52,13 @@ int main(int argc, char *argv[]) {
 
     po.Read(argc, argv);
 
-    if (po.NumArgs() < 2 || po.NumArgs() > 3) {
+    if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string fst_rspecifier = po.GetArg(1),
-        transcript_wspecifier = po.GetOptArg(2);
+        transcript_wspecifier = po.GetArg(2);
 
 
     SequentialTableReader<VectorFstHolder> fst_reader(fst_rspecifier);
@@ -67,11 +71,11 @@ int main(int argc, char *argv[]) {
 
       VectorFst<StdArc> shortest_path;
-      ShortestPath(fst, &shortest_path); // the OpenFst algorithm ShortestPath.
+      ShortestPath(fst, &shortest_path);  // the OpenFst algorithm ShortestPath.
 
       if (shortest_path.NumStates() == 0) {
-        KALDI_WARN << "Input FST (after shortest path) was empty. Producing no "
-                   << "output for key " << key;
+        KALDI_WARN << "Input FST (after shortest path) was empty. Producing "
+                   << "no output for key " << key;
         n_err++;
         continue;
       }
@@ -80,7 +84,8 @@ int main(int argc, char *argv[]) {
       bool ans;
      if (output_side) ans = fst::GetLinearSymbolSequence(
           shortest_path, NULL, &transcript, NULL);
-      else ans = fst::GetLinearSymbolSequence(
+      else
+        ans = fst::GetLinearSymbolSequence(
           shortest_path, &transcript, NULL, NULL);
       if (!ans) {
         KALDI_ERR << "GetLinearSymbolSequence returned false (code error);";
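fsts-to-transcripts emits integer symbol sequences, so it pairs naturally with int2sym.pl to get readable text; a sketch with assumed paths:

    fsts-to-transcripts ark:train.fsts ark,t:- | \
      utils/int2sym.pl -f 2- data/lang/words.txt > train.txt
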
diff --git a/src/fstbin/fsts-union.cc b/src/fstbin/fsts-union.cc
new file mode 100644
index 00000000000..ed68cea76e9
--- /dev/null
+++ b/src/fstbin/fsts-union.cc
@@ -0,0 +1,100 @@
+// fstbin/fsts-union.cc
+
+// Copyright 2016  Johns Hopkins University (Authors: Jan "Yenda" Trmal)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "fstext/fstext-utils.h"
+#include "fstext/kaldi-fst-io.h"
+
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace fst;
+    typedef kaldi::int32 int32;
+    typedef kaldi::uint64 uint64;
+
+    const char *usage =
+        "Reads a kaldi archive of FSTs. Performs the FST operation union on\n"
+        "all fsts sharing the same key. Assumes the archive is sorted by key.\n"
+        "\n"
+        "Usage: fsts-union [options] <fsts-rspecifier> <fsts-wspecifier>\n"
+        " e.g.: fsts-union ark:keywords_tmp.fsts ark,t:keywords.fsts\n"
+        "\n"
+        "see also: fstunion (from the OpenFst toolkit)\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string fsts_rspecifier = po.GetArg(1),
+        fsts_wspecifier = po.GetArg(2);
+
+
+    SequentialTableReader<VectorFstHolder> fst_reader(fsts_rspecifier);
+    TableWriter<VectorFstHolder> fst_writer(fsts_wspecifier);
+
+    int32 n_out_done = 0,
+        n_in_done = 0;
+    std::string res_key = "";
+    VectorFst<StdArc> res_fst;
+
+    for (; !fst_reader.Done(); fst_reader.Next()) {
+      std::string key = fst_reader.Key();
+      VectorFst<StdArc> fst(fst_reader.Value());
+
+      n_in_done++;
+      if (key == res_key) {
+        fst::Union(&res_fst, fst);
+      } else {
+        if (res_key != "") {
+          VectorFst<StdArc> out_fst;
+          fst::Determinize(res_fst, &out_fst);
+          fst::Minimize(&out_fst);
+          fst::RmEpsilon(&out_fst);
+          fst_writer.Write(res_key, out_fst);
+          n_out_done++;
+        }
+        res_fst = fst;
+        res_key = key;
+      }
+    }
+    if (res_key != "") {
+      VectorFst<StdArc> out_fst;
+      fst::Determinize(res_fst, &out_fst);
+      fst::Minimize(&out_fst);
+      fst::RmEpsilon(&out_fst);
+      fst_writer.Write(res_key, out_fst);
+      n_out_done++;
+    }
+
+    KALDI_LOG << "Applied fst union on " << n_in_done
+              << " FSTs, produced " << n_out_done << " FSTs";
+    return (n_out_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h
index 5127e7ae584..dc8a4a8370b 100644
--- a/src/fstext/context-fst-inl.h
+++ b/src/fstext/context-fst-inl.h
@@ -31,6 +31,7 @@ namespace fst {
 /// \addtogroup context_fst_group
 /// @{
 
+namespace internal {
 
 template<class Arc, class LabelT>
 typename ContextFstImpl<Arc, LabelT>::StateId
@@ -41,12 +42,6 @@ typename ContextFstImpl<Arc, LabelT>::StateId
   VectorToStateIter iter = state_map_.find(seq);
   if (iter == state_map_.end()) {  // Not already in map.
     StateId this_state_id = (StateId)state_seqs_.size();
-    // This check is not needed with OpenFst >= 1.4
-#ifndef HAVE_OPENFST_GE_10400
-    StateId this_state_id_check = CacheImpl<Arc>::AddState();
-    // goes back to VectorFstBaseImpl, inherited via CacheFst
-    assert(this_state_id == this_state_id_check);
-#endif
     state_seqs_.push_back(seq);
     state_map_[seq] = this_state_id;
     return this_state_id;
@@ -325,53 +320,26 @@ void ContextFstImpl<Arc, LabelT>::Expand(StateId s) {  // expands arcs only [not
   // We just try adding all possible symbols on the output side.
   Arc arc;
   if (this->CreateArc(s, subsequential_symbol_, &arc)) {
-#ifdef HAVE_OPENFST_GE_10400
     this->PushArc(s, arc);
-#else
-    this->AddArc(s, arc);
-#endif
   }
   for (typename kaldi::ConstIntegerSet